804 |
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
805 |
sub SELF_CLOSING_START_TAG_STATE () { 34 } |
sub SELF_CLOSING_START_TAG_STATE () { 34 } |
806 |
sub CDATA_BLOCK_STATE () { 35 } |
sub CDATA_BLOCK_STATE () { 35 } |
807 |
sub MD_HYPHEN_STATE () { 36 } |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
808 |
sub MD_DOCTYPE_STATE () { 37 } |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
809 |
sub MD_CDATA_STATE () { 38 } |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
810 |
|
sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
811 |
|
|
812 |
sub DOCTYPE_TOKEN () { 1 } |
sub DOCTYPE_TOKEN () { 1 } |
813 |
sub COMMENT_TOKEN () { 2 } |
sub COMMENT_TOKEN () { 2 } |
1123 |
die "$0: $self->{content_model} in tag open"; |
die "$0: $self->{content_model} in tag open"; |
1124 |
} |
} |
1125 |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
1126 |
|
## NOTE: The "close tag open state" in the spec is implemented as |
1127 |
|
## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|. |
1128 |
|
|
1129 |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
1130 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
1131 |
if (defined $self->{last_emitted_start_tag_name}) { |
if (defined $self->{last_emitted_start_tag_name}) { |
1132 |
|
$self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE; |
1133 |
## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564> |
$self->{state_keyword} = ''; |
1134 |
my @next_char; |
## Reconsume. |
1135 |
TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) { |
redo A; |
|
push @next_char, $self->{next_char}; |
|
|
my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1); |
|
|
my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c; |
|
|
if ($self->{next_char} == $c or $self->{next_char} == $C) { |
|
|
!!!cp (24); |
|
|
!!!next-input-character; |
|
|
next TAGNAME; |
|
|
} else { |
|
|
!!!cp (25); |
|
|
$self->{next_char} = shift @next_char; # reconsume |
|
|
!!!back-next-input-character (@next_char); |
|
|
$self->{state} = DATA_STATE; |
|
|
|
|
|
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
|
|
line => $l, column => $c, |
|
|
}); |
|
|
|
|
|
redo A; |
|
|
} |
|
|
} |
|
|
push @next_char, $self->{next_char}; |
|
|
|
|
|
unless ($self->{next_char} == 0x0009 or # HT |
|
|
$self->{next_char} == 0x000A or # LF |
|
|
$self->{next_char} == 0x000B or # VT |
|
|
$self->{next_char} == 0x000C or # FF |
|
|
$self->{next_char} == 0x0020 or # SP |
|
|
$self->{next_char} == 0x003E or # > |
|
|
$self->{next_char} == 0x002F or # / |
|
|
$self->{next_char} == -1) { |
|
|
!!!cp (26); |
|
|
$self->{next_char} = shift @next_char; # reconsume |
|
|
!!!back-next-input-character (@next_char); |
|
|
$self->{state} = DATA_STATE; |
|
|
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
|
|
line => $l, column => $c, |
|
|
}); |
|
|
redo A; |
|
|
} else { |
|
|
!!!cp (27); |
|
|
$self->{next_char} = shift @next_char; |
|
|
!!!back-next-input-character (@next_char); |
|
|
# and consume... |
|
|
} |
|
1136 |
} else { |
} else { |
1137 |
## No start tag token has ever been emitted |
## No start tag token has ever been emitted |
1138 |
|
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
1139 |
!!!cp (28); |
!!!cp (28); |
|
# next-input-character is already done |
|
1140 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1141 |
|
## Reconsume. |
1142 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
1143 |
line => $l, column => $c, |
line => $l, column => $c, |
1144 |
}); |
}); |
1145 |
redo A; |
redo A; |
1146 |
} |
} |
1147 |
} |
} |
1148 |
|
|
1149 |
if (0x0041 <= $self->{next_char} and |
if (0x0041 <= $self->{next_char} and |
1150 |
$self->{next_char} <= 0x005A) { # A..Z |
$self->{next_char} <= 0x005A) { # A..Z |
1151 |
!!!cp (29); |
!!!cp (29); |
1192 |
line => $self->{line_prev}, # "<" of "</" |
line => $self->{line_prev}, # "<" of "</" |
1193 |
column => $self->{column_prev} - 1, |
column => $self->{column_prev} - 1, |
1194 |
}; |
}; |
1195 |
## $self->{next_char} is intentionally left as is |
## NOTE: $self->{next_char} is intentionally left as is. |
1196 |
|
## Although the "anything else" case of the spec does not explicitly |
1197 |
|
## state that the next input character is to be reconsumed, |
1198 |
|
## it will be included in the |data| of the comment token |
1199 |
|
## generated from the bogus end tag, as defined in the |
1200 |
|
## "bogus comment state" entry. |
1201 |
redo A; |
redo A; |
1202 |
} |
} |
1203 |
|
} elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) { |
1204 |
|
my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1; |
1205 |
|
if (length $ch) { |
1206 |
|
my $CH = $ch; |
1207 |
|
$ch =~ tr/a-z/A-Z/; |
1208 |
|
my $nch = chr $self->{next_char}; |
1209 |
|
if ($nch eq $ch or $nch eq $CH) { |
1210 |
|
!!!cp (24); |
1211 |
|
## Stay in the state. |
1212 |
|
$self->{state_keyword} .= $nch; |
1213 |
|
!!!next-input-character; |
1214 |
|
redo A; |
1215 |
|
} else { |
1216 |
|
!!!cp (25); |
1217 |
|
$self->{state} = DATA_STATE; |
1218 |
|
## Reconsume. |
1219 |
|
!!!emit ({type => CHARACTER_TOKEN, |
1220 |
|
data => '</' . $self->{state_keyword}, |
1221 |
|
line => $self->{line_prev}, |
1222 |
|
column => $self->{column_prev} - 1 - length $self->{state_keyword}, |
1223 |
|
}); |
1224 |
|
redo A; |
1225 |
|
} |
1226 |
|
} else { # after "<{tag-name}" |
1227 |
|
unless ({ |
1228 |
|
0x0009 => 1, # HT |
1229 |
|
0x000A => 1, # LF |
1230 |
|
0x000B => 1, # VT |
1231 |
|
0x000C => 1, # FF |
1232 |
|
0x0020 => 1, # SP |
1233 |
|
0x003E => 1, # > |
1234 |
|
0x002F => 1, # / |
1235 |
|
-1 => 1, # EOF |
1236 |
|
}->{$self->{next_char}}) { |
1237 |
|
!!!cp (26); |
1238 |
|
## Reconsume. |
1239 |
|
$self->{state} = DATA_STATE; |
1240 |
|
!!!emit ({type => CHARACTER_TOKEN, |
1241 |
|
data => '</' . $self->{state_keyword}, |
1242 |
|
line => $self->{line_prev}, |
1243 |
|
column => $self->{column_prev} - 1 - length $self->{state_keyword}, |
1244 |
|
}); |
1245 |
|
redo A; |
1246 |
|
} else { |
1247 |
|
!!!cp (27); |
1248 |
|
$self->{current_token} |
1249 |
|
= {type => END_TAG_TOKEN, |
1250 |
|
tag_name => $self->{last_emitted_start_tag_name}, |
1251 |
|
line => $self->{line_prev}, |
1252 |
|
column => $self->{column_prev} - 1 - length $self->{state_keyword}}; |
1253 |
|
$self->{state} = TAG_NAME_STATE; |
1254 |
|
## Reconsume. |
1255 |
|
redo A; |
1256 |
|
} |
1257 |
|
} |
1258 |
} elsif ($self->{state} == TAG_NAME_STATE) { |
} elsif ($self->{state} == TAG_NAME_STATE) { |
1259 |
if ($self->{next_char} == 0x0009 or # HT |
if ($self->{next_char} == 0x0009 or # HT |
1260 |
$self->{next_char} == 0x000A or # LF |
$self->{next_char} == 0x000A or # LF |