| 804 |
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 } |
| 805 |
sub SELF_CLOSING_START_TAG_STATE () { 34 } |
sub SELF_CLOSING_START_TAG_STATE () { 34 } |
| 806 |
sub CDATA_BLOCK_STATE () { 35 } |
sub CDATA_BLOCK_STATE () { 35 } |
| 807 |
sub MD_HYPHEN_STATE () { 36 } |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
| 808 |
sub MD_DOCTYPE_STATE () { 37 } |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
| 809 |
sub MD_CDATA_STATE () { 38 } |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
| 810 |
|
sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
| 811 |
|
|
| 812 |
sub DOCTYPE_TOKEN () { 1 } |
sub DOCTYPE_TOKEN () { 1 } |
| 813 |
sub COMMENT_TOKEN () { 2 } |
sub COMMENT_TOKEN () { 2 } |
| 1123 |
die "$0: $self->{content_model} in tag open"; |
die "$0: $self->{content_model} in tag open"; |
| 1124 |
} |
} |
| 1125 |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
| 1126 |
|
## NOTE: The "close tag open state" in the spec is implemented as |
| 1127 |
|
## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|. |
| 1128 |
|
|
| 1129 |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
| 1130 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1131 |
if (defined $self->{last_emitted_start_tag_name}) { |
if (defined $self->{last_emitted_start_tag_name}) { |
| 1132 |
|
$self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE; |
| 1133 |
## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564> |
$self->{state_keyword} = ''; |
| 1134 |
my @next_char; |
## Reconsume. |
| 1135 |
TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) { |
redo A; |
|
push @next_char, $self->{next_char}; |
|
|
my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1); |
|
|
my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c; |
|
|
if ($self->{next_char} == $c or $self->{next_char} == $C) { |
|
|
!!!cp (24); |
|
|
!!!next-input-character; |
|
|
next TAGNAME; |
|
|
} else { |
|
|
!!!cp (25); |
|
|
$self->{next_char} = shift @next_char; # reconsume |
|
|
!!!back-next-input-character (@next_char); |
|
|
$self->{state} = DATA_STATE; |
|
|
|
|
|
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
|
|
line => $l, column => $c, |
|
|
}); |
|
|
|
|
|
redo A; |
|
|
} |
|
|
} |
|
|
push @next_char, $self->{next_char}; |
|
|
|
|
|
unless ($self->{next_char} == 0x0009 or # HT |
|
|
$self->{next_char} == 0x000A or # LF |
|
|
$self->{next_char} == 0x000B or # VT |
|
|
$self->{next_char} == 0x000C or # FF |
|
|
$self->{next_char} == 0x0020 or # SP |
|
|
$self->{next_char} == 0x003E or # > |
|
|
$self->{next_char} == 0x002F or # / |
|
|
$self->{next_char} == -1) { |
|
|
!!!cp (26); |
|
|
$self->{next_char} = shift @next_char; # reconsume |
|
|
!!!back-next-input-character (@next_char); |
|
|
$self->{state} = DATA_STATE; |
|
|
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
|
|
line => $l, column => $c, |
|
|
}); |
|
|
redo A; |
|
|
} else { |
|
|
!!!cp (27); |
|
|
$self->{next_char} = shift @next_char; |
|
|
!!!back-next-input-character (@next_char); |
|
|
# and consume... |
|
|
} |
|
| 1136 |
} else { |
} else { |
| 1137 |
## No start tag token has ever been emitted |
## No start tag token has ever been emitted |
| 1138 |
|
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
| 1139 |
!!!cp (28); |
!!!cp (28); |
|
# next-input-character is already done |
|
| 1140 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1141 |
|
## Reconsume. |
| 1142 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
| 1143 |
line => $l, column => $c, |
line => $l, column => $c, |
| 1144 |
}); |
}); |
| 1145 |
redo A; |
redo A; |
| 1146 |
} |
} |
| 1147 |
} |
} |
| 1148 |
|
|
| 1149 |
if (0x0041 <= $self->{next_char} and |
if (0x0041 <= $self->{next_char} and |
| 1150 |
$self->{next_char} <= 0x005A) { # A..Z |
$self->{next_char} <= 0x005A) { # A..Z |
| 1151 |
!!!cp (29); |
!!!cp (29); |
| 1192 |
line => $self->{line_prev}, # "<" of "</" |
line => $self->{line_prev}, # "<" of "</" |
| 1193 |
column => $self->{column_prev} - 1, |
column => $self->{column_prev} - 1, |
| 1194 |
}; |
}; |
| 1195 |
## $self->{next_char} is intentionally left as is |
## NOTE: $self->{next_char} is intentionally left as is. |
| 1196 |
|
## Although the "anything else" case of the spec does not explicitly |
| 1197 |

## state that the next input character is to be reconsumed, |
| 1198 |
|
## it will be included in the |data| of the comment token |
| 1199 |
|
## generated from the bogus end tag, as defined in the |
| 1200 |
|
## "bogus comment state" entry. |
| 1201 |
redo A; |
redo A; |
| 1202 |
} |
} |
| 1203 |
|
} elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) { |
| 1204 |
|
my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1; |
| 1205 |
|
if (length $ch) { |
| 1206 |
|
my $CH = $ch; |
| 1207 |
|
$ch =~ tr/a-z/A-Z/; |
| 1208 |
|
my $nch = chr $self->{next_char}; |
| 1209 |
|
if ($nch eq $ch or $nch eq $CH) { |
| 1210 |
|
!!!cp (24); |
| 1211 |
|
## Stay in the state. |
| 1212 |
|
$self->{state_keyword} .= $nch; |
| 1213 |
|
!!!next-input-character; |
| 1214 |
|
redo A; |
| 1215 |
|
} else { |
| 1216 |
|
!!!cp (25); |
| 1217 |
|
$self->{state} = DATA_STATE; |
| 1218 |
|
## Reconsume. |
| 1219 |
|
!!!emit ({type => CHARACTER_TOKEN, |
| 1220 |
|
data => '</' . $self->{state_keyword}, |
| 1221 |
|
line => $self->{line_prev}, |
| 1222 |
|
column => $self->{column_prev} - 1 - length $self->{state_keyword}, |
| 1223 |
|
}); |
| 1224 |
|
redo A; |
| 1225 |
|
} |
| 1226 |
|
} else { # after "</{tag-name}" |
| 1227 |
|
unless ({ |
| 1228 |
|
0x0009 => 1, # HT |
| 1229 |
|
0x000A => 1, # LF |
| 1230 |
|
0x000B => 1, # VT |
| 1231 |
|
0x000C => 1, # FF |
| 1232 |
|
0x0020 => 1, # SP |
| 1233 |
|
0x003E => 1, # > |
| 1234 |
|
0x002F => 1, # / |
| 1235 |
|
-1 => 1, # EOF |
| 1236 |
|
}->{$self->{next_char}}) { |
| 1237 |
|
!!!cp (26); |
| 1238 |
|
## Reconsume. |
| 1239 |
|
$self->{state} = DATA_STATE; |
| 1240 |
|
!!!emit ({type => CHARACTER_TOKEN, |
| 1241 |
|
data => '</' . $self->{state_keyword}, |
| 1242 |
|
line => $self->{line_prev}, |
| 1243 |
|
column => $self->{column_prev} - 1 - length $self->{state_keyword}, |
| 1244 |
|
}); |
| 1245 |
|
redo A; |
| 1246 |
|
} else { |
| 1247 |
|
!!!cp (27); |
| 1248 |
|
$self->{current_token} |
| 1249 |
|
= {type => END_TAG_TOKEN, |
| 1250 |
|
tag_name => $self->{last_emitted_start_tag_name}, |
| 1251 |
|
line => $self->{line_prev}, |
| 1252 |
|
column => $self->{column_prev} - 1 - length $self->{state_keyword}}; |
| 1253 |
|
$self->{state} = TAG_NAME_STATE; |
| 1254 |
|
## Reconsume. |
| 1255 |
|
redo A; |
| 1256 |
|
} |
| 1257 |
|
} |
| 1258 |
} elsif ($self->{state} == TAG_NAME_STATE) { |
} elsif ($self->{state} == TAG_NAME_STATE) { |
| 1259 |
if ($self->{next_char} == 0x0009 or # HT |
if ($self->{next_char} == 0x0009 or # HT |
| 1260 |
$self->{next_char} == 0x000A or # LF |
$self->{next_char} == 0x000A or # LF |