| 141 |
address => ADDRESS_EL, |
address => ADDRESS_EL, |
| 142 |
applet => MISC_SCOPING_EL, |
applet => MISC_SCOPING_EL, |
| 143 |
area => MISC_SPECIAL_EL, |
area => MISC_SPECIAL_EL, |
| 144 |
|
article => MISC_SPECIAL_EL, |
| 145 |
|
aside => MISC_SPECIAL_EL, |
| 146 |
b => FORMATTING_EL, |
b => FORMATTING_EL, |
| 147 |
base => MISC_SPECIAL_EL, |
base => MISC_SPECIAL_EL, |
| 148 |
basefont => MISC_SPECIAL_EL, |
basefont => MISC_SPECIAL_EL, |
| 156 |
center => MISC_SPECIAL_EL, |
center => MISC_SPECIAL_EL, |
| 157 |
col => MISC_SPECIAL_EL, |
col => MISC_SPECIAL_EL, |
| 158 |
colgroup => MISC_SPECIAL_EL, |
colgroup => MISC_SPECIAL_EL, |
| 159 |
|
command => MISC_SPECIAL_EL, |
| 160 |
|
datagrid => MISC_SPECIAL_EL, |
| 161 |
dd => DD_EL, |
dd => DD_EL, |
| 162 |
|
details => MISC_SPECIAL_EL, |
| 163 |
|
dialog => MISC_SPECIAL_EL, |
| 164 |
dir => MISC_SPECIAL_EL, |
dir => MISC_SPECIAL_EL, |
| 165 |
div => DIV_EL, |
div => DIV_EL, |
| 166 |
dl => MISC_SPECIAL_EL, |
dl => MISC_SPECIAL_EL, |
| 167 |
dt => DT_EL, |
dt => DT_EL, |
| 168 |
em => FORMATTING_EL, |
em => FORMATTING_EL, |
| 169 |
embed => MISC_SPECIAL_EL, |
embed => MISC_SPECIAL_EL, |
| 170 |
|
eventsource => MISC_SPECIAL_EL, |
| 171 |
fieldset => MISC_SPECIAL_EL, |
fieldset => MISC_SPECIAL_EL, |
| 172 |
|
figure => MISC_SPECIAL_EL, |
| 173 |
font => FORMATTING_EL, |
font => FORMATTING_EL, |
| 174 |
|
footer => MISC_SPECIAL_EL, |
| 175 |
form => FORM_EL, |
form => FORM_EL, |
| 176 |
frame => MISC_SPECIAL_EL, |
frame => MISC_SPECIAL_EL, |
| 177 |
frameset => FRAMESET_EL, |
frameset => FRAMESET_EL, |
| 182 |
h5 => HEADING_EL, |
h5 => HEADING_EL, |
| 183 |
h6 => HEADING_EL, |
h6 => HEADING_EL, |
| 184 |
head => MISC_SPECIAL_EL, |
head => MISC_SPECIAL_EL, |
| 185 |
|
header => MISC_SPECIAL_EL, |
| 186 |
hr => MISC_SPECIAL_EL, |
hr => MISC_SPECIAL_EL, |
| 187 |
html => HTML_EL, |
html => HTML_EL, |
| 188 |
i => FORMATTING_EL, |
i => FORMATTING_EL, |
| 189 |
iframe => MISC_SPECIAL_EL, |
iframe => MISC_SPECIAL_EL, |
| 190 |
img => MISC_SPECIAL_EL, |
img => MISC_SPECIAL_EL, |
| 191 |
|
#image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec. |
| 192 |
input => MISC_SPECIAL_EL, |
input => MISC_SPECIAL_EL, |
| 193 |
isindex => MISC_SPECIAL_EL, |
isindex => MISC_SPECIAL_EL, |
| 194 |
li => LI_EL, |
li => LI_EL, |
| 197 |
marquee => MISC_SCOPING_EL, |
marquee => MISC_SCOPING_EL, |
| 198 |
menu => MISC_SPECIAL_EL, |
menu => MISC_SPECIAL_EL, |
| 199 |
meta => MISC_SPECIAL_EL, |
meta => MISC_SPECIAL_EL, |
| 200 |
|
nav => MISC_SPECIAL_EL, |
| 201 |
nobr => NOBR_EL | FORMATTING_EL, |
nobr => NOBR_EL | FORMATTING_EL, |
| 202 |
noembed => MISC_SPECIAL_EL, |
noembed => MISC_SPECIAL_EL, |
| 203 |
noframes => MISC_SPECIAL_EL, |
noframes => MISC_SPECIAL_EL, |
| 216 |
s => FORMATTING_EL, |
s => FORMATTING_EL, |
| 217 |
script => MISC_SPECIAL_EL, |
script => MISC_SPECIAL_EL, |
| 218 |
select => SELECT_EL, |
select => SELECT_EL, |
| 219 |
|
section => MISC_SPECIAL_EL, |
| 220 |
small => FORMATTING_EL, |
small => FORMATTING_EL, |
| 221 |
spacer => MISC_SPECIAL_EL, |
spacer => MISC_SPECIAL_EL, |
| 222 |
strike => FORMATTING_EL, |
strike => FORMATTING_EL, |
| 336 |
|
|
| 337 |
## ISSUE: xmlns:xlink="non-xlink-ns" is not an error. |
## ISSUE: xmlns:xlink="non-xlink-ns" is not an error. |
| 338 |
|
|
| 339 |
my $c1_entity_char = { |
my $charref_map = { |
| 340 |
|
0x0D => 0x000A, |
| 341 |
0x80 => 0x20AC, |
0x80 => 0x20AC, |
| 342 |
0x81 => 0xFFFD, |
0x81 => 0xFFFD, |
| 343 |
0x82 => 0x201A, |
0x82 => 0x201A, |
| 370 |
0x9D => 0xFFFD, |
0x9D => 0xFFFD, |
| 371 |
0x9E => 0x017E, |
0x9E => 0x017E, |
| 372 |
0x9F => 0x0178, |
0x9F => 0x0178, |
| 373 |
}; # $c1_entity_char |
}; # $charref_map |
| 374 |
|
$charref_map->{$_} = 0xFFFD |
| 375 |
|
for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F, |
| 376 |
|
0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: 0xFDEF |
| 377 |
|
0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, |
| 378 |
|
0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, |
| 379 |
|
0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, |
| 380 |
|
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, |
| 381 |
|
0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF; |
| 382 |
|
|
| 383 |
|
## TODO: Invoke the reset algorithm when a resettable element is |
| 384 |
|
## created (cf. HTML5 revision 2259). |
| 385 |
|
|
| 386 |
sub parse_byte_string ($$$$;$) { |
sub parse_byte_string ($$$$;$) { |
| 387 |
my $self = shift; |
my $self = shift; |
| 426 |
## TODO: Is this ok? Transfer protocol's parameter should be |
## TODO: Is this ok? Transfer protocol's parameter should be |
| 427 |
## interpreted in its semantics? |
## interpreted in its semantics? |
| 428 |
|
|
|
## ISSUE: Unsupported encoding is not ignored according to the spec. |
|
| 429 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 430 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 431 |
allow_fallback => 1); |
allow_fallback => 1); |
| 433 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 434 |
last SNIFFING; |
last SNIFFING; |
| 435 |
} else { |
} else { |
| 436 |
## TODO: unsupported error |
!!!parse-error (type => 'charset:not supported', |
| 437 |
|
layer => 'encode', |
| 438 |
|
line => 1, column => 1, |
| 439 |
|
value => $charset_name, |
| 440 |
|
level => $self->{level}->{uncertain}); |
| 441 |
} |
} |
| 442 |
} |
} |
| 443 |
|
|
| 881 |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
| 882 |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
| 883 |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
| 884 |
sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
sub CDATA_RCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
| 885 |
sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec |
sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec |
| 886 |
sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec |
sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec |
| 887 |
sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec |
sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec |
| 895 |
sub HEXREF_X_STATE () { 47 } |
sub HEXREF_X_STATE () { 47 } |
| 896 |
sub HEXREF_HEX_STATE () { 48 } |
sub HEXREF_HEX_STATE () { 48 } |
| 897 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
| 898 |
|
sub PCDATA_STATE () { 50 } # "data state" in the spec |
| 899 |
|
|
| 900 |
sub DOCTYPE_TOKEN () { 1 } |
sub DOCTYPE_TOKEN () { 1 } |
| 901 |
sub COMMENT_TOKEN () { 2 } |
sub COMMENT_TOKEN () { 2 } |
| 994 |
## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) |
## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) |
| 995 |
## (This requirement was dropped from HTML5 spec, unfortunately.) |
## (This requirement was dropped from HTML5 spec, unfortunately.) |
| 996 |
|
|
| 997 |
|
my $is_space = { |
| 998 |
|
0x0009 => 1, # CHARACTER TABULATION (HT) |
| 999 |
|
0x000A => 1, # LINE FEED (LF) |
| 1000 |
|
#0x000B => 0, # LINE TABULATION (VT) |
| 1001 |
|
0x000C => 1, # FORM FEED (FF) |
| 1002 |
|
#0x000D => 1, # CARRIAGE RETURN (CR) |
| 1003 |
|
0x0020 => 1, # SPACE (SP) |
| 1004 |
|
}; |
| 1005 |
|
|
| 1006 |
sub _get_next_token ($) { |
sub _get_next_token ($) { |
| 1007 |
my $self = shift; |
my $self = shift; |
| 1008 |
|
|
| 1020 |
} |
} |
| 1021 |
|
|
| 1022 |
A: { |
A: { |
| 1023 |
if ($self->{state} == DATA_STATE) { |
if ($self->{state} == PCDATA_STATE) { |
| 1024 |
|
## NOTE: Same as |DATA_STATE|, but only for |PCDATA| content model. |
| 1025 |
|
|
| 1026 |
if ($self->{nc} == 0x0026) { # & |
if ($self->{nc} == 0x0026) { # & |
| 1027 |
delete $self->{s_kwd}; |
!!!cp (0.1); |
| 1028 |
|
## NOTE: In the spec, the tokenizer is switched to the |
| 1029 |
|
## "entity data state". In this implementation, the tokenizer |
| 1030 |
|
## is switched to the |ENTITY_STATE|, which is an implementation |
| 1031 |
|
## of the "consume a character reference" algorithm. |
| 1032 |
|
$self->{entity_add} = -1; |
| 1033 |
|
$self->{prev_state} = DATA_STATE; |
| 1034 |
|
$self->{state} = ENTITY_STATE; |
| 1035 |
|
!!!next-input-character; |
| 1036 |
|
redo A; |
| 1037 |
|
} elsif ($self->{nc} == 0x003C) { # < |
| 1038 |
|
!!!cp (0.2); |
| 1039 |
|
$self->{state} = TAG_OPEN_STATE; |
| 1040 |
|
!!!next-input-character; |
| 1041 |
|
redo A; |
| 1042 |
|
} elsif ($self->{nc} == -1) { |
| 1043 |
|
!!!cp (0.3); |
| 1044 |
|
!!!emit ({type => END_OF_FILE_TOKEN, |
| 1045 |
|
line => $self->{line}, column => $self->{column}}); |
| 1046 |
|
last A; ## TODO: ok? |
| 1047 |
|
} else { |
| 1048 |
|
!!!cp (0.4); |
| 1049 |
|
# |
| 1050 |
|
} |
| 1051 |
|
|
| 1052 |
|
# Anything else |
| 1053 |
|
my $token = {type => CHARACTER_TOKEN, |
| 1054 |
|
data => chr $self->{nc}, |
| 1055 |
|
line => $self->{line}, column => $self->{column}, |
| 1056 |
|
}; |
| 1057 |
|
$self->{read_until}->($token->{data}, q[<&], length $token->{data}); |
| 1058 |
|
|
| 1059 |
|
## Stay in the state. |
| 1060 |
|
!!!next-input-character; |
| 1061 |
|
!!!emit ($token); |
| 1062 |
|
redo A; |
| 1063 |
|
} elsif ($self->{state} == DATA_STATE) { |
| 1064 |
|
$self->{s_kwd} = '' unless defined $self->{s_kwd}; |
| 1065 |
|
if ($self->{nc} == 0x0026) { # & |
| 1066 |
|
$self->{s_kwd} = ''; |
| 1067 |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
| 1068 |
not $self->{escape}) { |
not $self->{escape}) { |
| 1069 |
!!!cp (1); |
!!!cp (1); |
| 1082 |
} |
} |
| 1083 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 1084 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1085 |
if (defined $self->{s_kwd}) { |
$self->{s_kwd} .= '-'; |
| 1086 |
!!!cp (2.1); |
|
|
$self->{s_kwd} .= '-'; |
|
|
} else { |
|
|
!!!cp (2.2); |
|
|
$self->{s_kwd} = '-'; |
|
|
} |
|
|
|
|
| 1087 |
if ($self->{s_kwd} eq '<!--') { |
if ($self->{s_kwd} eq '<!--') { |
| 1088 |
!!!cp (3); |
!!!cp (3); |
| 1089 |
$self->{escape} = 1; # unless $self->{escape}; |
$self->{escape} = 1; # unless $self->{escape}; |
| 1101 |
|
|
| 1102 |
# |
# |
| 1103 |
} elsif ($self->{nc} == 0x0021) { # ! |
} elsif ($self->{nc} == 0x0021) { # ! |
| 1104 |
if (defined $self->{s_kwd}) { |
if (length $self->{s_kwd}) { |
| 1105 |
!!!cp (5.1); |
!!!cp (5.1); |
| 1106 |
$self->{s_kwd} .= '!'; |
$self->{s_kwd} .= '!'; |
| 1107 |
# |
# |
| 1108 |
} else { |
} else { |
| 1109 |
!!!cp (5.2); |
!!!cp (5.2); |
| 1110 |
|
#$self->{s_kwd} = ''; |
| 1111 |
# |
# |
| 1112 |
} |
} |
| 1113 |
# |
# |
| 1114 |
} elsif ($self->{nc} == 0x003C) { # < |
} elsif ($self->{nc} == 0x003C) { # < |
|
delete $self->{s_kwd}; |
|
| 1115 |
if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA |
if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA |
| 1116 |
(($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA |
(($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA |
| 1117 |
not $self->{escape})) { |
not $self->{escape})) { |
| 1121 |
redo A; |
redo A; |
| 1122 |
} else { |
} else { |
| 1123 |
!!!cp (7); |
!!!cp (7); |
| 1124 |
|
$self->{s_kwd} = ''; |
| 1125 |
# |
# |
| 1126 |
} |
} |
| 1127 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 1128 |
if ($self->{escape} and |
if ($self->{escape} and |
| 1129 |
($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA |
($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA |
| 1130 |
if (defined $self->{s_kwd} and $self->{s_kwd} eq '--') { |
if ($self->{s_kwd} eq '--') { |
| 1131 |
!!!cp (8); |
!!!cp (8); |
| 1132 |
delete $self->{escape}; |
delete $self->{escape}; |
| 1133 |
} else { |
} else { |
| 1137 |
!!!cp (10); |
!!!cp (10); |
| 1138 |
} |
} |
| 1139 |
|
|
| 1140 |
delete $self->{s_kwd}; |
$self->{s_kwd} = ''; |
| 1141 |
# |
# |
| 1142 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1143 |
!!!cp (11); |
!!!cp (11); |
| 1144 |
delete $self->{s_kwd}; |
$self->{s_kwd} = ''; |
| 1145 |
!!!emit ({type => END_OF_FILE_TOKEN, |
!!!emit ({type => END_OF_FILE_TOKEN, |
| 1146 |
line => $self->{line}, column => $self->{column}}); |
line => $self->{line}, column => $self->{column}}); |
| 1147 |
last A; ## TODO: ok? |
last A; ## TODO: ok? |
| 1148 |
} else { |
} else { |
| 1149 |
!!!cp (12); |
!!!cp (12); |
| 1150 |
delete $self->{s_kwd}; |
$self->{s_kwd} = ''; |
| 1151 |
# |
# |
| 1152 |
} |
} |
| 1153 |
|
|
| 1158 |
}; |
}; |
| 1159 |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
| 1160 |
length $token->{data})) { |
length $token->{data})) { |
| 1161 |
delete $self->{s_kwd}; |
$self->{s_kwd} = ''; |
| 1162 |
} |
} |
| 1163 |
|
|
| 1164 |
## Stay in the data state |
## Stay in the data state. |
| 1165 |
|
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
| 1166 |
|
!!!cp (13); |
| 1167 |
|
$self->{state} = PCDATA_STATE; |
| 1168 |
|
} else { |
| 1169 |
|
!!!cp (14); |
| 1170 |
|
## Stay in the state. |
| 1171 |
|
} |
| 1172 |
!!!next-input-character; |
!!!next-input-character; |
| 1173 |
!!!emit ($token); |
!!!emit ($token); |
| 1174 |
redo A; |
redo A; |
| 1273 |
} |
} |
| 1274 |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
| 1275 |
## NOTE: The "close tag open state" in the spec is implemented as |
## NOTE: The "close tag open state" in the spec is implemented as |
| 1276 |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|. |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|. |
| 1277 |
|
|
| 1278 |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
| 1279 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1280 |
if (defined $self->{last_stag_name}) { |
if (defined $self->{last_stag_name}) { |
| 1281 |
$self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE; |
$self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE; |
| 1282 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 1283 |
## Reconsume. |
## Reconsume. |
| 1284 |
redo A; |
redo A; |
| 1349 |
## "bogus comment state" entry. |
## "bogus comment state" entry. |
| 1350 |
redo A; |
redo A; |
| 1351 |
} |
} |
| 1352 |
} elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) { |
} elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) { |
| 1353 |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
| 1354 |
if (length $ch) { |
if (length $ch) { |
| 1355 |
my $CH = $ch; |
my $CH = $ch; |
| 1373 |
redo A; |
redo A; |
| 1374 |
} |
} |
| 1375 |
} else { # after "<{tag-name}" |
} else { # after "<{tag-name}" |
| 1376 |
unless ({ |
unless ($is_space->{$self->{nc}} or |
| 1377 |
0x0009 => 1, # HT |
{ |
|
0x000A => 1, # LF |
|
|
0x000B => 1, # VT |
|
|
0x000C => 1, # FF |
|
|
0x0020 => 1, # SP |
|
| 1378 |
0x003E => 1, # > |
0x003E => 1, # > |
| 1379 |
0x002F => 1, # / |
0x002F => 1, # / |
| 1380 |
-1 => 1, # EOF |
-1 => 1, # EOF |
| 1401 |
} |
} |
| 1402 |
} |
} |
| 1403 |
} elsif ($self->{state} == TAG_NAME_STATE) { |
} elsif ($self->{state} == TAG_NAME_STATE) { |
| 1404 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1405 |
!!!cp (34); |
!!!cp (34); |
| 1406 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1407 |
!!!next-input-character; |
!!!next-input-character; |
| 1473 |
redo A; |
redo A; |
| 1474 |
} |
} |
| 1475 |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) { |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) { |
| 1476 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1477 |
!!!cp (45); |
!!!cp (45); |
| 1478 |
## Stay in the state |
## Stay in the state |
| 1479 |
!!!next-input-character; |
!!!next-input-character; |
| 1569 |
} |
} |
| 1570 |
}; # $before_leave |
}; # $before_leave |
| 1571 |
|
|
| 1572 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1573 |
!!!cp (59); |
!!!cp (59); |
| 1574 |
$before_leave->(); |
$before_leave->(); |
| 1575 |
$self->{state} = AFTER_ATTRIBUTE_NAME_STATE; |
$self->{state} = AFTER_ATTRIBUTE_NAME_STATE; |
| 1652 |
redo A; |
redo A; |
| 1653 |
} |
} |
| 1654 |
} elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) { |
} elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) { |
| 1655 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1656 |
!!!cp (71); |
!!!cp (71); |
| 1657 |
## Stay in the state |
## Stay in the state |
| 1658 |
!!!next-input-character; |
!!!next-input-character; |
| 1739 |
redo A; |
redo A; |
| 1740 |
} |
} |
| 1741 |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) { |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) { |
| 1742 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1743 |
!!!cp (83); |
!!!cp (83); |
| 1744 |
## Stay in the state |
## Stay in the state |
| 1745 |
!!!next-input-character; |
!!!next-input-character; |
| 1920 |
redo A; |
redo A; |
| 1921 |
} |
} |
| 1922 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) { |
| 1923 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # HT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1924 |
!!!cp (107); |
!!!cp (107); |
| 1925 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1926 |
!!!next-input-character; |
!!!next-input-character; |
| 2002 |
redo A; |
redo A; |
| 2003 |
} |
} |
| 2004 |
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
| 2005 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2006 |
!!!cp (118); |
!!!cp (118); |
| 2007 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 2008 |
!!!next-input-character; |
!!!next-input-character; |
| 2443 |
redo A; |
redo A; |
| 2444 |
} |
} |
| 2445 |
} elsif ($self->{state} == DOCTYPE_STATE) { |
} elsif ($self->{state} == DOCTYPE_STATE) { |
| 2446 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2447 |
!!!cp (155); |
!!!cp (155); |
| 2448 |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
| 2449 |
!!!next-input-character; |
!!!next-input-character; |
| 2456 |
redo A; |
redo A; |
| 2457 |
} |
} |
| 2458 |
} elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) { |
| 2459 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2460 |
!!!cp (157); |
!!!cp (157); |
| 2461 |
## Stay in the state |
## Stay in the state |
| 2462 |
!!!next-input-character; |
!!!next-input-character; |
| 2490 |
} |
} |
| 2491 |
} elsif ($self->{state} == DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == DOCTYPE_NAME_STATE) { |
| 2492 |
## ISSUE: Redundant "First," in the spec. |
## ISSUE: Redundant "First," in the spec. |
| 2493 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2494 |
!!!cp (161); |
!!!cp (161); |
| 2495 |
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
| 2496 |
!!!next-input-character; |
!!!next-input-character; |
| 2522 |
redo A; |
redo A; |
| 2523 |
} |
} |
| 2524 |
} elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) { |
| 2525 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2526 |
!!!cp (165); |
!!!cp (165); |
| 2527 |
## Stay in the state |
## Stay in the state |
| 2528 |
!!!next-input-character; |
!!!next-input-character; |
| 2645 |
redo A; |
redo A; |
| 2646 |
} |
} |
| 2647 |
} elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
| 2648 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2649 |
!!!cp (181); |
!!!cp (181); |
| 2650 |
## Stay in the state |
## Stay in the state |
| 2651 |
!!!next-input-character; |
!!!next-input-character; |
| 2772 |
redo A; |
redo A; |
| 2773 |
} |
} |
| 2774 |
} elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
| 2775 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2776 |
!!!cp (195); |
!!!cp (195); |
| 2777 |
## Stay in the state |
## Stay in the state |
| 2778 |
!!!next-input-character; |
!!!next-input-character; |
| 2818 |
redo A; |
redo A; |
| 2819 |
} |
} |
| 2820 |
} elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
| 2821 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2822 |
!!!cp (201); |
!!!cp (201); |
| 2823 |
## Stay in the state |
## Stay in the state |
| 2824 |
!!!next-input-character; |
!!!next-input-character; |
| 2944 |
redo A; |
redo A; |
| 2945 |
} |
} |
| 2946 |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
| 2947 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2948 |
!!!cp (215); |
!!!cp (215); |
| 2949 |
## Stay in the state |
## Stay in the state |
| 2950 |
!!!next-input-character; |
!!!next-input-character; |
| 3076 |
redo A; |
redo A; |
| 3077 |
} |
} |
| 3078 |
} elsif ($self->{state} == ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_STATE) { |
| 3079 |
if ({ |
if ($is_space->{$self->{nc}} or |
| 3080 |
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF, |
{ |
| 3081 |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 3082 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 3083 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 3084 |
!!!cp (1001); |
!!!cp (1001); |
| 3085 |
## Don't consume |
## Don't consume |
| 3086 |
## No error |
## No error |
| 3199 |
my $code = $self->{s_kwd}; |
my $code = $self->{s_kwd}; |
| 3200 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 3201 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 3202 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($charref_map->{$code}) { |
| 3203 |
!!!cp (1015); |
!!!cp (1015); |
| 3204 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3205 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 3206 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3207 |
$code = 0xFFFD; |
$code = $charref_map->{$code}; |
| 3208 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 3209 |
!!!cp (1016); |
!!!cp (1016); |
| 3210 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3211 |
text => (sprintf 'U-%08X', $code), |
text => (sprintf 'U-%08X', $code), |
| 3212 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3213 |
$code = 0xFFFD; |
$code = 0xFFFD; |
|
} elsif ($code == 0x000D) { |
|
|
!!!cp (1017); |
|
|
!!!parse-error (type => 'CR character reference', |
|
|
line => $l, column => $c); |
|
|
$code = 0x000A; |
|
|
} elsif (0x80 <= $code and $code <= 0x9F) { |
|
|
!!!cp (1018); |
|
|
!!!parse-error (type => 'C1 character reference', |
|
|
text => (sprintf 'U+%04X', $code), |
|
|
line => $l, column => $c); |
|
|
$code = $c1_entity_char->{$code}; |
|
| 3214 |
} |
} |
| 3215 |
|
|
| 3216 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3307 |
my $code = $self->{s_kwd}; |
my $code = $self->{s_kwd}; |
| 3308 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 3309 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 3310 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($charref_map->{$code}) { |
| 3311 |
!!!cp (1008); |
!!!cp (1008); |
| 3312 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3313 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 3314 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3315 |
$code = 0xFFFD; |
$code = $charref_map->{$code}; |
| 3316 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 3317 |
!!!cp (1009); |
!!!cp (1009); |
| 3318 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3319 |
text => (sprintf 'U-%08X', $code), |
text => (sprintf 'U-%08X', $code), |
| 3320 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3321 |
$code = 0xFFFD; |
$code = 0xFFFD; |
|
} elsif ($code == 0x000D) { |
|
|
!!!cp (1010); |
|
|
!!!parse-error (type => 'CR character reference', line => $l, column => $c); |
|
|
$code = 0x000A; |
|
|
} elsif (0x80 <= $code and $code <= 0x9F) { |
|
|
!!!cp (1011); |
|
|
!!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c); |
|
|
$code = $c1_entity_char->{$code}; |
|
| 3322 |
} |
} |
| 3323 |
|
|
| 3324 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3659 |
!!!ack-later; |
!!!ack-later; |
| 3660 |
return; |
return; |
| 3661 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 3662 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 3663 |
## Ignore the token |
## Ignore the token |
| 3664 |
|
|
| 3665 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 3716 |
!!!next-token; |
!!!next-token; |
| 3717 |
redo B; |
redo B; |
| 3718 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 3719 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 3720 |
## Ignore the token. |
## Ignore the token. |
| 3721 |
|
|
| 3722 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 4530 |
|
|
| 4531 |
if ($self->{insertion_mode} & HEAD_IMS) { |
if ($self->{insertion_mode} & HEAD_IMS) { |
| 4532 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4533 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 4534 |
unless ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
unless ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 4535 |
!!!cp ('t88.2'); |
!!!cp ('t88.2'); |
| 4536 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 4709 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 4710 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 4711 |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 4712 |
[\x09-\x0D\x20]*= |
[\x09\x0A\x0C\x0D\x20]*= |
| 4713 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 4714 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) { |
([^"'\x09\x0A\x0C\x0D\x20] |
| 4715 |
|
[^\x09\x0A\x0C\x0D\x20\x3B]*))/x) { |
| 4716 |
!!!cp ('t107'); |
!!!cp ('t107'); |
| 4717 |
## NOTE: Whether the encoding is supported or not is handled |
## NOTE: Whether the encoding is supported or not is handled |
| 4718 |
## in the {change_encoding} callback. |
## in the {change_encoding} callback. |
| 5521 |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
| 5522 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 5523 |
if (not $open_tables->[-1]->[1] and # tainted |
if (not $open_tables->[-1]->[1] and # tainted |
| 5524 |
$token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
$token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 5525 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5526 |
|
|
| 5527 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6205 |
} |
} |
| 6206 |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
| 6207 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6208 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6209 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 6210 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6211 |
!!!cp ('t260'); |
!!!cp ('t260'); |
| 6546 |
} |
} |
| 6547 |
} elsif ($self->{insertion_mode} & BODY_AFTER_IMS) { |
} elsif ($self->{insertion_mode} & BODY_AFTER_IMS) { |
| 6548 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6549 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6550 |
my $data = $1; |
my $data = $1; |
| 6551 |
## As if in body |
## As if in body |
| 6552 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 6563 |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 6564 |
!!!cp ('t301'); |
!!!cp ('t301'); |
| 6565 |
!!!parse-error (type => 'after html:#text', token => $token); |
!!!parse-error (type => 'after html:#text', token => $token); |
| 6566 |
|
# |
|
## Reprocess in the "after body" insertion mode. |
|
| 6567 |
} else { |
} else { |
| 6568 |
!!!cp ('t302'); |
!!!cp ('t302'); |
| 6569 |
|
## "after body" insertion mode |
| 6570 |
|
!!!parse-error (type => 'after body:#text', token => $token); |
| 6571 |
|
# |
| 6572 |
} |
} |
|
|
|
|
## "after body" insertion mode |
|
|
!!!parse-error (type => 'after body:#text', token => $token); |
|
| 6573 |
|
|
| 6574 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6575 |
## reprocess |
## reprocess |
| 6579 |
!!!cp ('t303'); |
!!!cp ('t303'); |
| 6580 |
!!!parse-error (type => 'after html', |
!!!parse-error (type => 'after html', |
| 6581 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 6582 |
|
# |
|
## Reprocess in the "after body" insertion mode. |
|
| 6583 |
} else { |
} else { |
| 6584 |
!!!cp ('t304'); |
!!!cp ('t304'); |
| 6585 |
|
## "after body" insertion mode |
| 6586 |
|
!!!parse-error (type => 'after body', |
| 6587 |
|
text => $token->{tag_name}, token => $token); |
| 6588 |
|
# |
| 6589 |
} |
} |
| 6590 |
|
|
|
## "after body" insertion mode |
|
|
!!!parse-error (type => 'after body', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
|
|
| 6591 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6592 |
!!!ack-later; |
!!!ack-later; |
| 6593 |
## reprocess |
## reprocess |
| 6598 |
!!!parse-error (type => 'after html:/', |
!!!parse-error (type => 'after html:/', |
| 6599 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 6600 |
|
|
| 6601 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6602 |
## Reprocess in the "after body" insertion mode. |
## Reprocess. |
| 6603 |
|
next B; |
| 6604 |
} else { |
} else { |
| 6605 |
!!!cp ('t306'); |
!!!cp ('t306'); |
| 6606 |
} |
} |
| 6638 |
} |
} |
| 6639 |
} elsif ($self->{insertion_mode} & FRAME_IMS) { |
} elsif ($self->{insertion_mode} & FRAME_IMS) { |
| 6640 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6641 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6642 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 6643 |
|
|
| 6644 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6648 |
} |
} |
| 6649 |
} |
} |
| 6650 |
|
|
| 6651 |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) { |
| 6652 |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
| 6653 |
!!!cp ('t311'); |
!!!cp ('t311'); |
| 6654 |
!!!parse-error (type => 'in frameset:#text', token => $token); |
!!!parse-error (type => 'in frameset:#text', token => $token); |
| 6825 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 6826 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 6827 |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 6828 |
[\x09-\x0D\x20]*= |
[\x09\x0A\x0C\x0D\x20]*= |
| 6829 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 6830 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) { |
([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*)) |
| 6831 |
|
/x) { |
| 6832 |
!!!cp ('t336'); |
!!!cp ('t336'); |
| 6833 |
## NOTE: Whether the encoding is supported or not is handled |
## NOTE: Whether the encoding is supported or not is handled |
| 6834 |
## in the {change_encoding} callback. |
## in the {change_encoding} callback. |
| 6973 |
last INSCOPE; |
last INSCOPE; |
| 6974 |
} |
} |
| 6975 |
} # INSCOPE |
} # INSCOPE |
| 6976 |
|
|
| 6977 |
|
## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>) |
| 6978 |
|
## Interpreted as <li><foo/></li><li/> (non-conforming) |
| 6979 |
|
## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7), |
| 6980 |
|
## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S), |
| 6981 |
|
## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S), |
| 6982 |
|
## object (Fx) |
| 6983 |
|
## Generate non-tree (non-conforming) |
| 6984 |
|
## basefont (IE7 (where basefont is non-void)), center (IE), |
| 6985 |
|
## form (IE), hn (IE) |
| 6986 |
|
## address, div, p (<li><foo><li> == <li><foo/></li><li/>) |
| 6987 |
|
## Interpreted as <li><foo><li/></foo></li> (non-conforming) |
| 6988 |
|
## div (Fx, S) |
| 6989 |
|
|
| 6990 |
## Step 1 |
## Step 1 |
| 6991 |
my $i = -1; |
my $i = -1; |
| 7366 |
!!!nack ('t380.1'); |
!!!nack ('t380.1'); |
| 7367 |
} elsif ({ |
} elsif ({ |
| 7368 |
b => 1, big => 1, em => 1, font => 1, i => 1, |
b => 1, big => 1, em => 1, font => 1, i => 1, |
| 7369 |
s => 1, small => 1, strile => 1, |
s => 1, small => 1, strike => 1, |
| 7370 |
strong => 1, tt => 1, u => 1, |
strong => 1, tt => 1, u => 1, |
| 7371 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7372 |
!!!cp ('t375'); |
!!!cp ('t375'); |
| 7671 |
} elsif ({ |
} elsif ({ |
| 7672 |
a => 1, |
a => 1, |
| 7673 |
b => 1, big => 1, em => 1, font => 1, i => 1, |
b => 1, big => 1, em => 1, font => 1, i => 1, |
| 7674 |
nobr => 1, s => 1, small => 1, strile => 1, |
nobr => 1, s => 1, small => 1, strike => 1, |
| 7675 |
strong => 1, tt => 1, u => 1, |
strong => 1, tt => 1, u => 1, |
| 7676 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 7677 |
!!!cp ('t427'); |
!!!cp ('t427'); |
| 7762 |
## Ignore the token |
## Ignore the token |
| 7763 |
!!!next-token; |
!!!next-token; |
| 7764 |
last S2; |
last S2; |
|
} |
|
| 7765 |
|
|
| 7766 |
|
## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera |
| 7767 |
|
## 9.27, "a" is a child of <dd> (conforming). In |
| 7768 |
|
## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7, |
| 7769 |
|
## "a" is a child of both <body> and <dd>. |
| 7770 |
|
} |
| 7771 |
|
|
| 7772 |
!!!cp ('t434'); |
!!!cp ('t434'); |
| 7773 |
} |
} |
| 7774 |
|
|