| 323 |
|
|
| 324 |
## ISSUE: xmlns:xlink="non-xlink-ns" is not an error. |
## ISSUE: xmlns:xlink="non-xlink-ns" is not an error. |
| 325 |
|
|
| 326 |
my $c1_entity_char = { |
my $charref_map = { |
| 327 |
|
0x0D => 0x000A, |
| 328 |
0x80 => 0x20AC, |
0x80 => 0x20AC, |
| 329 |
0x81 => 0xFFFD, |
0x81 => 0xFFFD, |
| 330 |
0x82 => 0x201A, |
0x82 => 0x201A, |
| 357 |
0x9D => 0xFFFD, |
0x9D => 0xFFFD, |
| 358 |
0x9E => 0x017E, |
0x9E => 0x017E, |
| 359 |
0x9F => 0x0178, |
0x9F => 0x0178, |
| 360 |
}; # $c1_entity_char |
}; # $charref_map |
| 361 |
|
## Extend |$charref_map| so that every code point listed below maps to
## U+FFFD REPLACEMENT CHARACTER.  The list covers C0 controls other than
## HT, LF, FF and CR, U+007F, the surrogate range, part of the U+FDD0
## block, and the last two code points (xFFFE, xFFFF) of each plane.
## The numeric character reference code in the tokenizer looks a code up
## in this table; a true value is reported as an 'invalid character
## reference' parse error and used as the replacement code point.
$charref_map->{$_} = 0xFFFD |
| 362 |

for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F, |
| 363 |

0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: range ends at 0xFDDF; Unicode noncharacters run through 0xFDEF - presumably the upper bound should be 0xFDEF (confirm against the spec). |
| 364 |

0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, |
| 365 |

0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, |
| 366 |

0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, |
| 367 |

0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, |
| 368 |

0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF; |
| 369 |
|
|
| 370 |
sub parse_byte_string ($$$$;$) { |
sub parse_byte_string ($$$$;$) { |
| 371 |
my $self = shift; |
my $self = shift; |
| 410 |
## TODO: Is this ok? Transfer protocol's parameter should be |
## TODO: Is this ok? Transfer protocol's parameter should be |
| 411 |
## interpreted in its semantics? |
## interpreted in its semantics? |
| 412 |
|
|
|
## ISSUE: Unsupported encoding is not ignored according to the spec. |
|
| 413 |
($char_stream, $e_status) = $charset->get_decode_handle |
($char_stream, $e_status) = $charset->get_decode_handle |
| 414 |
($byte_stream, allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 415 |
allow_fallback => 1); |
allow_fallback => 1); |
| 417 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 418 |
last SNIFFING; |
last SNIFFING; |
| 419 |
} else { |
} else { |
| 420 |
## TODO: unsupported error |
!!!parse-error (type => 'charset:not supported', |
| 421 |
|
layer => 'encode', |
| 422 |
|
line => 1, column => 1, |
| 423 |
|
value => $charset_name, |
| 424 |
|
level => $self->{level}->{uncertain}); |
| 425 |
} |
} |
| 426 |
} |
} |
| 427 |
|
|
| 738 |
$self->{char_buffer_pos} += $count; |
$self->{char_buffer_pos} += $count; |
| 739 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 740 |
$self->{column_prev} = $self->{column} - 1; |
$self->{column_prev} = $self->{column} - 1; |
|
$self->{prev_char} = [-1, -1, -1]; |
|
| 741 |
$self->{nc} = -1; |
$self->{nc} = -1; |
| 742 |
} |
} |
| 743 |
return $count; |
return $count; |
| 750 |
$self->{column} += $count; |
$self->{column} += $count; |
| 751 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 752 |
$self->{column_prev} = $self->{column} - 1; |
$self->{column_prev} = $self->{column} - 1; |
|
$self->{prev_char} = [-1, -1, -1]; |
|
| 753 |
$self->{nc} = -1; |
$self->{nc} = -1; |
| 754 |
} |
} |
| 755 |
return $count; |
return $count; |
| 865 |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec |
| 866 |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec |
| 867 |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec |
| 868 |
sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
sub CDATA_RCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec |
| 869 |
sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec |
sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec |
| 870 |
sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec |
sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec |
| 871 |
sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec |
sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec |
| 879 |
sub HEXREF_X_STATE () { 47 } |
sub HEXREF_X_STATE () { 47 } |
| 880 |
sub HEXREF_HEX_STATE () { 48 } |
sub HEXREF_HEX_STATE () { 48 } |
| 881 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
| 882 |
|
sub PCDATA_STATE () { 50 } # "data state" in the spec |
| 883 |
|
|
| 884 |
sub DOCTYPE_TOKEN () { 1 } |
sub DOCTYPE_TOKEN () { 1 } |
| 885 |
sub COMMENT_TOKEN () { 2 } |
sub COMMENT_TOKEN () { 2 } |
| 943 |
delete $self->{self_closing}; |
delete $self->{self_closing}; |
| 944 |
$self->{char_buffer} = ''; |
$self->{char_buffer} = ''; |
| 945 |
$self->{char_buffer_pos} = 0; |
$self->{char_buffer_pos} = 0; |
|
$self->{prev_char} = [-1, -1, -1]; |
|
| 946 |
$self->{nc} = -1; # next input character |
$self->{nc} = -1; # next input character |
| 947 |
#$self->{next_nc} |
#$self->{next_nc} |
| 948 |
!!!next-input-character; |
!!!next-input-character; |
| 978 |
## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) |
## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) |
| 979 |
## (This requirement was dropped from HTML5 spec, unfortunately.) |
## (This requirement was dropped from HTML5 spec, unfortunately.) |
| 980 |
|
|
| 981 |
|
## Lookup table of the code points the tokenizer treats as white space.
## Tokenizer states test the next input character with
## |$is_space->{$self->{nc}}| instead of repeating the comparisons.
## U+000B (VT) is deliberately not a member.  U+000D (CR) is commented
## out as well - NOTE(review): presumably CR never reaches the tokenizer
## because newlines are normalized earlier; confirm against the input
## layer.
my $is_space = { |
| 982 |

0x0009 => 1, # CHARACTER TABULATION (HT) |
| 983 |

0x000A => 1, # LINE FEED (LF) |
| 984 |

#0x000B => 0, # LINE TABULATION (VT) |
| 985 |

0x000C => 1, # FORM FEED (FF) |
| 986 |

#0x000D => 1, # CARRIAGE RETURN (CR) |
| 987 |

0x0020 => 1, # SPACE (SP) |
| 988 |

}; |
| 989 |
|
|
| 990 |
sub _get_next_token ($) { |
sub _get_next_token ($) { |
| 991 |
my $self = shift; |
my $self = shift; |
| 992 |
|
|
| 1004 |
} |
} |
| 1005 |
|
|
| 1006 |
A: { |
A: { |
| 1007 |
if ($self->{state} == DATA_STATE) { |
if ($self->{state} == PCDATA_STATE) { |
| 1008 |
|
## NOTE: Same as |DATA_STATE|, but only for |PCDATA| content model. |
| 1009 |
|
|
| 1010 |
if ($self->{nc} == 0x0026) { # & |
if ($self->{nc} == 0x0026) { # & |
| 1011 |
|
!!!cp (0.1); |
| 1012 |
|
## NOTE: In the spec, the tokenizer is switched to the |
| 1013 |
|
## "entity data state". In this implementation, the tokenizer |
| 1014 |
|
## is switched to the |ENTITY_STATE|, which is an implementation |
| 1015 |
|
## of the "consume a character reference" algorithm. |
| 1016 |
|
$self->{entity_add} = -1; |
| 1017 |
|
$self->{prev_state} = DATA_STATE; |
| 1018 |
|
$self->{state} = ENTITY_STATE; |
| 1019 |
|
!!!next-input-character; |
| 1020 |
|
redo A; |
| 1021 |
|
} elsif ($self->{nc} == 0x003C) { # < |
| 1022 |
|
!!!cp (0.2); |
| 1023 |
|
$self->{state} = TAG_OPEN_STATE; |
| 1024 |
|
!!!next-input-character; |
| 1025 |
|
redo A; |
| 1026 |
|
} elsif ($self->{nc} == -1) { |
| 1027 |
|
!!!cp (0.3); |
| 1028 |
|
!!!emit ({type => END_OF_FILE_TOKEN, |
| 1029 |
|
line => $self->{line}, column => $self->{column}}); |
| 1030 |
|
last A; ## TODO: ok? |
| 1031 |
|
} else { |
| 1032 |
|
!!!cp (0.4); |
| 1033 |
|
# |
| 1034 |
|
} |
| 1035 |
|
|
| 1036 |
|
# Anything else |
| 1037 |
|
my $token = {type => CHARACTER_TOKEN, |
| 1038 |
|
data => chr $self->{nc}, |
| 1039 |
|
line => $self->{line}, column => $self->{column}, |
| 1040 |
|
}; |
| 1041 |
|
$self->{read_until}->($token->{data}, q[<&], length $token->{data}); |
| 1042 |
|
|
| 1043 |
|
## Stay in the state. |
| 1044 |
|
!!!next-input-character; |
| 1045 |
|
!!!emit ($token); |
| 1046 |
|
redo A; |
| 1047 |
|
} elsif ($self->{state} == DATA_STATE) { |
| 1048 |
|
$self->{s_kwd} = '' unless defined $self->{s_kwd}; |
| 1049 |
|
if ($self->{nc} == 0x0026) { # & |
| 1050 |
|
$self->{s_kwd} = ''; |
| 1051 |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
| 1052 |
not $self->{escape}) { |
not $self->{escape}) { |
| 1053 |
!!!cp (1); |
!!!cp (1); |
| 1066 |
} |
} |
| 1067 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 1068 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1069 |
unless ($self->{escape}) { |
$self->{s_kwd} .= '-'; |
| 1070 |
if ($self->{prev_char}->[0] == 0x002D and # - |
|
| 1071 |
$self->{prev_char}->[1] == 0x0021 and # ! |
if ($self->{s_kwd} eq '<!--') { |
| 1072 |
$self->{prev_char}->[2] == 0x003C) { # < |
!!!cp (3); |
| 1073 |
!!!cp (3); |
$self->{escape} = 1; # unless $self->{escape}; |
| 1074 |
$self->{escape} = 1; |
$self->{s_kwd} = '--'; |
| 1075 |
} else { |
# |
| 1076 |
!!!cp (4); |
} elsif ($self->{s_kwd} eq '---') { |
| 1077 |
} |
!!!cp (4); |
| 1078 |
|
$self->{s_kwd} = '--'; |
| 1079 |
|
# |
| 1080 |
} else { |
} else { |
| 1081 |
!!!cp (5); |
!!!cp (5); |
| 1082 |
|
# |
| 1083 |
} |
} |
| 1084 |
} |
} |
| 1085 |
|
|
| 1086 |
# |
# |
| 1087 |
|
} elsif ($self->{nc} == 0x0021) { # ! |
| 1088 |
|
if (length $self->{s_kwd}) { |
| 1089 |
|
!!!cp (5.1); |
| 1090 |
|
$self->{s_kwd} .= '!'; |
| 1091 |
|
# |
| 1092 |
|
} else { |
| 1093 |
|
!!!cp (5.2); |
| 1094 |
|
#$self->{s_kwd} = ''; |
| 1095 |
|
# |
| 1096 |
|
} |
| 1097 |
|
# |
| 1098 |
} elsif ($self->{nc} == 0x003C) { # < |
} elsif ($self->{nc} == 0x003C) { # < |
| 1099 |
if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA |
if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA |
| 1100 |
(($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA |
(($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA |
| 1105 |
redo A; |
redo A; |
| 1106 |
} else { |
} else { |
| 1107 |
!!!cp (7); |
!!!cp (7); |
| 1108 |
|
$self->{s_kwd} = ''; |
| 1109 |
# |
# |
| 1110 |
} |
} |
| 1111 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 1112 |
if ($self->{escape} and |
if ($self->{escape} and |
| 1113 |
($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA |
($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA |
| 1114 |
if ($self->{prev_char}->[0] == 0x002D and # - |
if ($self->{s_kwd} eq '--') { |
|
$self->{prev_char}->[1] == 0x002D) { # - |
|
| 1115 |
!!!cp (8); |
!!!cp (8); |
| 1116 |
delete $self->{escape}; |
delete $self->{escape}; |
| 1117 |
} else { |
} else { |
| 1121 |
!!!cp (10); |
!!!cp (10); |
| 1122 |
} |
} |
| 1123 |
|
|
| 1124 |
|
$self->{s_kwd} = ''; |
| 1125 |
# |
# |
| 1126 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1127 |
!!!cp (11); |
!!!cp (11); |
| 1128 |
|
$self->{s_kwd} = ''; |
| 1129 |
!!!emit ({type => END_OF_FILE_TOKEN, |
!!!emit ({type => END_OF_FILE_TOKEN, |
| 1130 |
line => $self->{line}, column => $self->{column}}); |
line => $self->{line}, column => $self->{column}}); |
| 1131 |
last A; ## TODO: ok? |
last A; ## TODO: ok? |
| 1132 |
} else { |
} else { |
| 1133 |
!!!cp (12); |
!!!cp (12); |
| 1134 |
|
$self->{s_kwd} = ''; |
| 1135 |
|
# |
| 1136 |
} |
} |
| 1137 |
|
|
| 1138 |
# Anything else |
# Anything else |
| 1139 |
my $token = {type => CHARACTER_TOKEN, |
my $token = {type => CHARACTER_TOKEN, |
| 1140 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
| 1141 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
| 1142 |
}; |
}; |
| 1143 |
$self->{read_until}->($token->{data}, q[-!<>&], length $token->{data}); |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
| 1144 |
|
length $token->{data})) { |
| 1145 |
|
$self->{s_kwd} = ''; |
| 1146 |
|
} |
| 1147 |
|
|
| 1148 |
## Stay in the data state |
## Stay in the data state. |
| 1149 |
|
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
| 1150 |
|
!!!cp (13); |
| 1151 |
|
$self->{state} = PCDATA_STATE; |
| 1152 |
|
} else { |
| 1153 |
|
!!!cp (14); |
| 1154 |
|
## Stay in the state. |
| 1155 |
|
} |
| 1156 |
!!!next-input-character; |
!!!next-input-character; |
|
|
|
| 1157 |
!!!emit ($token); |
!!!emit ($token); |
|
|
|
| 1158 |
redo A; |
redo A; |
| 1159 |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
| 1160 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1163 |
!!!next-input-character; |
!!!next-input-character; |
| 1164 |
$self->{state} = CLOSE_TAG_OPEN_STATE; |
$self->{state} = CLOSE_TAG_OPEN_STATE; |
| 1165 |
redo A; |
redo A; |
| 1166 |
|
} elsif ($self->{nc} == 0x0021) { # ! |
| 1167 |
|
!!!cp (15.1); |
| 1168 |
|
$self->{s_kwd} = '<' unless $self->{escape}; |
| 1169 |
|
# |
| 1170 |
} else { |
} else { |
| 1171 |
!!!cp (16); |
!!!cp (16); |
| 1172 |
## reconsume |
# |
|
$self->{state} = DATA_STATE; |
|
|
|
|
|
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
|
|
line => $self->{line_prev}, |
|
|
column => $self->{column_prev}, |
|
|
}); |
|
|
|
|
|
redo A; |
|
| 1173 |
} |
} |
| 1174 |
|
|
| 1175 |
|
## reconsume |
| 1176 |
|
$self->{state} = DATA_STATE; |
| 1177 |
|
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
| 1178 |
|
line => $self->{line_prev}, |
| 1179 |
|
column => $self->{column_prev}, |
| 1180 |
|
}); |
| 1181 |
|
redo A; |
| 1182 |
} elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA |
} elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA |
| 1183 |
if ($self->{nc} == 0x0021) { # ! |
if ($self->{nc} == 0x0021) { # ! |
| 1184 |
!!!cp (17); |
!!!cp (17); |
| 1257 |
} |
} |
| 1258 |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
} elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) { |
| 1259 |
## NOTE: The "close tag open state" in the spec is implemented as |
## NOTE: The "close tag open state" in the spec is implemented as |
| 1260 |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|. |
## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|. |
| 1261 |
|
|
| 1262 |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</" |
| 1263 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1264 |
if (defined $self->{last_stag_name}) { |
if (defined $self->{last_stag_name}) { |
| 1265 |
$self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE; |
$self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE; |
| 1266 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 1267 |
## Reconsume. |
## Reconsume. |
| 1268 |
redo A; |
redo A; |
| 1333 |
## "bogus comment state" entry. |
## "bogus comment state" entry. |
| 1334 |
redo A; |
redo A; |
| 1335 |
} |
} |
| 1336 |
} elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) { |
} elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) { |
| 1337 |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1; |
| 1338 |
if (length $ch) { |
if (length $ch) { |
| 1339 |
my $CH = $ch; |
my $CH = $ch; |
| 1357 |
redo A; |
redo A; |
| 1358 |
} |
} |
| 1359 |
} else { # after "<{tag-name}" |
} else { # after "<{tag-name}" |
| 1360 |
unless ({ |
unless ($is_space->{$self->{nc}} or |
| 1361 |
0x0009 => 1, # HT |
{ |
|
0x000A => 1, # LF |
|
|
0x000B => 1, # VT |
|
|
0x000C => 1, # FF |
|
|
0x0020 => 1, # SP |
|
| 1362 |
0x003E => 1, # > |
0x003E => 1, # > |
| 1363 |
0x002F => 1, # / |
0x002F => 1, # / |
| 1364 |
-1 => 1, # EOF |
-1 => 1, # EOF |
| 1385 |
} |
} |
| 1386 |
} |
} |
| 1387 |
} elsif ($self->{state} == TAG_NAME_STATE) { |
} elsif ($self->{state} == TAG_NAME_STATE) { |
| 1388 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1389 |
!!!cp (34); |
!!!cp (34); |
| 1390 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1391 |
!!!next-input-character; |
!!!next-input-character; |
| 1457 |
redo A; |
redo A; |
| 1458 |
} |
} |
| 1459 |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) { |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) { |
| 1460 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1461 |
!!!cp (45); |
!!!cp (45); |
| 1462 |
## Stay in the state |
## Stay in the state |
| 1463 |
!!!next-input-character; |
!!!next-input-character; |
| 1553 |
} |
} |
| 1554 |
}; # $before_leave |
}; # $before_leave |
| 1555 |
|
|
| 1556 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1557 |
!!!cp (59); |
!!!cp (59); |
| 1558 |
$before_leave->(); |
$before_leave->(); |
| 1559 |
$self->{state} = AFTER_ATTRIBUTE_NAME_STATE; |
$self->{state} = AFTER_ATTRIBUTE_NAME_STATE; |
| 1636 |
redo A; |
redo A; |
| 1637 |
} |
} |
| 1638 |
} elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) { |
} elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) { |
| 1639 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1640 |
!!!cp (71); |
!!!cp (71); |
| 1641 |
## Stay in the state |
## Stay in the state |
| 1642 |
!!!next-input-character; |
!!!next-input-character; |
| 1723 |
redo A; |
redo A; |
| 1724 |
} |
} |
| 1725 |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) { |
} elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) { |
| 1726 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1727 |
!!!cp (83); |
!!!cp (83); |
| 1728 |
## Stay in the state |
## Stay in the state |
| 1729 |
!!!next-input-character; |
!!!next-input-character; |
| 1904 |
redo A; |
redo A; |
| 1905 |
} |
} |
| 1906 |
} elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) { |
} elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) { |
| 1907 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1908 |
!!!cp (107); |
!!!cp (107); |
| 1909 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1910 |
!!!next-input-character; |
!!!next-input-character; |
| 1986 |
redo A; |
redo A; |
| 1987 |
} |
} |
| 1988 |
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
} elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) { |
| 1989 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 1990 |
!!!cp (118); |
!!!cp (118); |
| 1991 |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
$self->{state} = BEFORE_ATTRIBUTE_NAME_STATE; |
| 1992 |
!!!next-input-character; |
!!!next-input-character; |
| 2427 |
redo A; |
redo A; |
| 2428 |
} |
} |
| 2429 |
} elsif ($self->{state} == DOCTYPE_STATE) { |
} elsif ($self->{state} == DOCTYPE_STATE) { |
| 2430 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2431 |
!!!cp (155); |
!!!cp (155); |
| 2432 |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
| 2433 |
!!!next-input-character; |
!!!next-input-character; |
| 2440 |
redo A; |
redo A; |
| 2441 |
} |
} |
| 2442 |
} elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) { |
| 2443 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2444 |
!!!cp (157); |
!!!cp (157); |
| 2445 |
## Stay in the state |
## Stay in the state |
| 2446 |
!!!next-input-character; |
!!!next-input-character; |
| 2474 |
} |
} |
| 2475 |
} elsif ($self->{state} == DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == DOCTYPE_NAME_STATE) { |
| 2476 |
## ISSUE: Redundant "First," in the spec. |
## ISSUE: Redundant "First," in the spec. |
| 2477 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2478 |
!!!cp (161); |
!!!cp (161); |
| 2479 |
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
$self->{state} = AFTER_DOCTYPE_NAME_STATE; |
| 2480 |
!!!next-input-character; |
!!!next-input-character; |
| 2506 |
redo A; |
redo A; |
| 2507 |
} |
} |
| 2508 |
} elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) { |
| 2509 |
if ($self->{nc} == 0x0009 or # HT |
if ($is_space->{$self->{nc}}) { |
|
$self->{nc} == 0x000A or # LF |
|
|
$self->{nc} == 0x000B or # VT |
|
|
$self->{nc} == 0x000C or # FF |
|
|
$self->{nc} == 0x0020) { # SP |
|
| 2510 |
!!!cp (165); |
!!!cp (165); |
| 2511 |
## Stay in the state |
## Stay in the state |
| 2512 |
!!!next-input-character; |
!!!next-input-character; |
| 2629 |
redo A; |
redo A; |
| 2630 |
} |
} |
| 2631 |
} elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
| 2632 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2633 |
!!!cp (181); |
!!!cp (181); |
| 2634 |
## Stay in the state |
## Stay in the state |
| 2635 |
!!!next-input-character; |
!!!next-input-character; |
| 2756 |
redo A; |
redo A; |
| 2757 |
} |
} |
| 2758 |
} elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) { |
| 2759 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2760 |
!!!cp (195); |
!!!cp (195); |
| 2761 |
## Stay in the state |
## Stay in the state |
| 2762 |
!!!next-input-character; |
!!!next-input-character; |
| 2802 |
redo A; |
redo A; |
| 2803 |
} |
} |
| 2804 |
} elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
} elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
| 2805 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2806 |
!!!cp (201); |
!!!cp (201); |
| 2807 |
## Stay in the state |
## Stay in the state |
| 2808 |
!!!next-input-character; |
!!!next-input-character; |
| 2928 |
redo A; |
redo A; |
| 2929 |
} |
} |
| 2930 |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
} elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) { |
| 2931 |
if ({ |
if ($is_space->{$self->{nc}}) { |
|
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1, |
|
|
#0x000D => 1, # HT, LF, VT, FF, SP, CR |
|
|
}->{$self->{nc}}) { |
|
| 2932 |
!!!cp (215); |
!!!cp (215); |
| 2933 |
## Stay in the state |
## Stay in the state |
| 2934 |
!!!next-input-character; |
!!!next-input-character; |
| 3060 |
redo A; |
redo A; |
| 3061 |
} |
} |
| 3062 |
} elsif ($self->{state} == ENTITY_STATE) { |
} elsif ($self->{state} == ENTITY_STATE) { |
| 3063 |
if ({ |
if ($is_space->{$self->{nc}} or |
| 3064 |
0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF, |
{ |
| 3065 |
0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & |
0x003C => 1, 0x0026 => 1, -1 => 1, # <, & |
| 3066 |
$self->{entity_add} => 1, |
$self->{entity_add} => 1, |
| 3067 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 3068 |
!!!cp (1001); |
!!!cp (1001); |
| 3069 |
## Don't consume |
## Don't consume |
| 3070 |
## No error |
## No error |
| 3183 |
my $code = $self->{s_kwd}; |
my $code = $self->{s_kwd}; |
| 3184 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 3185 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 3186 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($charref_map->{$code}) { |
| 3187 |
!!!cp (1015); |
!!!cp (1015); |
| 3188 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3189 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 3190 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3191 |
$code = 0xFFFD; |
$code = $charref_map->{$code}; |
| 3192 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 3193 |
!!!cp (1016); |
!!!cp (1016); |
| 3194 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3195 |
text => (sprintf 'U-%08X', $code), |
text => (sprintf 'U-%08X', $code), |
| 3196 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3197 |
$code = 0xFFFD; |
$code = 0xFFFD; |
|
} elsif ($code == 0x000D) { |
|
|
!!!cp (1017); |
|
|
!!!parse-error (type => 'CR character reference', |
|
|
line => $l, column => $c); |
|
|
$code = 0x000A; |
|
|
} elsif (0x80 <= $code and $code <= 0x9F) { |
|
|
!!!cp (1018); |
|
|
!!!parse-error (type => 'C1 character reference', |
|
|
text => (sprintf 'U+%04X', $code), |
|
|
line => $l, column => $c); |
|
|
$code = $c1_entity_char->{$code}; |
|
| 3198 |
} |
} |
| 3199 |
|
|
| 3200 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3291 |
my $code = $self->{s_kwd}; |
my $code = $self->{s_kwd}; |
| 3292 |
my $l = $self->{line_prev}; |
my $l = $self->{line_prev}; |
| 3293 |
my $c = $self->{column_prev}; |
my $c = $self->{column_prev}; |
| 3294 |
if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) { |
if ($charref_map->{$code}) { |
| 3295 |
!!!cp (1008); |
!!!cp (1008); |
| 3296 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3297 |
text => (sprintf 'U+%04X', $code), |
text => (sprintf 'U+%04X', $code), |
| 3298 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3299 |
$code = 0xFFFD; |
$code = $charref_map->{$code}; |
| 3300 |
} elsif ($code > 0x10FFFF) { |
} elsif ($code > 0x10FFFF) { |
| 3301 |
!!!cp (1009); |
!!!cp (1009); |
| 3302 |
!!!parse-error (type => 'invalid character reference', |
!!!parse-error (type => 'invalid character reference', |
| 3303 |
text => (sprintf 'U-%08X', $code), |
text => (sprintf 'U-%08X', $code), |
| 3304 |
line => $l, column => $c); |
line => $l, column => $c); |
| 3305 |
$code = 0xFFFD; |
$code = 0xFFFD; |
|
} elsif ($code == 0x000D) { |
|
|
!!!cp (1010); |
|
|
!!!parse-error (type => 'CR character reference', line => $l, column => $c); |
|
|
$code = 0x000A; |
|
|
} elsif (0x80 <= $code and $code <= 0x9F) { |
|
|
!!!cp (1011); |
|
|
!!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c); |
|
|
$code = $c1_entity_char->{$code}; |
|
| 3306 |
} |
} |
| 3307 |
|
|
| 3308 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3643 |
!!!ack-later; |
!!!ack-later; |
| 3644 |
return; |
return; |
| 3645 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 3646 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 3647 |
## Ignore the token |
## Ignore the token |
| 3648 |
|
|
| 3649 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 3700 |
!!!next-token; |
!!!next-token; |
| 3701 |
redo B; |
redo B; |
| 3702 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 3703 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 3704 |
## Ignore the token. |
## Ignore the token. |
| 3705 |
|
|
| 3706 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 4514 |
|
|
| 4515 |
if ($self->{insertion_mode} & HEAD_IMS) { |
if ($self->{insertion_mode} & HEAD_IMS) { |
| 4516 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4517 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 4518 |
unless ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
unless ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 4519 |
!!!cp ('t88.2'); |
!!!cp ('t88.2'); |
| 4520 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 4693 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 4694 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 4695 |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 4696 |
[\x09-\x0D\x20]*= |
[\x09\x0A\x0C\x0D\x20]*= |
| 4697 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 4698 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) { |
([^"'\x09\x0A\x0C\x0D\x20] |
| 4699 |
|
[^\x09\x0A\x0C\x0D\x20\x3B]*))/x) { |
| 4700 |
!!!cp ('t107'); |
!!!cp ('t107'); |
| 4701 |
## NOTE: Whether the encoding is supported or not is handled |
## NOTE: Whether the encoding is supported or not is handled |
| 4702 |
## in the {change_encoding} callback. |
## in the {change_encoding} callback. |
| 5505 |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
} elsif ($self->{insertion_mode} & TABLE_IMS) { |
| 5506 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 5507 |
if (not $open_tables->[-1]->[1] and # tainted |
if (not $open_tables->[-1]->[1] and # tainted |
| 5508 |
$token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
$token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 5509 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5510 |
|
|
| 5511 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6189 |
} |
} |
| 6190 |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
| 6191 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6192 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6193 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 6194 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6195 |
!!!cp ('t260'); |
!!!cp ('t260'); |
| 6530 |
} |
} |
| 6531 |
} elsif ($self->{insertion_mode} & BODY_AFTER_IMS) { |
} elsif ($self->{insertion_mode} & BODY_AFTER_IMS) { |
| 6532 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6533 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6534 |
my $data = $1; |
my $data = $1; |
| 6535 |
## As if in body |
## As if in body |
| 6536 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 6547 |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 6548 |
!!!cp ('t301'); |
!!!cp ('t301'); |
| 6549 |
!!!parse-error (type => 'after html:#text', token => $token); |
!!!parse-error (type => 'after html:#text', token => $token); |
| 6550 |
|
# |
|
## Reprocess in the "after body" insertion mode. |
|
| 6551 |
} else { |
} else { |
| 6552 |
!!!cp ('t302'); |
!!!cp ('t302'); |
| 6553 |
|
## "after body" insertion mode |
| 6554 |
|
!!!parse-error (type => 'after body:#text', token => $token); |
| 6555 |
|
# |
| 6556 |
} |
} |
|
|
|
|
## "after body" insertion mode |
|
|
!!!parse-error (type => 'after body:#text', token => $token); |
|
| 6557 |
|
|
| 6558 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6559 |
## reprocess |
## reprocess |
| 6563 |
!!!cp ('t303'); |
!!!cp ('t303'); |
| 6564 |
!!!parse-error (type => 'after html', |
!!!parse-error (type => 'after html', |
| 6565 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 6566 |
|
# |
|
## Reprocess in the "after body" insertion mode. |
|
| 6567 |
} else { |
} else { |
| 6568 |
!!!cp ('t304'); |
!!!cp ('t304'); |
| 6569 |
|
## "after body" insertion mode |
| 6570 |
|
!!!parse-error (type => 'after body', |
| 6571 |
|
text => $token->{tag_name}, token => $token); |
| 6572 |
|
# |
| 6573 |
} |
} |
| 6574 |
|
|
|
## "after body" insertion mode |
|
|
!!!parse-error (type => 'after body', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
|
|
| 6575 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6576 |
!!!ack-later; |
!!!ack-later; |
| 6577 |
## reprocess |
## reprocess |
| 6582 |
!!!parse-error (type => 'after html:/', |
!!!parse-error (type => 'after html:/', |
| 6583 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 6584 |
|
|
| 6585 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 6586 |
## Reprocess in the "after body" insertion mode. |
## Reprocess. |
| 6587 |
|
next B; |
| 6588 |
} else { |
} else { |
| 6589 |
!!!cp ('t306'); |
!!!cp ('t306'); |
| 6590 |
} |
} |
| 6622 |
} |
} |
| 6623 |
} elsif ($self->{insertion_mode} & FRAME_IMS) { |
} elsif ($self->{insertion_mode} & FRAME_IMS) { |
| 6624 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 6625 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 6626 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 6627 |
|
|
| 6628 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 6632 |
} |
} |
| 6633 |
} |
} |
| 6634 |
|
|
| 6635 |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) { |
| 6636 |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
| 6637 |
!!!cp ('t311'); |
!!!cp ('t311'); |
| 6638 |
!!!parse-error (type => 'in frameset:#text', token => $token); |
!!!parse-error (type => 'in frameset:#text', token => $token); |
| 6809 |
} elsif ($token->{attributes}->{content}) { |
} elsif ($token->{attributes}->{content}) { |
| 6810 |
if ($token->{attributes}->{content}->{value} |
if ($token->{attributes}->{content}->{value} |
| 6811 |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
=~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt] |
| 6812 |
[\x09-\x0D\x20]*= |
[\x09\x0A\x0C\x0D\x20]*= |
| 6813 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 6814 |
([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) { |
([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*)) |
| 6815 |
|
/x) { |
| 6816 |
!!!cp ('t336'); |
!!!cp ('t336'); |
| 6817 |
## NOTE: Whether the encoding is supported or not is handled |
## NOTE: Whether the encoding is supported or not is handled |
| 6818 |
## in the {change_encoding} callback. |
## in the {change_encoding} callback. |
| 7896 |
$p->{char_buffer_pos} += $count; |
$p->{char_buffer_pos} += $count; |
| 7897 |
$p->{line_prev} = $p->{line}; |
$p->{line_prev} = $p->{line}; |
| 7898 |
$p->{column_prev} = $p->{column} - 1; |
$p->{column_prev} = $p->{column} - 1; |
|
$p->{prev_char} = [-1, -1, -1]; |
|
| 7899 |
$p->{nc} = -1; |
$p->{nc} = -1; |
| 7900 |
} |
} |
| 7901 |
return $count; |
return $count; |
| 7907 |
if ($count) { |
if ($count) { |
| 7908 |
$p->{column} += $count; |
$p->{column} += $count; |
| 7909 |
$p->{column_prev} += $count; |
$p->{column_prev} += $count; |
|
$p->{prev_char} = [-1, -1, -1]; |
|
| 7910 |
$p->{nc} = -1; |
$p->{nc} = -1; |
| 7911 |
} |
} |
| 7912 |
return $count; |
return $count; |