278 |
zeta => "\x{03B6}", |
zeta => "\x{03B6}", |
279 |
zwj => "\x{200D}", |
zwj => "\x{200D}", |
280 |
zwnj => "\x{200C}", |
zwnj => "\x{200C}", |
281 |
}; |
}; # $entity_char |
282 |
|
|
283 |
|
## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562> |
284 |
|
## Replacement table for numeric character references whose value falls in
## the C1 control range (128..159, i.e. U+0080..U+009F).
##
## Keys are the code points written in the reference; values are the code
## points that are emitted instead.  The values follow the Windows-1252
## assignments for the corresponding byte values; positions that have no
## assignment there map to 65533 (U+FFFD REPLACEMENT CHARACTER).
##
## The character reference tokenizer consults this table only after it has
## established that 0x80 <= code <= 0x9F, so every key in that range must
## be present here.
my $c1_entity_char = {
  128 => 8364,   # U+20AC EURO SIGN
  129 => 65533,  # U+FFFD (unassigned)
  130 => 8218,   # U+201A SINGLE LOW-9 QUOTATION MARK
  131 => 402,    # U+0192 LATIN SMALL LETTER F WITH HOOK
  132 => 8222,   # U+201E DOUBLE LOW-9 QUOTATION MARK
  133 => 8230,   # U+2026 HORIZONTAL ELLIPSIS
  134 => 8224,   # U+2020 DAGGER
  135 => 8225,   # U+2021 DOUBLE DAGGER
  136 => 710,    # U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
  137 => 8240,   # U+2030 PER MILLE SIGN
  138 => 352,    # U+0160 LATIN CAPITAL LETTER S WITH CARON
  139 => 8249,   # U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  140 => 338,    # U+0152 LATIN CAPITAL LIGATURE OE
  141 => 65533,  # U+FFFD (unassigned)
  142 => 381,    # U+017D LATIN CAPITAL LETTER Z WITH CARON
  143 => 65533,  # U+FFFD (unassigned)
  144 => 65533,  # U+FFFD (unassigned)
  145 => 8216,   # U+2018 LEFT SINGLE QUOTATION MARK
  146 => 8217,   # U+2019 RIGHT SINGLE QUOTATION MARK
  147 => 8220,   # U+201C LEFT DOUBLE QUOTATION MARK
  148 => 8221,   # U+201D RIGHT DOUBLE QUOTATION MARK
  149 => 8226,   # U+2022 BULLET
  150 => 8211,   # U+2013 EN DASH
  151 => 8212,   # U+2014 EM DASH
  152 => 732,    # U+02DC SMALL TILDE
  153 => 8482,   # U+2122 TRADE MARK SIGN
  154 => 353,    # U+0161 LATIN SMALL LETTER S WITH CARON
  155 => 8250,   # U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  156 => 339,    # U+0153 LATIN SMALL LIGATURE OE
  157 => 65533,  # U+FFFD (unassigned)
  158 => 382,    # U+017E LATIN SMALL LETTER Z WITH CARON
  159 => 376,    # U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
}; # $c1_entity_char
318 |
|
|
319 |
my $special_category = { |
my $special_category = { |
320 |
address => 1, area => 1, base => 1, basefont => 1, bgsound => 1, |
address => 1, area => 1, base => 1, basefont => 1, bgsound => 1, |
354 |
$self->{next_input_character} = ord substr $$s, $i++, 1; |
$self->{next_input_character} = ord substr $$s, $i++, 1; |
355 |
$column++; |
$column++; |
356 |
|
|
357 |
if ($self->{next_input_character} == 0x000D) { # CR |
if ($self->{next_input_character} == 0x000A) { # LF |
358 |
|
$line++; |
359 |
|
$column = 0; |
360 |
|
} elsif ($self->{next_input_character} == 0x000D) { # CR |
361 |
if ($i >= length $$s) { |
if ($i >= length $$s) { |
362 |
# |
# |
363 |
} else { |
} else { |
370 |
} |
} |
371 |
$self->{next_input_character} = 0x000A; # LF # MUST |
$self->{next_input_character} = 0x000A; # LF # MUST |
372 |
$line++; |
$line++; |
373 |
$column = -1; |
$column = 0; |
374 |
} elsif ($self->{next_input_character} > 0x10FFFF) { |
} elsif ($self->{next_input_character} > 0x10FFFF) { |
375 |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
376 |
} elsif ($self->{next_input_character} == 0x0000) { # NULL |
} elsif ($self->{next_input_character} == 0x0000) { # NULL |
1350 |
redo A; |
redo A; |
1351 |
} elsif (0x0061 <= $self->{next_input_character} and |
} elsif (0x0061 <= $self->{next_input_character} and |
1352 |
$self->{next_input_character} <= 0x007A) { # a..z |
$self->{next_input_character} <= 0x007A) { # a..z |
1353 |
|
## ISSUE: "Set the token's name name to the" in the spec |
1354 |
$self->{current_token} = {type => 'DOCTYPE', |
$self->{current_token} = {type => 'DOCTYPE', |
1355 |
name => chr ($self->{next_input_character} - 0x0020), |
name => chr ($self->{next_input_character} - 0x0020), |
1356 |
error => 1}; |
error => 1}; |
1377 |
$self->{current_token} = {type => 'DOCTYPE', |
$self->{current_token} = {type => 'DOCTYPE', |
1378 |
name => chr ($self->{next_input_character}), |
name => chr ($self->{next_input_character}), |
1379 |
error => 1}; |
error => 1}; |
1380 |
|
## ISSUE: "Set the token's name name to the" in the spec |
1381 |
$self->{state} = 'DOCTYPE name'; |
$self->{state} = 'DOCTYPE name'; |
1382 |
!!!next-input-character; |
!!!next-input-character; |
1383 |
redo A; |
redo A; |
1495 |
|
|
1496 |
if ($self->{next_input_character} == 0x0023) { # # |
if ($self->{next_input_character} == 0x0023) { # # |
1497 |
!!!next-input-character; |
!!!next-input-character; |
|
my $num; |
|
1498 |
if ($self->{next_input_character} == 0x0078 or # x |
if ($self->{next_input_character} == 0x0078 or # x |
1499 |
$self->{next_input_character} == 0x0058) { # X |
$self->{next_input_character} == 0x0058) { # X |
1500 |
|
my $num; |
1501 |
X: { |
X: { |
1502 |
my $x_char = $self->{next_input_character}; |
my $x_char = $self->{next_input_character}; |
1503 |
!!!next-input-character; |
!!!next-input-character; |
1533 |
} |
} |
1534 |
|
|
1535 |
## TODO: check the definition for |a valid Unicode character|. |
## TODO: check the definition for |a valid Unicode character|. |
1536 |
|
## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189> |
1537 |
if ($num > 1114111 or $num == 0) { |
if ($num > 1114111 or $num == 0) { |
1538 |
$num = 0xFFFD; # REPLACEMENT CHARACTER |
$num = 0xFFFD; # REPLACEMENT CHARACTER |
1539 |
## ISSUE: Why this is not an error? |
## ISSUE: Why this is not an error? |
1540 |
|
} elsif (0x80 <= $num and $num <= 0x9F) { |
1541 |
|
## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562> |
1542 |
|
## ISSUE: Not in the spec yet; parse error? |
1543 |
|
$num = $c1_entity_char->{$num}; |
1544 |
} |
} |
1545 |
|
|
1546 |
return {type => 'character', data => chr $num}; |
return {type => 'character', data => chr $num}; |
1568 |
if ($code > 1114111 or $code == 0) { |
if ($code > 1114111 or $code == 0) { |
1569 |
$code = 0xFFFD; # REPLACEMENT CHARACTER |
$code = 0xFFFD; # REPLACEMENT CHARACTER |
1570 |
## ISSUE: Why this is not an error? |
## ISSUE: Why this is not an error? |
1571 |
|
} elsif (0x80 <= $code and $code <= 0x9F) { |
1572 |
|
## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562> |
1573 |
|
## ISSUE: Not in the spec yet; parse error? |
1574 |
|
$code = $c1_entity_char->{$code}; |
1575 |
} |
} |
1576 |
|
|
1577 |
return {type => 'character', data => chr $code}; |
return {type => 'character', data => chr $code}; |
2025 |
$formatting_element_i_in_open = $_; |
$formatting_element_i_in_open = $_; |
2026 |
last INSCOPE; |
last INSCOPE; |
2027 |
} else { # in open elements but not in scope |
} else { # in open elements but not in scope |
2028 |
!!!parse-error; |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
2029 |
## Ignore the token |
## Ignore the token |
2030 |
!!!next-token; |
!!!next-token; |
2031 |
return; |
return; |
2038 |
} |
} |
2039 |
} # INSCOPE |
} # INSCOPE |
2040 |
unless (defined $formatting_element_i_in_open) { |
unless (defined $formatting_element_i_in_open) { |
2041 |
!!!parse-error; |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
2042 |
pop @$active_formatting_elements; # $formatting_element |
pop @$active_formatting_elements; # $formatting_element |
2043 |
!!!next-token; ## TODO: ok? |
!!!next-token; ## TODO: ok? |
2044 |
return; |
return; |
2045 |
} |
} |
2046 |
if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) { |
if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) { |
2047 |
!!!parse-error; |
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
2048 |
} |
} |
2049 |
|
|
2050 |
## Step 2 |
## Step 2 |
4857 |
$self->{next_input_character} = -1 and return if $i >= length $$s; |
$self->{next_input_character} = -1 and return if $i >= length $$s; |
4858 |
$self->{next_input_character} = ord substr $$s, $i++, 1; |
$self->{next_input_character} = ord substr $$s, $i++, 1; |
4859 |
$column++; |
$column++; |
4860 |
|
|
4861 |
if ($self->{next_input_character} == 0x000D) { # CR |
if ($self->{next_input_character} == 0x000A) { # LF |
4862 |
|
$line++; |
4863 |
|
$column = 0; |
4864 |
|
} elsif ($self->{next_input_character} == 0x000D) { # CR |
4865 |
if ($i >= length $$s) { |
if ($i >= length $$s) { |
4866 |
# |
# |
4867 |
} else { |
} else { |
4874 |
} |
} |
4875 |
$self->{next_input_character} = 0x000A; # LF # MUST |
$self->{next_input_character} = 0x000A; # LF # MUST |
4876 |
$line++; |
$line++; |
4877 |
$column = -1; |
$column = 0; |
4878 |
} elsif ($self->{next_input_character} > 0x10FFFF) { |
} elsif ($self->{next_input_character} > 0x10FFFF) { |
4879 |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
4880 |
} elsif ($self->{next_input_character} == 0x0000) { # NULL |
} elsif ($self->{next_input_character} == 0x0000) { # NULL |