| 280 |
zwnj => "\x{200C}", |
zwnj => "\x{200C}", |
| 281 |
}; # $entity_char |
}; # $entity_char |
| 282 |
|
|
|
## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562> |
|
| 283 |
my $c1_entity_char = { |
my $c1_entity_char = { |
| 284 |
128, 8364, |
0x80 => 0x20AC, |
| 285 |
129, 65533, |
0x81 => 0xFFFD, |
| 286 |
130, 8218, |
0x82 => 0x201A, |
| 287 |
131, 402, |
0x83 => 0x0192, |
| 288 |
132, 8222, |
0x84 => 0x201E, |
| 289 |
133, 8230, |
0x85 => 0x2026, |
| 290 |
134, 8224, |
0x86 => 0x2020, |
| 291 |
135, 8225, |
0x87 => 0x2021, |
| 292 |
136, 710, |
0x88 => 0x02C6, |
| 293 |
137, 8240, |
0x89 => 0x2030, |
| 294 |
138, 352, |
0x8A => 0x0160, |
| 295 |
139, 8249, |
0x8B => 0x2039, |
| 296 |
140, 338, |
0x8C => 0x0152, |
| 297 |
141, 65533, |
0x8D => 0xFFFD, |
| 298 |
142, 381, |
0x8E => 0x017D, |
| 299 |
143, 65533, |
0x8F => 0xFFFD, |
| 300 |
144, 65533, |
0x90 => 0xFFFD, |
| 301 |
145, 8216, |
0x91 => 0x2018, |
| 302 |
146, 8217, |
0x92 => 0x2019, |
| 303 |
147, 8220, |
0x93 => 0x201C, |
| 304 |
148, 8221, |
0x94 => 0x201D, |
| 305 |
149, 8226, |
0x95 => 0x2022, |
| 306 |
150, 8211, |
0x96 => 0x2013, |
| 307 |
151, 8212, |
0x97 => 0x2014, |
| 308 |
152, 732, |
0x98 => 0x02DC, |
| 309 |
153, 8482, |
0x99 => 0x2122, |
| 310 |
154, 353, |
0x9A => 0x0161, |
| 311 |
155, 8250, |
0x9B => 0x203A, |
| 312 |
156, 339, |
0x9C => 0x0153, |
| 313 |
157, 65533, |
0x9D => 0xFFFD, |
| 314 |
158, 382, |
0x9E => 0x017E, |
| 315 |
159, 376, |
0x9F => 0x0178, |
| 316 |
}; # $c1_entity_char |
}; # $c1_entity_char |
| 317 |
|
|
| 318 |
my $special_category = { |
my $special_category = { |
| 349 |
my $column = 0; |
my $column = 0; |
| 350 |
$self->{set_next_input_character} = sub { |
$self->{set_next_input_character} = sub { |
| 351 |
my $self = shift; |
my $self = shift; |
| 352 |
|
|
| 353 |
|
pop @{$self->{prev_input_character}}; |
| 354 |
|
unshift @{$self->{prev_input_character}}, $self->{next_input_character}; |
| 355 |
|
|
| 356 |
$self->{next_input_character} = -1 and return if $i >= length $$s; |
$self->{next_input_character} = -1 and return if $i >= length $$s; |
| 357 |
$self->{next_input_character} = ord substr $$s, $i++, 1; |
$self->{next_input_character} = ord substr $$s, $i++, 1; |
| 358 |
$column++; |
$column++; |
| 377 |
} elsif ($self->{next_input_character} > 0x10FFFF) { |
} elsif ($self->{next_input_character} > 0x10FFFF) { |
| 378 |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
| 379 |
} elsif ($self->{next_input_character} == 0x0000) { # NULL |
} elsif ($self->{next_input_character} == 0x0000) { # NULL |
| 380 |
|
!!!parse-error (type => 'NULL'); |
| 381 |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
$self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST |
| 382 |
} |
} |
| 383 |
}; |
}; |
| 384 |
|
$self->{prev_input_character} = [-1, -1, -1]; |
| 385 |
|
$self->{next_input_character} = -1; |
| 386 |
|
|
| 387 |
my $onerror = $_[2] || sub { |
my $onerror = $_[2] || sub { |
| 388 |
my (%opt) = @_; |
my (%opt) = @_; |
| 467 |
} else { |
} else { |
| 468 |
# |
# |
| 469 |
} |
} |
| 470 |
|
} elsif ($self->{next_input_character} == 0x002D) { # - |
| 471 |
|
if ($self->{content_model_flag} eq 'RCDATA' or |
| 472 |
|
$self->{content_model_flag} eq 'CDATA') { |
| 473 |
|
unless ($self->{escape}) { |
| 474 |
|
if ($self->{prev_input_character}->[0] == 0x002D and # - |
| 475 |
|
$self->{prev_input_character}->[1] == 0x0021 and # ! |
| 476 |
|
$self->{prev_input_character}->[2] == 0x003C) { # < |
| 477 |
|
$self->{escape} = 1; |
| 478 |
|
} |
| 479 |
|
} |
| 480 |
|
} |
| 481 |
|
|
| 482 |
|
# |
| 483 |
} elsif ($self->{next_input_character} == 0x003C) { # < |
} elsif ($self->{next_input_character} == 0x003C) { # < |
| 484 |
if ($self->{content_model_flag} ne 'PLAINTEXT') { |
if ($self->{content_model_flag} eq 'PCDATA' or |
| 485 |
|
(($self->{content_model_flag} eq 'CDATA' or |
| 486 |
|
$self->{content_model_flag} eq 'RCDATA') and |
| 487 |
|
not $self->{escape})) { |
| 488 |
$self->{state} = 'tag open'; |
$self->{state} = 'tag open'; |
| 489 |
!!!next-input-character; |
!!!next-input-character; |
| 490 |
redo A; |
redo A; |
| 491 |
} else { |
} else { |
| 492 |
# |
# |
| 493 |
} |
} |
| 494 |
|
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 495 |
|
if ($self->{escape} and |
| 496 |
|
($self->{content_model_flag} eq 'RCDATA' or |
| 497 |
|
$self->{content_model_flag} eq 'CDATA')) { |
| 498 |
|
if ($self->{prev_input_character}->[0] == 0x002D and # - |
| 499 |
|
$self->{prev_input_character}->[1] == 0x002D) { # - |
| 500 |
|
delete $self->{escape}; |
| 501 |
|
} |
| 502 |
|
} |
| 503 |
|
|
| 504 |
|
# |
| 505 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 506 |
!!!emit ({type => 'end-of-file'}); |
!!!emit ({type => 'end-of-file'}); |
| 507 |
last A; ## TODO: ok? |
last A; ## TODO: ok? |
| 1571 |
$num = 0xFFFD; # REPLACEMENT CHARACTER |
$num = 0xFFFD; # REPLACEMENT CHARACTER |
| 1572 |
## ISSUE: Why this is not an error? |
## ISSUE: Why this is not an error? |
| 1573 |
} elsif (0x80 <= $num and $num <= 0x9F) { |
} elsif (0x80 <= $num and $num <= 0x9F) { |
| 1574 |
## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562> |
!!!parse-error (type => sprintf 'c1 entity:U+%04X', $num); |
|
## ISSUE: Not in the spec yet; parse error? |
|
| 1575 |
$num = $c1_entity_char->{$num}; |
$num = $c1_entity_char->{$num}; |
| 1576 |
} |
} |
| 1577 |
|
|
| 1601 |
$code = 0xFFFD; # REPLACEMENT CHARACTER |
$code = 0xFFFD; # REPLACEMENT CHARACTER |
| 1602 |
## ISSUE: Why this is not an error? |
## ISSUE: Why this is not an error? |
| 1603 |
} elsif (0x80 <= $code and $code <= 0x9F) { |
} elsif (0x80 <= $code and $code <= 0x9F) { |
| 1604 |
## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562> |
!!!parse-error (type => sprintf 'c1 entity:U+%04X', $code); |
|
## ISSUE: Not in the spec yet; parse error? |
|
| 1605 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
| 1606 |
} |
} |
| 1607 |
|
|
| 1957 |
? $self->{head_element} : $self->{open_elements}->[-1]->[0]) |
? $self->{head_element} : $self->{open_elements}->[-1]->[0]) |
| 1958 |
->append_child ($style_el); |
->append_child ($style_el); |
| 1959 |
$self->{content_model_flag} = 'CDATA'; |
$self->{content_model_flag} = 'CDATA'; |
| 1960 |
|
delete $self->{escape}; # MUST |
| 1961 |
|
|
| 1962 |
my $text = ''; |
my $text = ''; |
| 1963 |
!!!next-token; |
!!!next-token; |
| 1986 |
## TODO: mark as "parser-inserted" |
## TODO: mark as "parser-inserted" |
| 1987 |
|
|
| 1988 |
$self->{content_model_flag} = 'CDATA'; |
$self->{content_model_flag} = 'CDATA'; |
| 1989 |
|
delete $self->{escape}; # MUST |
| 1990 |
|
|
| 1991 |
my $text = ''; |
my $text = ''; |
| 1992 |
!!!next-token; |
!!!next-token; |
| 2276 |
(defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0]) |
(defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0]) |
| 2277 |
->append_child ($title_el); |
->append_child ($title_el); |
| 2278 |
$self->{content_model_flag} = 'RCDATA'; |
$self->{content_model_flag} = 'RCDATA'; |
| 2279 |
|
delete $self->{escape}; # MUST |
| 2280 |
|
|
| 2281 |
my $text = ''; |
my $text = ''; |
| 2282 |
!!!next-token; |
!!!next-token; |
| 2397 |
LI: { |
LI: { |
| 2398 |
## Step 2 |
## Step 2 |
| 2399 |
if ($node->[1] eq 'li') { |
if ($node->[1] eq 'li') { |
| 2400 |
|
if ($i != -1) { |
| 2401 |
|
!!!parse-error (type => 'end tag missing:'. |
| 2402 |
|
$self->{open_elements}->[-1]->[1]); |
| 2403 |
|
## TODO: test |
| 2404 |
|
} |
| 2405 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 2406 |
last LI; |
last LI; |
| 2407 |
} |
} |
| 2445 |
LI: { |
LI: { |
| 2446 |
## Step 2 |
## Step 2 |
| 2447 |
if ($node->[1] eq 'dt' or $node->[1] eq 'dd') { |
if ($node->[1] eq 'dt' or $node->[1] eq 'dd') { |
| 2448 |
|
if ($i != -1) { |
| 2449 |
|
!!!parse-error (type => 'end tag missing:'. |
| 2450 |
|
$self->{open_elements}->[-1]->[1]); |
| 2451 |
|
## TODO: test |
| 2452 |
|
} |
| 2453 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 2454 |
last LI; |
last LI; |
| 2455 |
} |
} |
| 2623 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 2624 |
|
|
| 2625 |
$self->{content_model_flag} = 'CDATA'; |
$self->{content_model_flag} = 'CDATA'; |
| 2626 |
|
delete $self->{escape}; # MUST |
| 2627 |
|
|
| 2628 |
!!!next-token; |
!!!next-token; |
| 2629 |
return; |
return; |
| 2740 |
} else { |
} else { |
| 2741 |
$self->{content_model_flag} = 'CDATA'; |
$self->{content_model_flag} = 'CDATA'; |
| 2742 |
} |
} |
| 2743 |
|
delete $self->{escape}; # MUST |
| 2744 |
|
|
| 2745 |
$insert->($el); |
$insert->($el); |
| 2746 |
|
|
| 2747 |
my $text = ''; |
my $text = ''; |
| 2748 |
!!!next-token; |
if ($token->{tag_name} eq 'textarea') { |
| 2749 |
|
!!!next-token; |
| 2750 |
|
if ($token->{type} eq 'character') { |
| 2751 |
|
$token->{data} =~ s/^\x0A//; |
| 2752 |
|
unless (length $token->{data}) { |
| 2753 |
|
!!!next-token; |
| 2754 |
|
} |
| 2755 |
|
} |
| 2756 |
|
} else { |
| 2757 |
|
!!!next-token; |
| 2758 |
|
} |
| 2759 |
while ($token->{type} eq 'character') { |
while ($token->{type} eq 'character') { |
| 2760 |
$text .= $token->{data}; |
$text .= $token->{data}; |
| 2761 |
!!!next-token; |
!!!next-token; |
| 2771 |
## Ignore the token |
## Ignore the token |
| 2772 |
} else { |
} else { |
| 2773 |
if ($token->{tag_name} eq 'textarea') { |
if ($token->{tag_name} eq 'textarea') { |
|
!!!parse-error (type => 'in CDATA:#'.$token->{type}); |
|
|
} else { |
|
| 2774 |
!!!parse-error (type => 'in RCDATA:#'.$token->{type}); |
!!!parse-error (type => 'in RCDATA:#'.$token->{type}); |
| 2775 |
|
} else { |
| 2776 |
|
!!!parse-error (type => 'in CDATA:#'.$token->{type}); |
| 2777 |
} |
} |
| 2778 |
## ISSUE: And ignore? |
## ISSUE: And ignore? |
| 2779 |
} |
} |
| 2842 |
address => 1, blockquote => 1, center => 1, dir => 1, |
address => 1, blockquote => 1, center => 1, dir => 1, |
| 2843 |
div => 1, dl => 1, fieldset => 1, listing => 1, |
div => 1, dl => 1, fieldset => 1, listing => 1, |
| 2844 |
menu => 1, ol => 1, pre => 1, ul => 1, |
menu => 1, ol => 1, pre => 1, ul => 1, |
|
form => 1, |
|
| 2845 |
p => 1, |
p => 1, |
| 2846 |
dd => 1, dt => 1, li => 1, |
dd => 1, dt => 1, li => 1, |
| 2847 |
button => 1, marquee => 1, object => 1, |
button => 1, marquee => 1, object => 1, |
| 2879 |
} |
} |
| 2880 |
|
|
| 2881 |
splice @{$self->{open_elements}}, $i if defined $i; |
splice @{$self->{open_elements}}, $i if defined $i; |
|
undef $self->{form_element} if $token->{tag_name} eq 'form'; |
|
| 2882 |
$clear_up_to_marker->() |
$clear_up_to_marker->() |
| 2883 |
if { |
if { |
| 2884 |
button => 1, marquee => 1, object => 1, |
button => 1, marquee => 1, object => 1, |
| 2885 |
}->{$token->{tag_name}}; |
}->{$token->{tag_name}}; |
| 2886 |
!!!next-token; |
!!!next-token; |
| 2887 |
return; |
return; |
| 2888 |
|
} elsif ($token->{tag_name} eq 'form') { |
| 2889 |
|
## has an element in scope |
| 2890 |
|
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 2891 |
|
my $node = $self->{open_elements}->[$_]; |
| 2892 |
|
if ($node->[1] eq $token->{tag_name}) { |
| 2893 |
|
## generate implied end tags |
| 2894 |
|
if ({ |
| 2895 |
|
dd => 1, dt => 1, li => 1, p => 1, |
| 2896 |
|
td => 1, th => 1, tr => 1, |
| 2897 |
|
}->{$self->{open_elements}->[-1]->[1]}) { |
| 2898 |
|
!!!back-token; |
| 2899 |
|
$token = {type => 'end tag', |
| 2900 |
|
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 2901 |
|
return; |
| 2902 |
|
} |
| 2903 |
|
last INSCOPE; |
| 2904 |
|
} elsif ({ |
| 2905 |
|
table => 1, caption => 1, td => 1, th => 1, |
| 2906 |
|
button => 1, marquee => 1, object => 1, html => 1, |
| 2907 |
|
}->{$node->[1]}) { |
| 2908 |
|
last INSCOPE; |
| 2909 |
|
} |
| 2910 |
|
} # INSCOPE |
| 2911 |
|
|
| 2912 |
|
if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) { |
| 2913 |
|
pop @{$self->{open_elements}}; |
| 2914 |
|
} else { |
| 2915 |
|
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
| 2916 |
|
} |
| 2917 |
|
|
| 2918 |
|
undef $self->{form_element}; |
| 2919 |
|
!!!next-token; |
| 2920 |
|
return; |
| 2921 |
} elsif ({ |
} elsif ({ |
| 2922 |
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, |
h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1, |
| 2923 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2962 |
strong => 1, tt => 1, u => 1, |
strong => 1, tt => 1, u => 1, |
| 2963 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2964 |
$formatting_end_tag->($token->{tag_name}); |
$formatting_end_tag->($token->{tag_name}); |
| 2965 |
|
## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884> |
| 2966 |
return; |
return; |
| 2967 |
} elsif ({ |
} elsif ({ |
| 2968 |
caption => 1, col => 1, colgroup => 1, frame => 1, |
caption => 1, col => 1, colgroup => 1, frame => 1, |
| 3163 |
(defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0]) |
(defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0]) |
| 3164 |
->append_child ($title_el); |
->append_child ($title_el); |
| 3165 |
$self->{content_model_flag} = 'RCDATA'; |
$self->{content_model_flag} = 'RCDATA'; |
| 3166 |
|
delete $self->{escape}; # MUST |
| 3167 |
|
|
| 3168 |
my $text = ''; |
my $text = ''; |
| 3169 |
!!!next-token; |
!!!next-token; |