| 525 |
|
|
| 526 |
if ($char_stream) { # if supported |
if ($char_stream) { # if supported |
| 527 |
## "Change the encoding" algorithm: |
## "Change the encoding" algorithm: |
|
|
|
|
## Step 1 |
|
|
if ($charset->{category} & |
|
|
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
|
|
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
|
|
($char_stream, $e_status) = $charset->get_decode_handle |
|
|
($byte_stream, |
|
|
byte_buffer => \ $buffer->{buffer}); |
|
|
} |
|
|
$charset_name = $charset->get_iana_name; |
|
| 528 |
|
|
| 529 |
## Step 2 |
## Step 1 |
| 530 |
if (defined $self->{input_encoding} and |
if (defined $self->{input_encoding} and |
| 531 |
$self->{input_encoding} eq $charset_name) { |
$self->{input_encoding} eq $charset_name) { |
| 532 |
!!!parse-error (type => 'charset label:matching', |
!!!parse-error (type => 'charset label:matching', |
| 536 |
return; |
return; |
| 537 |
} |
} |
| 538 |
|
|
| 539 |
|
## Step 2 (HTML5 revision 3205) |
| 540 |
|
if (defined $self->{input_encoding} and |
| 541 |
|
Message::Charset::Info->get_by_html_name ($self->{input_encoding}) |
| 542 |
|
->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
| 543 |
|
$self->{confident} = 1; |
| 544 |
|
return; |
| 545 |
|
} |
| 546 |
|
|
| 547 |
|
## Step 3 |
| 548 |
|
if ($charset->{category} & |
| 549 |
|
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
| 550 |
|
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
| 551 |
|
($char_stream, $e_status) = $charset->get_decode_handle |
| 552 |
|
($byte_stream, |
| 553 |
|
byte_buffer => \ $buffer->{buffer}); |
| 554 |
|
} |
| 555 |
|
$charset_name = $charset->get_iana_name; |
| 556 |
|
|
| 557 |
!!!parse-error (type => 'charset label detected', |
!!!parse-error (type => 'charset label detected', |
| 558 |
text => $self->{input_encoding}, |
text => $self->{input_encoding}, |
| 559 |
value => $charset_name, |
value => $charset_name, |
| 560 |
level => $self->{level}->{warn}, |
level => $self->{level}->{warn}, |
| 561 |
token => $token); |
token => $token); |
| 562 |
|
|
| 563 |
## Step 3 |
## Step 4 |
| 564 |
# if (can) { |
# if (can) { |
| 565 |
## change the encoding on the fly. |
## change the encoding on the fly. |
| 566 |
#$self->{confident} = 1; |
#$self->{confident} = 1; |
| 567 |
#return; |
#return; |
| 568 |
# } |
# } |
| 569 |
|
|
| 570 |
## Step 4 |
## Step 5 |
| 571 |
throw Whatpm::HTML::RestartParser (); |
throw Whatpm::HTML::RestartParser (); |
| 572 |
} |
} |
| 573 |
}; # $self->{change_encoding} |
}; # $self->{change_encoding} |
| 828 |
## combined with the original insertion mode.  In this parser, |
## combined with the original insertion mode.  In this parser, |
| 829 |
## they are stored together in the bit-or'ed form. |
## they are stored together in the bit-or'ed form. |
| 830 |
|
|
| 831 |
|
## Bit mask with the low eleven bits set.  The parser applies it as
## |$self->{insertion_mode} & IM_MASK| before comparing the result
## against an insertion mode constant.
sub IM_MASK () { 0b11111111111 } |
| 832 |
|
|
| 833 |
## NOTE: "initial" and "before html" insertion modes have no constants. |
## NOTE: "initial" and "before html" insertion modes have no constants. |
| 834 |
|
|
| 835 |
## NOTE: "after after body" insertion mode. |
## NOTE: "after after body" insertion mode. |
| 944 |
$doctype->public_id ($token->{pubid}) if defined $token->{pubid}; |
$doctype->public_id ($token->{pubid}) if defined $token->{pubid}; |
| 945 |
$doctype->system_id ($token->{sysid}) if defined $token->{sysid}; |
$doctype->system_id ($token->{sysid}) if defined $token->{sysid}; |
| 946 |
## NOTE: Other DocumentType attributes are null or empty lists. |
## NOTE: Other DocumentType attributes are null or empty lists. |
| 947 |
## ISSUE: internalSubset = null?? |
## In Firefox3, |internalSubset| attribute is set to the empty |
| 948 |
|
## string, while |null| is an allowed value for the attribute |
| 949 |
|
## according to DOM3 Core. |
| 950 |
$self->{document}->append_child ($doctype); |
$self->{document}->append_child ($doctype); |
| 951 |
|
|
| 952 |
if ($token->{quirks} or $doctype_name ne 'HTML') { |
if ($token->{quirks} or $doctype_name ne 'HTML') { |
| 1419 |
## Step 3 |
## Step 3 |
| 1420 |
## TODO: Mark as "already executed", if ... |
## TODO: Mark as "already executed", if ... |
| 1421 |
|
|
| 1422 |
## Step 4 |
## Step 4 (HTML5 revision 2702) |
| 1423 |
$insert->($script_el); |
$insert->($script_el); |
|
|
|
|
## ISSUE: $script_el is not put into the stack |
|
| 1424 |
push @{$self->{open_elements}}, [$script_el, $el_category->{script}]; |
push @{$self->{open_elements}}, [$script_el, $el_category->{script}]; |
| 1425 |
|
|
| 1426 |
## Step 5 |
## Step 5 |
| 1872 |
} elsif ({ |
} elsif ({ |
| 1873 |
b => 1, big => 1, blockquote => 1, body => 1, br => 1, |
b => 1, big => 1, blockquote => 1, body => 1, br => 1, |
| 1874 |
center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1, |
center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1, |
| 1875 |
em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1, |
em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1, |
| 1876 |
h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, |
h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, |
| 1877 |
img => 1, li => 1, listing => 1, menu => 1, meta => 1, |
img => 1, li => 1, listing => 1, menu => 1, meta => 1, |
| 1878 |
nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1, |
nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1, |
| 1879 |
small => 1, span => 1, strong => 1, strike => 1, sub => 1, |
small => 1, span => 1, strong => 1, strike => 1, sub => 1, |
| 1880 |
sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1, |
sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1, |
| 1881 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}} or |
| 1882 |
|
($token->{tag_name} eq 'font' and |
| 1883 |
|
($token->{attributes}->{color} or |
| 1884 |
|
$token->{attributes}->{face} or |
| 1885 |
|
$token->{attributes}->{size}))) { |
| 1886 |
!!!cp ('t87.2'); |
!!!cp ('t87.2'); |
| 1887 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 1888 |
text => $self->{open_elements}->[-1]->[0] |
text => $self->{open_elements}->[-1]->[0] |
| 1958 |
} |
} |
| 1959 |
} elsif ($token->{type} == END_TAG_TOKEN) { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 1960 |
## NOTE: "using the rules for secondary insertion mode" then "continue" |
## NOTE: "using the rules for secondary insertion mode" then "continue" |
| 1961 |
!!!cp ('t87.5'); |
if ($token->{tag_name} eq 'script') { |
| 1962 |
# |
!!!cp ('t87.41'); |
| 1963 |
|
# |
| 1964 |
|
## XXXscript: Execute script here. |
| 1965 |
|
} else { |
| 1966 |
|
!!!cp ('t87.5'); |
| 1967 |
|
# |
| 1968 |
|
} |
| 1969 |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 1970 |
!!!cp ('t87.6'); |
!!!cp ('t87.6'); |
| 1971 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 2265 |
|
|
| 2266 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 2267 |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
| 2268 |
## ISSUE: A spec bug [Bug 6038] |
|
| 2269 |
|
## NOTE: At this point the stack of open elements contains |
| 2270 |
|
## the |head| element (index == -2) and the |script| element |
| 2271 |
|
## (index == -1). In the "after head" insertion mode the |
| 2272 |
|
## |head| element is inserted only for the purpose of |
| 2273 |
|
## providing the context for the |script| element, and |
| 2274 |
|
## therefore we can, and must, now remove the element from |
| 2275 |
|
## the stack. |
| 2276 |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
| 2277 |
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM; |
| 2278 |
next B; |
next B; |
| 2279 |
} elsif ($token->{tag_name} eq 'style' or |
} elsif ($token->{tag_name} eq 'style' or |
| 2280 |
$token->{tag_name} eq 'noframes') { |
$token->{tag_name} eq 'noframes') { |
| 2294 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
| 2295 |
## ISSUE: A spec bug [Bug 6038] |
## ISSUE: A spec bug [Bug 6038] |
| 2296 |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
| 2297 |
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM; |
| 2298 |
next B; |
next B; |
| 2299 |
} elsif ($token->{tag_name} eq 'noscript') { |
} elsif ($token->{tag_name} eq 'noscript') { |
| 2300 |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2342 |
$script_start_tag->(); |
$script_start_tag->(); |
| 2343 |
## ISSUE: A spec bug [Bug 6038] |
## ISSUE: A spec bug [Bug 6038] |
| 2344 |
splice @{$self->{open_elements}}, -2, 1 # <head> |
splice @{$self->{open_elements}}, -2, 1 # <head> |
| 2345 |
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM; |
| 2346 |
next B; |
next B; |
| 2347 |
} elsif ($token->{tag_name} eq 'body' or |
} elsif ($token->{tag_name} eq 'body' or |
| 2348 |
$token->{tag_name} eq 'frameset') { |
$token->{tag_name} eq 'frameset') { |
| 2507 |
## Ignore the token |
## Ignore the token |
| 2508 |
!!!next-token; |
!!!next-token; |
| 2509 |
next B; |
next B; |
| 2510 |
} elsif ($token->{tag_name} eq 'br') { |
} elsif ($token->{tag_name} eq 'br') { |
| 2511 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2512 |
!!!cp ('t142.2'); |
!!!cp ('t142.2'); |
| 2513 |
## (before head) as if <head>, (in head) as if </head> |
## (before head) as if <head>, (in head) as if </head> |
| 2514 |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
| 2515 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2516 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2517 |
|
|
| 2518 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2519 |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2520 |
!!!cp ('t143.2'); |
!!!cp ('t143.2'); |
| 2521 |
## As if </head> |
## As if </head> |
| 2522 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2523 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2524 |
|
|
| 2525 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2526 |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2527 |
!!!cp ('t143.3'); |
!!!cp ('t143.3'); |
| 2528 |
## ISSUE: Two parse errors for <head><noscript></br> |
## NOTE: Two parse errors for <head><noscript></br> |
| 2529 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 2530 |
text => 'br', token => $token); |
text => 'br', token => $token); |
| 2531 |
## As if </noscript> |
## As if </noscript> |
| 2532 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2533 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2534 |
|
|
| 2535 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2536 |
## As if </head> |
## As if </head> |
| 2537 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2538 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2539 |
|
|
| 2540 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2541 |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2542 |
!!!cp ('t143.4'); |
!!!cp ('t143.4'); |
| 2543 |
# |
# |
| 2544 |
} else { |
} else { |
| 2545 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 2546 |
} |
} |
| 2547 |
|
|
| 2548 |
## ISSUE: does not agree with IE7 - it doesn't ignore </br>. |
# |
| 2549 |
!!!parse-error (type => 'unmatched end tag', |
} else { ## Other end tags |
|
text => 'br', token => $token); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
next B; |
|
|
} else { |
|
| 2550 |
!!!cp ('t145'); |
!!!cp ('t145'); |
| 2551 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 2552 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 2590 |
!!!insert-element ('body',, $token); |
!!!insert-element ('body',, $token); |
| 2591 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 2592 |
## reprocess |
## reprocess |
| 2593 |
next B; |
next B; |
| 2594 |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 2595 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2596 |
!!!cp ('t149.1'); |
!!!cp ('t149.1'); |
| 2663 |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
| 2664 |
td => 1, tfoot => 1, th => 1, thead => 1, tr => 1, |
td => 1, tfoot => 1, th => 1, thead => 1, tr => 1, |
| 2665 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2666 |
if ($self->{insertion_mode} == IN_CELL_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) { |
| 2667 |
## have an element in table scope |
## have an element in table scope |
| 2668 |
for (reverse 0..$#{$self->{open_elements}}) { |
for (reverse 0..$#{$self->{open_elements}}) { |
| 2669 |
my $node = $self->{open_elements}->[$_]; |
my $node = $self->{open_elements}->[$_]; |
| 2691 |
!!!nack ('t153.1'); |
!!!nack ('t153.1'); |
| 2692 |
!!!next-token; |
!!!next-token; |
| 2693 |
next B; |
next B; |
| 2694 |
} elsif ($self->{insertion_mode} == IN_CAPTION_IM) { |
} elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) { |
| 2695 |
!!!parse-error (type => 'not closed', text => 'caption', |
!!!parse-error (type => 'not closed', text => 'caption', |
| 2696 |
token => $token); |
token => $token); |
| 2697 |
|
|
| 2756 |
} |
} |
| 2757 |
} elsif ($token->{type} == END_TAG_TOKEN) { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 2758 |
if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') { |
if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') { |
| 2759 |
if ($self->{insertion_mode} == IN_CELL_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) { |
| 2760 |
## have an element in table scope |
## have an element in table scope |
| 2761 |
my $i; |
my $i; |
| 2762 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 2806 |
|
|
| 2807 |
!!!next-token; |
!!!next-token; |
| 2808 |
next B; |
next B; |
| 2809 |
} elsif ($self->{insertion_mode} == IN_CAPTION_IM) { |
} elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) { |
| 2810 |
!!!cp ('t169'); |
!!!cp ('t169'); |
| 2811 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 2812 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 2818 |
# |
# |
| 2819 |
} |
} |
| 2820 |
} elsif ($token->{tag_name} eq 'caption') { |
} elsif ($token->{tag_name} eq 'caption') { |
| 2821 |
if ($self->{insertion_mode} == IN_CAPTION_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) { |
| 2822 |
## have a table element in table scope |
## have a table element in table scope |
| 2823 |
my $i; |
my $i; |
| 2824 |
INSCOPE: { |
INSCOPE: { |
| 2867 |
|
|
| 2868 |
!!!next-token; |
!!!next-token; |
| 2869 |
next B; |
next B; |
| 2870 |
} elsif ($self->{insertion_mode} == IN_CELL_IM) { |
} elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) { |
| 2871 |
!!!cp ('t177'); |
!!!cp ('t177'); |
| 2872 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 2873 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 2882 |
table => 1, tbody => 1, tfoot => 1, |
table => 1, tbody => 1, tfoot => 1, |
| 2883 |
thead => 1, tr => 1, |
thead => 1, tr => 1, |
| 2884 |
}->{$token->{tag_name}} and |
}->{$token->{tag_name}} and |
| 2885 |
$self->{insertion_mode} == IN_CELL_IM) { |
($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) { |
| 2886 |
## have an element in table scope |
## have an element in table scope |
| 2887 |
my $i; |
my $i; |
| 2888 |
my $tn; |
my $tn; |
| 2919 |
next B; |
next B; |
| 2920 |
} # INSCOPE |
} # INSCOPE |
| 2921 |
} elsif ($token->{tag_name} eq 'table' and |
} elsif ($token->{tag_name} eq 'table' and |
| 2922 |
$self->{insertion_mode} == IN_CAPTION_IM) { |
($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) { |
| 2923 |
!!!parse-error (type => 'not closed', text => 'caption', |
!!!parse-error (type => 'not closed', text => 'caption', |
| 2924 |
token => $token); |
token => $token); |
| 2925 |
|
|
| 2939 |
} # INSCOPE |
} # INSCOPE |
| 2940 |
unless (defined $i) { |
unless (defined $i) { |
| 2941 |
!!!cp ('t186'); |
!!!cp ('t186'); |
| 2942 |
|
## TODO: Wrong error type? |
| 2943 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 2944 |
text => 'caption', token => $token); |
text => 'caption', token => $token); |
| 2945 |
## Ignore the token |
## Ignore the token |
| 2985 |
!!!cp ('t191'); |
!!!cp ('t191'); |
| 2986 |
# |
# |
| 2987 |
} |
} |
| 2988 |
} elsif ({ |
} elsif ({ |
| 2989 |
tbody => 1, tfoot => 1, |
tbody => 1, tfoot => 1, |
| 2990 |
thead => 1, tr => 1, |
thead => 1, tr => 1, |
| 2991 |
}->{$token->{tag_name}} and |
}->{$token->{tag_name}} and |
| 2992 |
$self->{insertion_mode} == IN_CAPTION_IM) { |
($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) { |
| 2993 |
!!!cp ('t192'); |
!!!cp ('t192'); |
| 2994 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 2995 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 2996 |
## Ignore the token |
## Ignore the token |
| 2997 |
!!!next-token; |
!!!next-token; |
| 2998 |
next B; |
next B; |
| 2999 |
} else { |
} else { |
| 3000 |
!!!cp ('t193'); |
!!!cp ('t193'); |
| 3001 |
# |
# |
| 3002 |
} |
} |
| 3003 |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 3004 |
for my $entry (@{$self->{open_elements}}) { |
for my $entry (@{$self->{open_elements}}) { |
| 3005 |
unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) { |
unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) { |
| 3092 |
next B; |
next B; |
| 3093 |
} elsif ($token->{type} == START_TAG_TOKEN) { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 3094 |
if ({ |
if ({ |
| 3095 |
tr => ($self->{insertion_mode} != IN_ROW_IM), |
tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM), |
| 3096 |
th => 1, td => 1, |
th => 1, td => 1, |
| 3097 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3098 |
if ($self->{insertion_mode} == IN_TABLE_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) { |
| 3099 |
## Clear back to table context |
## Clear back to table context |
| 3100 |
while (not ($self->{open_elements}->[-1]->[1] |
while (not ($self->{open_elements}->[-1]->[1] |
| 3101 |
& TABLE_SCOPING_EL)) { |
& TABLE_SCOPING_EL)) { |
| 3108 |
## reprocess in the "in table body" insertion mode... |
## reprocess in the "in table body" insertion mode... |
| 3109 |
} |
} |
| 3110 |
|
|
| 3111 |
if ($self->{insertion_mode} == IN_TABLE_BODY_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) { |
| 3112 |
unless ($token->{tag_name} eq 'tr') { |
unless ($token->{tag_name} eq 'tr') { |
| 3113 |
!!!cp ('t202'); |
!!!cp ('t202'); |
| 3114 |
!!!parse-error (type => 'missing start tag:tr', token => $token); |
!!!parse-error (type => 'missing start tag:tr', token => $token); |
| 3160 |
tbody => 1, tfoot => 1, thead => 1, |
tbody => 1, tfoot => 1, thead => 1, |
| 3161 |
tr => 1, # $self->{insertion_mode} == IN_ROW_IM |
tr => 1, # $self->{insertion_mode} == IN_ROW_IM |
| 3162 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3163 |
if ($self->{insertion_mode} == IN_ROW_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) { |
| 3164 |
## As if </tr> |
## As if </tr> |
| 3165 |
## have an element in table scope |
## have an element in table scope |
| 3166 |
my $i; |
my $i; |
| 3207 |
} |
} |
| 3208 |
} |
} |
| 3209 |
|
|
| 3210 |
if ($self->{insertion_mode} == IN_TABLE_BODY_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) { |
| 3211 |
## have an element in table scope |
## have an element in table scope |
| 3212 |
my $i; |
my $i; |
| 3213 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3421 |
$insert = $insert_to_foster; |
$insert = $insert_to_foster; |
| 3422 |
# |
# |
| 3423 |
} elsif ($token->{type} == END_TAG_TOKEN) { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 3424 |
if ($token->{tag_name} eq 'tr' and |
if ($token->{tag_name} eq 'tr' and |
| 3425 |
$self->{insertion_mode} == IN_ROW_IM) { |
($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) { |
| 3426 |
## have an element in table scope |
## have an element in table scope |
| 3427 |
my $i; |
my $i; |
| 3428 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3429 |
my $node = $self->{open_elements}->[$_]; |
my $node = $self->{open_elements}->[$_]; |
| 3462 |
!!!nack ('t231.1'); |
!!!nack ('t231.1'); |
| 3463 |
next B; |
next B; |
| 3464 |
} elsif ($token->{tag_name} eq 'table') { |
} elsif ($token->{tag_name} eq 'table') { |
| 3465 |
if ($self->{insertion_mode} == IN_ROW_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) { |
| 3466 |
## As if </tr> |
## As if </tr> |
| 3467 |
## have an element in table scope |
## have an element in table scope |
| 3468 |
my $i; |
my $i; |
| 3501 |
## reprocess in the "in table body" insertion mode... |
## reprocess in the "in table body" insertion mode... |
| 3502 |
} |
} |
| 3503 |
|
|
| 3504 |
if ($self->{insertion_mode} == IN_TABLE_BODY_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) { |
| 3505 |
## have an element in table scope |
## have an element in table scope |
| 3506 |
my $i; |
my $i; |
| 3507 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3583 |
tbody => 1, tfoot => 1, thead => 1, |
tbody => 1, tfoot => 1, thead => 1, |
| 3584 |
}->{$token->{tag_name}} and |
}->{$token->{tag_name}} and |
| 3585 |
$self->{insertion_mode} & ROW_IMS) { |
$self->{insertion_mode} & ROW_IMS) { |
| 3586 |
if ($self->{insertion_mode} == IN_ROW_IM) { |
if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) { |
| 3587 |
## have an element in table scope |
## have an element in table scope |
| 3588 |
my $i; |
my $i; |
| 3589 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3717 |
} else { |
} else { |
| 3718 |
die "$0: $token->{type}: Unknown token type"; |
die "$0: $token->{type}: Unknown token type"; |
| 3719 |
} |
} |
| 3720 |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
} elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) { |
| 3721 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 3722 |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) { |
| 3723 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 3847 |
!!!next-token; |
!!!next-token; |
| 3848 |
next B; |
next B; |
| 3849 |
} elsif ({ |
} elsif ({ |
| 3850 |
select => 1, input => 1, textarea => 1, |
select => 1, input => 1, textarea => 1, keygen => 1, |
| 3851 |
}->{$token->{tag_name}} or |
}->{$token->{tag_name}} or |
| 3852 |
($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and |
(($self->{insertion_mode} & IM_MASK) |
| 3853 |
|
== IN_SELECT_IN_TABLE_IM and |
| 3854 |
{ |
{ |
| 3855 |
caption => 1, table => 1, |
caption => 1, table => 1, |
| 3856 |
tbody => 1, tfoot => 1, thead => 1, |
tbody => 1, tfoot => 1, thead => 1, |
| 3857 |
tr => 1, td => 1, th => 1, |
tr => 1, td => 1, th => 1, |
| 3858 |
}->{$token->{tag_name}})) { |
}->{$token->{tag_name}})) { |
| 3859 |
## TODO: The type below is not good - <select> is replaced by </select> |
|
| 3860 |
!!!parse-error (type => 'not closed', text => 'select', |
## 1. Parse error. |
| 3861 |
token => $token); |
if ($token->{tag_name} eq 'select') { |
| 3862 |
## NOTE: As if the token were </select> (<select> case) or |
!!!parse-error (type => 'select in select', ## XXX: documentation |
| 3863 |
## as if there were </select> (otherwise). |
token => $token); |
| 3864 |
## have an element in table scope |
} else { |
| 3865 |
|
!!!parse-error (type => 'not closed', text => 'select', |
| 3866 |
|
token => $token); |
| 3867 |
|
} |
| 3868 |
|
|
| 3869 |
|
## 2./<select>-1. Unless "have an element in table scope" (select): |
| 3870 |
my $i; |
my $i; |
| 3871 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3872 |
my $node = $self->{open_elements}->[$_]; |
my $node = $self->{open_elements}->[$_]; |
| 3881 |
} # INSCOPE |
} # INSCOPE |
| 3882 |
unless (defined $i) { |
unless (defined $i) { |
| 3883 |
!!!cp ('t280'); |
!!!cp ('t280'); |
| 3884 |
!!!parse-error (type => 'unmatched end tag', |
if ($token->{tag_name} eq 'select') { |
| 3885 |
text => 'select', token => $token); |
## NOTE: This error would be raised when |
| 3886 |
## Ignore the token |
## |select.innerHTML = '<select>'| is executed; in this |
| 3887 |
|
## case two errors, "select in select" and "unmatched |
| 3888 |
|
## end tag" are reported to the user, the latter might |
| 3889 |
|
## be confusing but this is what the spec requires. |
| 3890 |
|
!!!parse-error (type => 'unmatched end tag', |
| 3891 |
|
text => 'select', |
| 3892 |
|
token => $token); |
| 3893 |
|
} |
| 3894 |
|
## Ignore the token. |
| 3895 |
!!!nack ('t280.1'); |
!!!nack ('t280.1'); |
| 3896 |
!!!next-token; |
!!!next-token; |
| 3897 |
next B; |
next B; |
| 3898 |
} |
} |
| 3899 |
|
|
| 3900 |
|
## 3. Otherwise, as if there were </select>: |
| 3901 |
|
|
| 3902 |
!!!cp ('t281'); |
!!!cp ('t281'); |
| 3903 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 3987 |
!!!nack ('t291.1'); |
!!!nack ('t291.1'); |
| 3988 |
!!!next-token; |
!!!next-token; |
| 3989 |
next B; |
next B; |
| 3990 |
} elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and |
} elsif (($self->{insertion_mode} & IM_MASK) |
| 3991 |
|
== IN_SELECT_IN_TABLE_IM and |
| 3992 |
{ |
{ |
| 3993 |
caption => 1, table => 1, tbody => 1, |
caption => 1, table => 1, tbody => 1, |
| 3994 |
tfoot => 1, thead => 1, tr => 1, td => 1, th => 1, |
tfoot => 1, thead => 1, tr => 1, td => 1, th => 1, |
| 4433 |
table => 1, |
table => 1, |
| 4434 |
hr => 1, |
hr => 1, |
| 4435 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 4436 |
|
|
| 4437 |
|
## 1. When there is an opening |form| element: |
| 4438 |
if ($token->{tag_name} eq 'form' and defined $self->{form_element}) { |
if ($token->{tag_name} eq 'form' and defined $self->{form_element}) { |
| 4439 |
!!!cp ('t350'); |
!!!cp ('t350'); |
| 4440 |
!!!parse-error (type => 'in form:form', token => $token); |
!!!parse-error (type => 'in form:form', token => $token); |
| 4444 |
next B; |
next B; |
| 4445 |
} |
} |
| 4446 |
|
|
| 4447 |
## has a p element in scope |
## 2. Close the |p| element, if any. |
| 4448 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
if ($token->{tag_name} ne 'table' or # The Hixie Quirk |
| 4449 |
if ($_->[1] == P_EL) { |
$self->{document}->manakai_compat_mode ne 'quirks') { |
| 4450 |
!!!cp ('t344'); |
## has a p element in scope |
| 4451 |
!!!back-token; # <form> |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4452 |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
if ($_->[1] == P_EL) { |
| 4453 |
line => $token->{line}, column => $token->{column}}; |
!!!cp ('t344'); |
| 4454 |
next B; |
!!!back-token; # <form> |
| 4455 |
} elsif ($_->[1] & SCOPING_EL) { |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
| 4456 |
!!!cp ('t345'); |
line => $token->{line}, column => $token->{column}}; |
| 4457 |
last INSCOPE; |
next B; |
| 4458 |
|
} elsif ($_->[1] & SCOPING_EL) { |
| 4459 |
|
!!!cp ('t345'); |
| 4460 |
|
last INSCOPE; |
| 4461 |
|
} |
| 4462 |
|
} # INSCOPE |
| 4463 |
|
} |
| 4464 |
|
|
| 4465 |
|
## 3. Close the opening <hn> element, if any. |
| 4466 |
|
if ({h1 => 1, h2 => 1, h3 => 1, |
| 4467 |
|
h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) { |
| 4468 |
|
if ($self->{open_elements}->[-1]->[1] == HEADING_EL) { |
| 4469 |
|
!!!parse-error (type => 'not closed', |
| 4470 |
|
text => $self->{open_elements}->[-1]->[0]->manakai_local_name, |
| 4471 |
|
token => $token); |
| 4472 |
|
pop @{$self->{open_elements}}; |
| 4473 |
} |
} |
| 4474 |
} # INSCOPE |
} |
| 4475 |
|
|
| 4476 |
|
## 4. Insertion. |
| 4477 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4478 |
if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') { |
if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') { |
| 4479 |
!!!nack ('t346.1'); |
!!!nack ('t346.1'); |
| 4517 |
} elsif ($token->{tag_name} eq 'li') { |
} elsif ($token->{tag_name} eq 'li') { |
| 4518 |
## NOTE: As normal, but imply </li> when there's another <li> ... |
## NOTE: As normal, but imply </li> when there's another <li> ... |
| 4519 |
|
|
| 4520 |
## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>) |
## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>):: |
| 4521 |
## Interpreted as <li><foo/></li><li/> (non-conforming) |
## Interpreted as <li><foo/></li><li/> (non-conforming): |
| 4522 |
## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7), |
## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7), |
| 4523 |
## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S), |
## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S), |
| 4524 |
## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S), |
## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S), |
| 4525 |
## object (Fx) |
## object (Fx) |
| 4526 |
## Generate non-tree (non-conforming) |
## Generate non-tree (non-conforming): |
| 4527 |
## basefont (IE7 (where basefont is non-void)), center (IE), |
## basefont (IE7 (where basefont is non-void)), center (IE), |
| 4528 |
## form (IE), hn (IE) |
## form (IE), hn (IE) |
| 4529 |
## address, div, p (<li><foo><li> == <li><foo/></li><li/>) |
## address, div, p (<li><foo><li> == <li><foo/></li><li/>):: |
| 4530 |
## Interpreted as <li><foo><li/></foo></li> (non-conforming) |
## Interpreted as <li><foo><li/></foo></li> (non-conforming): |
| 4531 |
## div (Fx, S) |
## div (Fx, S) |
| 4532 |
|
|
| 4533 |
my $non_optional; |
my $non_optional; |
| 4843 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4844 |
{type => START_TAG_TOKEN, tag_name => 'hr', |
{type => START_TAG_TOKEN, tag_name => 'hr', |
| 4845 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
|
{type => START_TAG_TOKEN, tag_name => 'p', |
|
|
line => $token->{line}, column => $token->{column}}, |
|
| 4846 |
{type => START_TAG_TOKEN, tag_name => 'label', |
{type => START_TAG_TOKEN, tag_name => 'label', |
| 4847 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4848 |
); |
); |
| 4865 |
#{type => CHARACTER_TOKEN, data => ''}, # SHOULD |
#{type => CHARACTER_TOKEN, data => ''}, # SHOULD |
| 4866 |
{type => END_TAG_TOKEN, tag_name => 'label', |
{type => END_TAG_TOKEN, tag_name => 'label', |
| 4867 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
|
{type => END_TAG_TOKEN, tag_name => 'p', |
|
|
line => $token->{line}, column => $token->{column}}, |
|
| 4868 |
{type => START_TAG_TOKEN, tag_name => 'hr', |
{type => START_TAG_TOKEN, tag_name => 'hr', |
| 4869 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4870 |
{type => END_TAG_TOKEN, tag_name => 'form', |
{type => END_TAG_TOKEN, tag_name => 'form', |
| 4874 |
next B; |
next B; |
| 4875 |
} |
} |
| 4876 |
} elsif ($token->{tag_name} eq 'textarea') { |
} elsif ($token->{tag_name} eq 'textarea') { |
| 4877 |
## Step 1 |
## 1. Insert |
| 4878 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4879 |
|
|
| 4880 |
## Step 2 |
## Step 2 # XXX |
| 4881 |
## TODO: $self->{form_element} if defined |
## TODO: $self->{form_element} if defined |
| 4882 |
|
|
| 4883 |
## Step 3 |
## 2. Drop U+000A LINE FEED |
| 4884 |
$self->{ignore_newline} = 1; |
$self->{ignore_newline} = 1; |
| 4885 |
|
|
| 4886 |
## Step 4 |
## 3. RCDATA |
|
## ISSUE: This step is wrong. (r2302 enbugged) |
|
|
|
|
|
## Step 5 |
|
| 4887 |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
| 4888 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 4889 |
|
|
| 4890 |
## Step 6-7 |
## 4., 6. Insertion mode |
| 4891 |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
| 4892 |
|
|
| 4893 |
|
## XXX: 5. frameset-ok flag |
| 4894 |
|
|
| 4895 |
!!!nack ('t392.1'); |
!!!nack ('t392.1'); |
| 4896 |
!!!next-token; |
!!!next-token; |
| 4897 |
next B; |
next B; |
| 4947 |
last INSCOPE; |
last INSCOPE; |
| 4948 |
} |
} |
| 4949 |
} # INSCOPE |
} # INSCOPE |
| 4950 |
|
|
| 4951 |
|
## TODO: <non-ruby><rt> is not allowed. |
| 4952 |
|
|
| 4953 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4954 |
|
|
| 5046 |
|
|
| 5047 |
if ($self->{insertion_mode} & TABLE_IMS or |
if ($self->{insertion_mode} & TABLE_IMS or |
| 5048 |
$self->{insertion_mode} & BODY_TABLE_IMS or |
$self->{insertion_mode} & BODY_TABLE_IMS or |
| 5049 |
$self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) { |
| 5050 |
!!!cp ('t400.1'); |
!!!cp ('t400.1'); |
| 5051 |
$self->{insertion_mode} = IN_SELECT_IN_TABLE_IM; |
$self->{insertion_mode} = IN_SELECT_IN_TABLE_IM; |
| 5052 |
} else { |
} else { |
| 5063 |
} |
} |
| 5064 |
} elsif ($token->{type} == END_TAG_TOKEN) { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 5065 |
if ($token->{tag_name} eq 'body') { |
if ($token->{tag_name} eq 'body') { |
| 5066 |
## has a |body| element in scope |
|
| 5067 |
|
## 1. If not "have an element in scope": |
| 5068 |
|
## "has a |body| element in scope" |
| 5069 |
my $i; |
my $i; |
| 5070 |
INSCOPE: { |
INSCOPE: { |
| 5071 |
for (reverse @{$self->{open_elements}}) { |
for (reverse @{$self->{open_elements}}) { |
| 5088 |
next B; |
next B; |
| 5089 |
} # INSCOPE |
} # INSCOPE |
| 5090 |
|
|
| 5091 |
|
## 2. If unclosed elements: |
| 5092 |
for (@{$self->{open_elements}}) { |
for (@{$self->{open_elements}}) { |
| 5093 |
unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) { |
unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL || |
| 5094 |
|
$_->[1] == OPTGROUP_EL || |
| 5095 |
|
$_->[1] == OPTION_EL || |
| 5096 |
|
$_->[1] == RUBY_COMPONENT_EL) { |
| 5097 |
!!!cp ('t403'); |
!!!cp ('t403'); |
| 5098 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 5099 |
text => $_->[0]->manakai_local_name, |
text => $_->[0]->manakai_local_name, |
| 5104 |
} |
} |
| 5105 |
} |
} |
| 5106 |
|
|
| 5107 |
|
## 3. Switch the insertion mode. |
| 5108 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 5109 |
!!!next-token; |
!!!next-token; |
| 5110 |
next B; |
next B; |
| 5491 |
## TODO: script stuffs |
## TODO: script stuffs |
| 5492 |
} # _tree_construction_main |
} # _tree_construction_main |
| 5493 |
|
|
| 5494 |
|
## XXX: The organization of this method is somewhat out of date, although |
| 5495 |

## it still implements what the current spec documents. |
| 5496 |
sub set_inner_html ($$$$;$) { |
sub set_inner_html ($$$$;$) { |
| 5497 |
my $class = shift; |
my $class = shift; |
| 5498 |
my $node = shift; |
my $node = shift; # /context/ |
| 5499 |
#my $s = \$_[0]; |
#my $s = \$_[0]; |
| 5500 |
my $onerror = $_[1]; |
my $onerror = $_[1]; |
| 5501 |
my $get_wrapper = $_[2] || sub ($) { return $_[0] }; |
my $get_wrapper = $_[2] || sub ($) { return $_[0] }; |
| 5503 |
## ISSUE: Should {confident} be true? |
## ISSUE: Should {confident} be true? |
| 5504 |
|
|
| 5505 |
my $nt = $node->node_type; |
my $nt = $node->node_type; |
| 5506 |
if ($nt == 9) { |
if ($nt == 9) { # Document (invoke the algorithm with no /context/ element) |
| 5507 |
# MUST |
# MUST |
| 5508 |
|
|
| 5509 |
## Step 1 # MUST |
## Step 1 # MUST |
| 5518 |
|
|
| 5519 |
## Step 3, 4, 5 # MUST |
## Step 3, 4, 5 # MUST |
| 5520 |
$class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper); |
$class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper); |
| 5521 |
} elsif ($nt == 1) { |
} elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element) |
| 5522 |
## TODO: If non-html element |
## TODO: If non-html element |
| 5523 |
|
|
| 5524 |
## NOTE: Most of this code is copied from |parse_string| |
## NOTE: Most of this code is copied from |parse_string| |
| 5525 |
|
|
| 5526 |
## TODO: Support for $get_wrapper |
## TODO: Support for $get_wrapper |
| 5527 |
|
|
| 5528 |
## Step 1 # MUST |
## F1. Create an HTML document. |
| 5529 |
my $this_doc = $node->owner_document; |
my $this_doc = $node->owner_document; |
| 5530 |
my $doc = $this_doc->implementation->create_document; |
my $doc = $this_doc->implementation->create_document; |
| 5531 |
$doc->manakai_is_html (1); |
$doc->manakai_is_html (1); |
| 5532 |
|
|
| 5533 |
|
## F2. Propagate quirkness flag |
| 5534 |
|
my $node_doc = $node->owner_document; |
| 5535 |
|
$doc->manakai_compat_mode ($node_doc->manakai_compat_mode); |
| 5536 |
|
|
| 5537 |
|
## F3. Create an HTML parser |
| 5538 |
my $p = $class->new; |
my $p = $class->new; |
| 5539 |
$p->{document} = $doc; |
$p->{document} = $doc; |
| 5540 |
|
|
| 5662 |
$p->_initialize_tokenizer; |
$p->_initialize_tokenizer; |
| 5663 |
$p->_initialize_tree_constructor; |
$p->_initialize_tree_constructor; |
| 5664 |
|
|
| 5665 |
## Step 2 |
## F4. If /context/ is not undef... |
| 5666 |
|
|
| 5667 |
|
## F4.1. content model flag |
| 5668 |
my $node_ln = $node->manakai_local_name; |
my $node_ln = $node->manakai_local_name; |
| 5669 |
$p->{content_model} = { |
$p->{content_model} = { |
| 5670 |
title => RCDATA_CONTENT_MODEL, |
title => RCDATA_CONTENT_MODEL, |
| 5680 |
}->{$node_ln}; |
}->{$node_ln}; |
| 5681 |
$p->{content_model} = PCDATA_CONTENT_MODEL |
$p->{content_model} = PCDATA_CONTENT_MODEL |
| 5682 |
unless defined $p->{content_model}; |
unless defined $p->{content_model}; |
|
## ISSUE: What is "the name of the element"? local name? |
|
| 5683 |
|
|
| 5684 |
$p->{inner_html_node} = [$node, $el_category->{$node_ln}]; |
$p->{inner_html_node} = [$node, $el_category->{$node_ln}]; |
| 5685 |
## TODO: Foreign element OK? |
## TODO: Foreign element OK? |
| 5686 |
|
|
| 5687 |
## Step 3 |
## F4.2. Root |html| element |
| 5688 |
my $root = $doc->create_element_ns |
my $root = $doc->create_element_ns |
| 5689 |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
| 5690 |
|
|
| 5691 |
## Step 4 # MUST |
## F4.3. |
| 5692 |
$doc->append_child ($root); |
$doc->append_child ($root); |
| 5693 |
|
|
| 5694 |
## Step 5 # MUST |
## F4.4. |
| 5695 |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
| 5696 |
|
|
| 5697 |
undef $p->{head_element}; |
undef $p->{head_element}; |
| 5698 |
undef $p->{head_element_inserted}; |
undef $p->{head_element_inserted}; |
| 5699 |
|
|
| 5700 |
## Step 6 # MUST |
## F4.5. |
| 5701 |
$p->_reset_insertion_mode; |
$p->_reset_insertion_mode; |
| 5702 |
|
|
| 5703 |
## Step 7 # MUST |
## F4.6. |
| 5704 |
my $anode = $node; |
my $anode = $node; |
| 5705 |
AN: while (defined $anode) { |
AN: while (defined $anode) { |
| 5706 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
| 5715 |
} |
} |
| 5716 |
$anode = $anode->parent_node; |
$anode = $anode->parent_node; |
| 5717 |
} # AN |
} # AN |
| 5718 |
|
|
| 5719 |
## Step 9 # MUST |
## F.6. Start the parser. |
| 5720 |
{ |
{ |
| 5721 |
my $self = $p; |
my $self = $p; |
| 5722 |
!!!next-token; |
!!!next-token; |
| 5723 |
} |
} |
| 5724 |
$p->_tree_construction_main; |
$p->_tree_construction_main; |
| 5725 |
|
|
| 5726 |
## Step 10 # MUST |
## F.7. |
| 5727 |
my @cn = @{$node->child_nodes}; |
my @cn = @{$node->child_nodes}; |
| 5728 |
for (@cn) { |
for (@cn) { |
| 5729 |
$node->remove_child ($_); |
$node->remove_child ($_); |