| 304 |
sub BODY_AFTER_IMS () { 0b100000000 } |
sub BODY_AFTER_IMS () { 0b100000000 } |
| 305 |
sub FRAME_IMS () { 0b1000000000 } |
sub FRAME_IMS () { 0b1000000000 } |
| 306 |
|
|
| 307 |
|
## NOTE: "initial" and "before html" insertion modes have no constants. |
| 308 |
|
|
| 309 |
|
## NOTE: "after after body" insertion mode. |
| 310 |
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS } |
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS } |
| 311 |
|
|
| 312 |
|
## NOTE: "after after frameset" insertion mode. |
| 313 |
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS } |
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS } |
| 314 |
|
|
| 315 |
sub IN_HEAD_IM () { HEAD_IMS | 0b00 } |
sub IN_HEAD_IM () { HEAD_IMS | 0b00 } |
| 316 |
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 } |
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 } |
| 317 |
sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 } |
sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 } |
| 2426 |
|
|
| 2427 |
!!!next-token; |
!!!next-token; |
| 2428 |
|
|
|
$self->{insertion_mode} = BEFORE_HEAD_IM; |
|
| 2429 |
undef $self->{form_element}; |
undef $self->{form_element}; |
| 2430 |
undef $self->{head_element}; |
undef $self->{head_element}; |
| 2431 |
$self->{open_elements} = []; |
$self->{open_elements} = []; |
| 2432 |
undef $self->{inner_html_node}; |
undef $self->{inner_html_node}; |
| 2433 |
|
|
| 2434 |
|
## NOTE: The "initial" insertion mode. |
| 2435 |
$self->_tree_construction_initial; # MUST |
$self->_tree_construction_initial; # MUST |
| 2436 |
|
|
| 2437 |
|
## NOTE: The "before html" insertion mode. |
| 2438 |
$self->_tree_construction_root_element; |
$self->_tree_construction_root_element; |
| 2439 |
|
$self->{insertion_mode} = BEFORE_HEAD_IM; |
| 2440 |
|
|
| 2441 |
|
## NOTE: The "before head" insertion mode and so on. |
| 2442 |
$self->_tree_construction_main; |
$self->_tree_construction_main; |
| 2443 |
} # _construct_tree |
} # _construct_tree |
| 2444 |
|
|
| 2445 |
sub _tree_construction_initial ($) { |
sub _tree_construction_initial ($) { |
| 2446 |
my $self = shift; |
my $self = shift; |
| 2447 |
|
|
| 2448 |
|
## NOTE: "initial" insertion mode |
| 2449 |
|
|
| 2450 |
INITIAL: { |
INITIAL: { |
| 2451 |
if ($token->{type} == DOCTYPE_TOKEN) { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 2452 |
## NOTE: Conformance checkers MAY, instead of reporting "not HTML5" |
## NOTE: Conformance checkers MAY, instead of reporting "not HTML5" |
| 2593 |
!!!cp ('t13'); |
!!!cp ('t13'); |
| 2594 |
} |
} |
| 2595 |
|
|
| 2596 |
## Go to the root element phase. |
## Go to the "before html" insertion mode. |
| 2597 |
!!!next-token; |
!!!next-token; |
| 2598 |
return; |
return; |
| 2599 |
} elsif ({ |
} elsif ({ |
| 2604 |
!!!cp ('t14'); |
!!!cp ('t14'); |
| 2605 |
!!!parse-error (type => 'no DOCTYPE'); |
!!!parse-error (type => 'no DOCTYPE'); |
| 2606 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
| 2607 |
## Go to the root element phase |
## Go to the "before html" insertion mode. |
| 2608 |
## reprocess |
## reprocess |
| 2609 |
return; |
return; |
| 2610 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 2613 |
|
|
| 2614 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 2615 |
!!!cp ('t15'); |
!!!cp ('t15'); |
| 2616 |
## Stay in the phase |
## Stay in the insertion mode. |
| 2617 |
!!!next-token; |
!!!next-token; |
| 2618 |
redo INITIAL; |
redo INITIAL; |
| 2619 |
} else { |
} else { |
| 2625 |
|
|
| 2626 |
!!!parse-error (type => 'no DOCTYPE'); |
!!!parse-error (type => 'no DOCTYPE'); |
| 2627 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
| 2628 |
## Go to the root element phase |
## Go to the "before html" insertion mode. |
| 2629 |
## reprocess |
## reprocess |
| 2630 |
return; |
return; |
| 2631 |
} elsif ($token->{type} == COMMENT_TOKEN) { |
} elsif ($token->{type} == COMMENT_TOKEN) { |
| 2633 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
| 2634 |
$self->{document}->append_child ($comment); |
$self->{document}->append_child ($comment); |
| 2635 |
|
|
| 2636 |
## Stay in the phase. |
## Stay in the insertion mode. |
| 2637 |
!!!next-token; |
!!!next-token; |
| 2638 |
redo INITIAL; |
redo INITIAL; |
| 2639 |
} else { |
} else { |
| 2646 |
|
|
| 2647 |
sub _tree_construction_root_element ($) { |
sub _tree_construction_root_element ($) { |
| 2648 |
my $self = shift; |
my $self = shift; |
| 2649 |
|
|
| 2650 |
|
## NOTE: "before html" insertion mode. |
| 2651 |
|
|
| 2652 |
B: { |
B: { |
| 2653 |
if ($token->{type} == DOCTYPE_TOKEN) { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 2654 |
!!!cp ('t19'); |
!!!cp ('t19'); |
| 2655 |
!!!parse-error (type => 'in html:#DOCTYPE'); |
!!!parse-error (type => 'in html:#DOCTYPE'); |
| 2656 |
## Ignore the token |
## Ignore the token |
| 2657 |
## Stay in the phase |
## Stay in the insertion mode. |
| 2658 |
!!!next-token; |
!!!next-token; |
| 2659 |
redo B; |
redo B; |
| 2660 |
} elsif ($token->{type} == COMMENT_TOKEN) { |
} elsif ($token->{type} == COMMENT_TOKEN) { |
| 2661 |
!!!cp ('t20'); |
!!!cp ('t20'); |
| 2662 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
| 2663 |
$self->{document}->append_child ($comment); |
$self->{document}->append_child ($comment); |
| 2664 |
## Stay in the phase |
## Stay in the insertion mode. |
| 2665 |
!!!next-token; |
!!!next-token; |
| 2666 |
redo B; |
redo B; |
| 2667 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 2670 |
|
|
| 2671 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 2672 |
!!!cp ('t21'); |
!!!cp ('t21'); |
| 2673 |
## Stay in the phase |
## Stay in the insertion mode. |
| 2674 |
!!!next-token; |
!!!next-token; |
| 2675 |
redo B; |
redo B; |
| 2676 |
} else { |
} else { |
| 2684 |
|
|
| 2685 |
# |
# |
| 2686 |
} elsif ($token->{type} == START_TAG_TOKEN) { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 2687 |
if ($token->{tag_name} eq 'html' and |
if ($token->{tag_name} eq 'html') { |
| 2688 |
$token->{attributes}->{manifest}) { |
my $root_element; |
| 2689 |
!!!cp ('t24'); |
!!!create-element ($root_element, $token->{tag_name}, $token->{attributes}); |
| 2690 |
$self->{application_cache_selection} |
$self->{document}->append_child ($root_element); |
| 2691 |
->($token->{attributes}->{manifest}->{value}); |
push @{$self->{open_elements}}, [$root_element, 'html']; |
| 2692 |
## ISSUE: No relative reference resolution? |
|
| 2693 |
|
if ($token->{attributes}->{manifest}) { |
| 2694 |
|
!!!cp ('t24'); |
| 2695 |
|
$self->{application_cache_selection} |
| 2696 |
|
->($token->{attributes}->{manifest}->{value}); |
| 2697 |
|
## ISSUE: No relative reference resolution? |
| 2698 |
|
} else { |
| 2699 |
|
!!!cp ('t25'); |
| 2700 |
|
$self->{application_cache_selection}->(undef); |
| 2701 |
|
} |
| 2702 |
|
|
| 2703 |
|
!!!next-token; |
| 2704 |
|
return; ## Go to the "before head" insertion mode. |
| 2705 |
} else { |
} else { |
| 2706 |
!!!cp ('t25'); |
!!!cp ('t25.1'); |
| 2707 |
$self->{application_cache_selection}->(undef); |
# |
| 2708 |
} |
} |
|
|
|
|
## ISSUE: There is an issue in the spec |
|
|
# |
|
| 2709 |
} elsif ({ |
} elsif ({ |
| 2710 |
END_TAG_TOKEN, 1, |
END_TAG_TOKEN, 1, |
| 2711 |
END_OF_FILE_TOKEN, 1, |
END_OF_FILE_TOKEN, 1, |
| 2712 |
}->{$token->{type}}) { |
}->{$token->{type}}) { |
| 2713 |
!!!cp ('t26'); |
!!!cp ('t26'); |
|
$self->{application_cache_selection}->(undef); |
|
|
|
|
|
## ISSUE: There is an issue in the spec |
|
| 2714 |
# |
# |
| 2715 |
} else { |
} else { |
| 2716 |
die "$0: $token->{type}: Unknown token type"; |
die "$0: $token->{type}: Unknown token type"; |
| 2717 |
} |
} |
| 2718 |
|
|
| 2719 |
my $root_element; !!!create-element ($root_element, 'html'); |
my $root_element; !!!create-element ($root_element, 'html'); |
| 2720 |
$self->{document}->append_child ($root_element); |
$self->{document}->append_child ($root_element); |
| 2721 |
push @{$self->{open_elements}}, [$root_element, 'html']; |
push @{$self->{open_elements}}, [$root_element, 'html']; |
| 2722 |
## reprocess |
|
| 2723 |
#redo B; |
$self->{application_cache_selection}->(undef); |
| 2724 |
return; ## Go to the main phase. |
|
| 2725 |
|
## NOTE: Reprocess the token. |
| 2726 |
|
return; ## Go to the "before head" insertion mode. |
| 2727 |
|
|
| 2728 |
|
## ISSUE: There is an issue in the spec |
| 2729 |
} # B |
} # B |
| 2730 |
|
|
| 2731 |
die "$0: _tree_construction_root_element: This should never be reached"; |
die "$0: _tree_construction_root_element: This should never be reached"; |
| 3300 |
$token->{tag_name} eq 'html') { |
$token->{tag_name} eq 'html') { |
| 3301 |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 3302 |
!!!cp ('t79'); |
!!!cp ('t79'); |
|
## Turn into the main phase |
|
| 3303 |
!!!parse-error (type => 'after html:html'); |
!!!parse-error (type => 'after html:html'); |
| 3304 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 3305 |
} elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
| 3306 |
!!!cp ('t80'); |
!!!cp ('t80'); |
|
## Turn into the main phase |
|
| 3307 |
!!!parse-error (type => 'after html:html'); |
!!!parse-error (type => 'after html:html'); |
| 3308 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 3309 |
} else { |
} else { |
| 3310 |
!!!cp ('t81'); |
!!!cp ('t81'); |
| 3311 |
} |
} |
| 3312 |
|
|
| 3313 |
## ISSUE: "aa<html>" is not a parse error. |
!!!cp ('t82'); |
| 3314 |
## ISSUE: "<html>" in fragment is not a parse error. |
!!!parse-error (type => 'not first start tag'); |
|
unless ($token->{first_start_tag}) { |
|
|
!!!cp ('t82'); |
|
|
!!!parse-error (type => 'not first start tag'); |
|
|
} else { |
|
|
!!!cp ('t83'); |
|
|
} |
|
| 3315 |
my $top_el = $self->{open_elements}->[0]->[0]; |
my $top_el = $self->{open_elements}->[0]->[0]; |
| 3316 |
for my $attr_name (keys %{$token->{attributes}}) { |
for my $attr_name (keys %{$token->{attributes}}) { |
| 3317 |
unless ($top_el->has_attribute_ns (undef, $attr_name)) { |
unless ($top_el->has_attribute_ns (undef, $attr_name)) { |
| 5143 |
!!!cp ('t301'); |
!!!cp ('t301'); |
| 5144 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
| 5145 |
|
|
| 5146 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "after body" insertion mode. |
| 5147 |
} else { |
} else { |
| 5148 |
!!!cp ('t302'); |
!!!cp ('t302'); |
| 5149 |
} |
} |
| 5159 |
!!!cp ('t303'); |
!!!cp ('t303'); |
| 5160 |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
| 5161 |
|
|
| 5162 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "after body" insertion mode. |
| 5163 |
} else { |
} else { |
| 5164 |
!!!cp ('t304'); |
!!!cp ('t304'); |
| 5165 |
} |
} |
| 5176 |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
| 5177 |
|
|
| 5178 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 5179 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "after body" insertion mode. |
| 5180 |
} else { |
} else { |
| 5181 |
!!!cp ('t306'); |
!!!cp ('t306'); |
| 5182 |
} |
} |
| 5230 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
| 5231 |
|
|
| 5232 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 5233 |
## Reprocess in the "main" phase, "after frameset"... |
## Reprocess in the "after frameset" insertion mode. |
| 5234 |
!!!parse-error (type => 'after frameset:#character'); |
!!!parse-error (type => 'after frameset:#character'); |
| 5235 |
} |
} |
| 5236 |
|
|
| 5252 |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
| 5253 |
|
|
| 5254 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 5255 |
## Process in the "main" phase, "after frameset" insertion mode... |
## Process in the "after frameset" insertion mode. |
| 5256 |
} else { |
} else { |
| 5257 |
!!!cp ('t317'); |
!!!cp ('t317'); |
| 5258 |
} |
} |
| 5293 |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
| 5294 |
|
|
| 5295 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 5296 |
## Process in the "main" phase, "after frameset" insertion mode... |
## Process in the "after frameset" insertion mode. |
| 5297 |
} else { |
} else { |
| 5298 |
!!!cp ('t324'); |
!!!cp ('t324'); |
| 5299 |
} |
} |
| 6340 |
redo B; |
redo B; |
| 6341 |
} # B |
} # B |
| 6342 |
|
|
|
## NOTE: The "trailing end" phase in HTML5 is split into |
|
|
## two insertion modes: "after html body" and "after html frameset". |
|
|
## NOTE: States in the main stage is preserved while |
|
|
## the parser stays in the trailing end phase. # MUST |
|
|
|
|
| 6343 |
## Stop parsing # MUST |
## Stop parsing # MUST |
| 6344 |
|
|
| 6345 |
## TODO: script stuffs |
## TODO: script stuffs |
| 6381 |
my $p = $class->new; |
my $p = $class->new; |
| 6382 |
$p->{document} = $doc; |
$p->{document} = $doc; |
| 6383 |
|
|
| 6384 |
## Step 9 # MUST |
## Step 8 # MUST |
| 6385 |
my $i = 0; |
my $i = 0; |
| 6386 |
my $line = 1; |
my $line = 1; |
| 6387 |
my $column = 0; |
my $column = 0; |
| 6448 |
|
|
| 6449 |
$p->{inner_html_node} = [$node, $node_ln]; |
$p->{inner_html_node} = [$node, $node_ln]; |
| 6450 |
|
|
| 6451 |
## Step 4 |
## Step 3 |
| 6452 |
my $root = $doc->create_element_ns |
my $root = $doc->create_element_ns |
| 6453 |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
| 6454 |
|
|
| 6455 |
## Step 5 # MUST |
## Step 4 # MUST |
| 6456 |
$doc->append_child ($root); |
$doc->append_child ($root); |
| 6457 |
|
|
| 6458 |
## Step 6 # MUST |
## Step 5 # MUST |
| 6459 |
push @{$p->{open_elements}}, [$root, 'html']; |
push @{$p->{open_elements}}, [$root, 'html']; |
| 6460 |
|
|
| 6461 |
undef $p->{head_element}; |
undef $p->{head_element}; |
| 6462 |
|
|
| 6463 |
## Step 7 # MUST |
## Step 6 # MUST |
| 6464 |
$p->_reset_insertion_mode; |
$p->_reset_insertion_mode; |
| 6465 |
|
|
| 6466 |
## Step 8 # MUST |
## Step 7 # MUST |
| 6467 |
my $anode = $node; |
my $anode = $node; |
| 6468 |
AN: while (defined $anode) { |
AN: while (defined $anode) { |
| 6469 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
| 6479 |
$anode = $anode->parent_node; |
$anode = $anode->parent_node; |
| 6480 |
} # AN |
} # AN |
| 6481 |
|
|
| 6482 |
## Step 3 # MUST |
## Step 9 # MUST |
|
## Step 10 # MUST |
|
| 6483 |
{ |
{ |
| 6484 |
my $self = $p; |
my $self = $p; |
| 6485 |
!!!next-token; |
!!!next-token; |
| 6486 |
} |
} |
| 6487 |
$p->_tree_construction_main; |
$p->_tree_construction_main; |
| 6488 |
|
|
| 6489 |
## Step 11 # MUST |
## Step 10 # MUST |
| 6490 |
my @cn = @{$node->child_nodes}; |
my @cn = @{$node->child_nodes}; |
| 6491 |
for (@cn) { |
for (@cn) { |
| 6492 |
$node->remove_child ($_); |
$node->remove_child ($_); |
| 6493 |
} |
} |
| 6494 |
## ISSUE: mutation events? read-only? |
## ISSUE: mutation events? read-only? |
| 6495 |
|
|
| 6496 |
## Step 12 # MUST |
## Step 11 # MUST |
| 6497 |
@cn = @{$root->child_nodes}; |
@cn = @{$root->child_nodes}; |
| 6498 |
for (@cn) { |
for (@cn) { |
| 6499 |
$this_doc->adopt_node ($_); |
$this_doc->adopt_node ($_); |