304 |
sub BODY_AFTER_IMS () { 0b100000000 } |
sub BODY_AFTER_IMS () { 0b100000000 } |
305 |
sub FRAME_IMS () { 0b1000000000 } |
sub FRAME_IMS () { 0b1000000000 } |
306 |
|
|
307 |
|
## NOTE: "initial" and "before html" insertion modes have no constants. |
308 |
|
|
309 |
|
## NOTE: "after after body" insertion mode. |
310 |
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS } |
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS } |
311 |
|
|
312 |
|
## NOTE: "after after frameset" insertion mode. |
313 |
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS } |
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS } |
314 |
|
|
315 |
sub IN_HEAD_IM () { HEAD_IMS | 0b00 } |
sub IN_HEAD_IM () { HEAD_IMS | 0b00 } |
316 |
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 } |
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 } |
317 |
sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 } |
sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 } |
2426 |
|
|
2427 |
!!!next-token; |
!!!next-token; |
2428 |
|
|
|
$self->{insertion_mode} = BEFORE_HEAD_IM; |
|
2429 |
undef $self->{form_element}; |
undef $self->{form_element}; |
2430 |
undef $self->{head_element}; |
undef $self->{head_element}; |
2431 |
$self->{open_elements} = []; |
$self->{open_elements} = []; |
2432 |
undef $self->{inner_html_node}; |
undef $self->{inner_html_node}; |
2433 |
|
|
2434 |
|
## NOTE: The "initial" insertion mode. |
2435 |
$self->_tree_construction_initial; # MUST |
$self->_tree_construction_initial; # MUST |
2436 |
|
|
2437 |
|
## NOTE: The "before html" insertion mode. |
2438 |
$self->_tree_construction_root_element; |
$self->_tree_construction_root_element; |
2439 |
|
$self->{insertion_mode} = BEFORE_HEAD_IM; |
2440 |
|
|
2441 |
|
## NOTE: The "before head" insertion mode and so on. |
2442 |
$self->_tree_construction_main; |
$self->_tree_construction_main; |
2443 |
} # _construct_tree |
} # _construct_tree |
2444 |
|
|
2445 |
sub _tree_construction_initial ($) { |
sub _tree_construction_initial ($) { |
2446 |
my $self = shift; |
my $self = shift; |
2447 |
|
|
2448 |
|
## NOTE: "initial" insertion mode |
2449 |
|
|
2450 |
INITIAL: { |
INITIAL: { |
2451 |
if ($token->{type} == DOCTYPE_TOKEN) { |
if ($token->{type} == DOCTYPE_TOKEN) { |
2452 |
## NOTE: Conformance checkers MAY, instead of reporting "not HTML5" |
## NOTE: Conformance checkers MAY, instead of reporting "not HTML5" |
2593 |
!!!cp ('t13'); |
!!!cp ('t13'); |
2594 |
} |
} |
2595 |
|
|
2596 |
## Go to the root element phase. |
## Go to the "before html" insertion mode. |
2597 |
!!!next-token; |
!!!next-token; |
2598 |
return; |
return; |
2599 |
} elsif ({ |
} elsif ({ |
2604 |
!!!cp ('t14'); |
!!!cp ('t14'); |
2605 |
!!!parse-error (type => 'no DOCTYPE'); |
!!!parse-error (type => 'no DOCTYPE'); |
2606 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
2607 |
## Go to the root element phase |
## Go to the "before html" insertion mode. |
2608 |
## reprocess |
## reprocess |
2609 |
return; |
return; |
2610 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
2613 |
|
|
2614 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
2615 |
!!!cp ('t15'); |
!!!cp ('t15'); |
2616 |
## Stay in the phase |
## Stay in the insertion mode. |
2617 |
!!!next-token; |
!!!next-token; |
2618 |
redo INITIAL; |
redo INITIAL; |
2619 |
} else { |
} else { |
2625 |
|
|
2626 |
!!!parse-error (type => 'no DOCTYPE'); |
!!!parse-error (type => 'no DOCTYPE'); |
2627 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
2628 |
## Go to the root element phase |
## Go to the "before html" insertion mode. |
2629 |
## reprocess |
## reprocess |
2630 |
return; |
return; |
2631 |
} elsif ($token->{type} == COMMENT_TOKEN) { |
} elsif ($token->{type} == COMMENT_TOKEN) { |
2633 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
2634 |
$self->{document}->append_child ($comment); |
$self->{document}->append_child ($comment); |
2635 |
|
|
2636 |
## Stay in the phase. |
## Stay in the insertion mode. |
2637 |
!!!next-token; |
!!!next-token; |
2638 |
redo INITIAL; |
redo INITIAL; |
2639 |
} else { |
} else { |
2646 |
|
|
2647 |
sub _tree_construction_root_element ($) { |
sub _tree_construction_root_element ($) { |
2648 |
my $self = shift; |
my $self = shift; |
2649 |
|
|
2650 |
|
## NOTE: "before html" insertion mode. |
2651 |
|
|
2652 |
B: { |
B: { |
2653 |
if ($token->{type} == DOCTYPE_TOKEN) { |
if ($token->{type} == DOCTYPE_TOKEN) { |
2654 |
!!!cp ('t19'); |
!!!cp ('t19'); |
2655 |
!!!parse-error (type => 'in html:#DOCTYPE'); |
!!!parse-error (type => 'in html:#DOCTYPE'); |
2656 |
## Ignore the token |
## Ignore the token |
2657 |
## Stay in the phase |
## Stay in the insertion mode. |
2658 |
!!!next-token; |
!!!next-token; |
2659 |
redo B; |
redo B; |
2660 |
} elsif ($token->{type} == COMMENT_TOKEN) { |
} elsif ($token->{type} == COMMENT_TOKEN) { |
2661 |
!!!cp ('t20'); |
!!!cp ('t20'); |
2662 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
2663 |
$self->{document}->append_child ($comment); |
$self->{document}->append_child ($comment); |
2664 |
## Stay in the phase |
## Stay in the insertion mode. |
2665 |
!!!next-token; |
!!!next-token; |
2666 |
redo B; |
redo B; |
2667 |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
2670 |
|
|
2671 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
2672 |
!!!cp ('t21'); |
!!!cp ('t21'); |
2673 |
## Stay in the phase |
## Stay in the insertion mode. |
2674 |
!!!next-token; |
!!!next-token; |
2675 |
redo B; |
redo B; |
2676 |
} else { |
} else { |
2684 |
|
|
2685 |
# |
# |
2686 |
} elsif ($token->{type} == START_TAG_TOKEN) { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
2687 |
if ($token->{tag_name} eq 'html' and |
if ($token->{tag_name} eq 'html') { |
2688 |
$token->{attributes}->{manifest}) { |
my $root_element; |
2689 |
!!!cp ('t24'); |
!!!create-element ($root_element, $token->{tag_name}, $token->{attributes}); |
2690 |
$self->{application_cache_selection} |
$self->{document}->append_child ($root_element); |
2691 |
->($token->{attributes}->{manifest}->{value}); |
push @{$self->{open_elements}}, [$root_element, 'html']; |
2692 |
## ISSUE: No relative reference resolution? |
|
2693 |
|
if ($token->{attributes}->{manifest}) { |
2694 |
|
!!!cp ('t24'); |
2695 |
|
$self->{application_cache_selection} |
2696 |
|
->($token->{attributes}->{manifest}->{value}); |
2697 |
|
## ISSUE: No relative reference resolution? |
2698 |
|
} else { |
2699 |
|
!!!cp ('t25'); |
2700 |
|
$self->{application_cache_selection}->(undef); |
2701 |
|
} |
2702 |
|
|
2703 |
|
!!!next-token; |
2704 |
|
return; ## Go to the "before head" insertion mode. |
2705 |
} else { |
} else { |
2706 |
!!!cp ('t25'); |
!!!cp ('t25.1'); |
2707 |
$self->{application_cache_selection}->(undef); |
# |
2708 |
} |
} |
|
|
|
|
## ISSUE: There is an issue in the spec |
|
|
# |
|
2709 |
} elsif ({ |
} elsif ({ |
2710 |
END_TAG_TOKEN, 1, |
END_TAG_TOKEN, 1, |
2711 |
END_OF_FILE_TOKEN, 1, |
END_OF_FILE_TOKEN, 1, |
2712 |
}->{$token->{type}}) { |
}->{$token->{type}}) { |
2713 |
!!!cp ('t26'); |
!!!cp ('t26'); |
|
$self->{application_cache_selection}->(undef); |
|
|
|
|
|
## ISSUE: There is an issue in the spec |
|
2714 |
# |
# |
2715 |
} else { |
} else { |
2716 |
die "$0: $token->{type}: Unknown token type"; |
die "$0: $token->{type}: Unknown token type"; |
2717 |
} |
} |
2718 |
|
|
2719 |
my $root_element; !!!create-element ($root_element, 'html'); |
my $root_element; !!!create-element ($root_element, 'html'); |
2720 |
$self->{document}->append_child ($root_element); |
$self->{document}->append_child ($root_element); |
2721 |
push @{$self->{open_elements}}, [$root_element, 'html']; |
push @{$self->{open_elements}}, [$root_element, 'html']; |
2722 |
## reprocess |
|
2723 |
#redo B; |
$self->{application_cache_selection}->(undef); |
2724 |
return; ## Go to the main phase. |
|
2725 |
|
## NOTE: Reprocess the token. |
2726 |
|
return; ## Go to the "before head" insertion mode. |
2727 |
|
|
2728 |
|
## ISSUE: There is an issue in the spec |
2729 |
} # B |
} # B |
2730 |
|
|
2731 |
die "$0: _tree_construction_root_element: This should never be reached"; |
die "$0: _tree_construction_root_element: This should never be reached"; |
3300 |
$token->{tag_name} eq 'html') { |
$token->{tag_name} eq 'html') { |
3301 |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
3302 |
!!!cp ('t79'); |
!!!cp ('t79'); |
|
## Turn into the main phase |
|
3303 |
!!!parse-error (type => 'after html:html'); |
!!!parse-error (type => 'after html:html'); |
3304 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = AFTER_BODY_IM; |
3305 |
} elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
3306 |
!!!cp ('t80'); |
!!!cp ('t80'); |
|
## Turn into the main phase |
|
3307 |
!!!parse-error (type => 'after html:html'); |
!!!parse-error (type => 'after html:html'); |
3308 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
3309 |
} else { |
} else { |
3310 |
!!!cp ('t81'); |
!!!cp ('t81'); |
3311 |
} |
} |
3312 |
|
|
3313 |
## ISSUE: "aa<html>" is not a parse error. |
!!!cp ('t82'); |
3314 |
## ISSUE: "<html>" in fragment is not a parse error. |
!!!parse-error (type => 'not first start tag'); |
|
unless ($token->{first_start_tag}) { |
|
|
!!!cp ('t82'); |
|
|
!!!parse-error (type => 'not first start tag'); |
|
|
} else { |
|
|
!!!cp ('t83'); |
|
|
} |
|
3315 |
my $top_el = $self->{open_elements}->[0]->[0]; |
my $top_el = $self->{open_elements}->[0]->[0]; |
3316 |
for my $attr_name (keys %{$token->{attributes}}) { |
for my $attr_name (keys %{$token->{attributes}}) { |
3317 |
unless ($top_el->has_attribute_ns (undef, $attr_name)) { |
unless ($top_el->has_attribute_ns (undef, $attr_name)) { |
5143 |
!!!cp ('t301'); |
!!!cp ('t301'); |
5144 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
5145 |
|
|
5146 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "after body" insertion mode. |
5147 |
} else { |
} else { |
5148 |
!!!cp ('t302'); |
!!!cp ('t302'); |
5149 |
} |
} |
5159 |
!!!cp ('t303'); |
!!!cp ('t303'); |
5160 |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
5161 |
|
|
5162 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "after body" insertion mode. |
5163 |
} else { |
} else { |
5164 |
!!!cp ('t304'); |
!!!cp ('t304'); |
5165 |
} |
} |
5176 |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
5177 |
|
|
5178 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = AFTER_BODY_IM; |
5179 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "after body" insertion mode. |
5180 |
} else { |
} else { |
5181 |
!!!cp ('t306'); |
!!!cp ('t306'); |
5182 |
} |
} |
5230 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
5231 |
|
|
5232 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
5233 |
## Reprocess in the "main" phase, "after frameset"... |
## Reprocess in the "after frameset" insertion mode. |
5234 |
!!!parse-error (type => 'after frameset:#character'); |
!!!parse-error (type => 'after frameset:#character'); |
5235 |
} |
} |
5236 |
|
|
5252 |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
5253 |
|
|
5254 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
5255 |
## Process in the "main" phase, "after frameset" insertion mode... |
## Process in the "after frameset" insertion mode. |
5256 |
} else { |
} else { |
5257 |
!!!cp ('t317'); |
!!!cp ('t317'); |
5258 |
} |
} |
5293 |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
5294 |
|
|
5295 |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
5296 |
## Process in the "main" phase, "after frameset" insertion mode... |
## Process in the "after frameset" insertion mode. |
5297 |
} else { |
} else { |
5298 |
!!!cp ('t324'); |
!!!cp ('t324'); |
5299 |
} |
} |
6340 |
redo B; |
redo B; |
6341 |
} # B |
} # B |
6342 |
|
|
|
## NOTE: The "trailing end" phase in HTML5 is split into |
|
|
## two insertion modes: "after html body" and "after html frameset". |
|
|
## NOTE: States in the main stage is preserved while |
|
|
## the parser stays in the trailing end phase. # MUST |
|
|
|
|
6343 |
## Stop parsing # MUST |
## Stop parsing # MUST |
6344 |
|
|
6345 |
## TODO: script stuffs |
## TODO: script stuffs |
6381 |
my $p = $class->new; |
my $p = $class->new; |
6382 |
$p->{document} = $doc; |
$p->{document} = $doc; |
6383 |
|
|
6384 |
## Step 9 # MUST |
## Step 8 # MUST |
6385 |
my $i = 0; |
my $i = 0; |
6386 |
my $line = 1; |
my $line = 1; |
6387 |
my $column = 0; |
my $column = 0; |
6448 |
|
|
6449 |
$p->{inner_html_node} = [$node, $node_ln]; |
$p->{inner_html_node} = [$node, $node_ln]; |
6450 |
|
|
6451 |
## Step 4 |
## Step 3 |
6452 |
my $root = $doc->create_element_ns |
my $root = $doc->create_element_ns |
6453 |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
6454 |
|
|
6455 |
## Step 5 # MUST |
## Step 4 # MUST |
6456 |
$doc->append_child ($root); |
$doc->append_child ($root); |
6457 |
|
|
6458 |
## Step 6 # MUST |
## Step 5 # MUST |
6459 |
push @{$p->{open_elements}}, [$root, 'html']; |
push @{$p->{open_elements}}, [$root, 'html']; |
6460 |
|
|
6461 |
undef $p->{head_element}; |
undef $p->{head_element}; |
6462 |
|
|
6463 |
## Step 7 # MUST |
## Step 6 # MUST |
6464 |
$p->_reset_insertion_mode; |
$p->_reset_insertion_mode; |
6465 |
|
|
6466 |
## Step 8 # MUST |
## Step 7 # MUST |
6467 |
my $anode = $node; |
my $anode = $node; |
6468 |
AN: while (defined $anode) { |
AN: while (defined $anode) { |
6469 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
6479 |
$anode = $anode->parent_node; |
$anode = $anode->parent_node; |
6480 |
} # AN |
} # AN |
6481 |
|
|
6482 |
## Step 3 # MUST |
## Step 9 # MUST |
|
## Step 10 # MUST |
|
6483 |
{ |
{ |
6484 |
my $self = $p; |
my $self = $p; |
6485 |
!!!next-token; |
!!!next-token; |
6486 |
} |
} |
6487 |
$p->_tree_construction_main; |
$p->_tree_construction_main; |
6488 |
|
|
6489 |
## Step 11 # MUST |
## Step 10 # MUST |
6490 |
my @cn = @{$node->child_nodes}; |
my @cn = @{$node->child_nodes}; |
6491 |
for (@cn) { |
for (@cn) { |
6492 |
$node->remove_child ($_); |
$node->remove_child ($_); |
6493 |
} |
} |
6494 |
## ISSUE: mutation events? read-only? |
## ISSUE: mutation events? read-only? |
6495 |
|
|
6496 |
## Step 12 # MUST |
## Step 11 # MUST |
6497 |
@cn = @{$root->child_nodes}; |
@cn = @{$root->child_nodes}; |
6498 |
for (@cn) { |
for (@cn) { |
6499 |
$this_doc->adopt_node ($_); |
$this_doc->adopt_node ($_); |