| 525 |
|
|
| 526 |
if ($char_stream) { # if supported |
if ($char_stream) { # if supported |
| 527 |
## "Change the encoding" algorithm: |
## "Change the encoding" algorithm: |
|
|
|
|
## Step 1 |
|
|
if ($charset->{category} & |
|
|
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
|
|
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
|
|
($char_stream, $e_status) = $charset->get_decode_handle |
|
|
($byte_stream, |
|
|
byte_buffer => \ $buffer->{buffer}); |
|
|
} |
|
|
$charset_name = $charset->get_iana_name; |
|
| 528 |
|
|
| 529 |
## Step 2 |
## Step 1 |
| 530 |
if (defined $self->{input_encoding} and |
if (defined $self->{input_encoding} and |
| 531 |
$self->{input_encoding} eq $charset_name) { |
$self->{input_encoding} eq $charset_name) { |
| 532 |
!!!parse-error (type => 'charset label:matching', |
!!!parse-error (type => 'charset label:matching', |
| 536 |
return; |
return; |
| 537 |
} |
} |
| 538 |
|
|
| 539 |
|
## Step 2 (HTML5 revision 3205) |
| 540 |
|
if (defined $self->{input_encoding} and |
| 541 |
|
Message::Charset::Info->get_by_html_name ($self->{input_encoding}) |
| 542 |
|
->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
| 543 |
|
$self->{confident} = 1; |
| 544 |
|
return; |
| 545 |
|
} |
| 546 |
|
|
| 547 |
|
## Step 3 |
| 548 |
|
if ($charset->{category} & |
| 549 |
|
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
| 550 |
|
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
| 551 |
|
($char_stream, $e_status) = $charset->get_decode_handle |
| 552 |
|
($byte_stream, |
| 553 |
|
byte_buffer => \ $buffer->{buffer}); |
| 554 |
|
} |
| 555 |
|
$charset_name = $charset->get_iana_name; |
| 556 |
|
|
| 557 |
!!!parse-error (type => 'charset label detected', |
!!!parse-error (type => 'charset label detected', |
| 558 |
text => $self->{input_encoding}, |
text => $self->{input_encoding}, |
| 559 |
value => $charset_name, |
value => $charset_name, |
| 560 |
level => $self->{level}->{warn}, |
level => $self->{level}->{warn}, |
| 561 |
token => $token); |
token => $token); |
| 562 |
|
|
| 563 |
## Step 3 |
## Step 4 |
| 564 |
# if (can) { |
# if (can) { |
| 565 |
## change the encoding on the fly. |
## change the encoding on the fly. |
| 566 |
#$self->{confident} = 1; |
#$self->{confident} = 1; |
| 567 |
#return; |
#return; |
| 568 |
# } |
# } |
| 569 |
|
|
| 570 |
## Step 4 |
## Step 5 |
| 571 |
throw Whatpm::HTML::RestartParser (); |
throw Whatpm::HTML::RestartParser (); |
| 572 |
} |
} |
| 573 |
}; # $self->{change_encoding} |
}; # $self->{change_encoding} |
| 1419 |
## Step 3 |
## Step 3 |
| 1420 |
## TODO: Mark as "already executed", if ... |
## TODO: Mark as "already executed", if ... |
| 1421 |
|
|
| 1422 |
## Step 4 |
## Step 4 (HTML5 revision 2702) |
| 1423 |
$insert->($script_el); |
$insert->($script_el); |
|
|
|
|
## ISSUE: $script_el is not put into the stack |
|
| 1424 |
push @{$self->{open_elements}}, [$script_el, $el_category->{script}]; |
push @{$self->{open_elements}}, [$script_el, $el_category->{script}]; |
| 1425 |
|
|
| 1426 |
## Step 5 |
## Step 5 |
| 1954 |
} |
} |
| 1955 |
} elsif ($token->{type} == END_TAG_TOKEN) { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 1956 |
## NOTE: "using the rules for secondary insertion mode" then "continue" |
## NOTE: "using the rules for secondary insertion mode" then "continue" |
| 1957 |
!!!cp ('t87.5'); |
if ($token->{tag_name} eq 'script') { |
| 1958 |
# |
!!!cp ('t87.41'); |
| 1959 |
|
# |
| 1960 |
|
## XXXscript: Execute script here. |
| 1961 |
|
} else { |
| 1962 |
|
!!!cp ('t87.5'); |
| 1963 |
|
# |
| 1964 |
|
} |
| 1965 |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 1966 |
!!!cp ('t87.6'); |
!!!cp ('t87.6'); |
| 1967 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 3841 |
!!!next-token; |
!!!next-token; |
| 3842 |
next B; |
next B; |
| 3843 |
} elsif ({ |
} elsif ({ |
| 3844 |
select => 1, input => 1, textarea => 1, |
select => 1, input => 1, textarea => 1, keygen => 1, |
| 3845 |
}->{$token->{tag_name}} or |
}->{$token->{tag_name}} or |
| 3846 |
(($self->{insertion_mode} & IM_MASK) |
(($self->{insertion_mode} & IM_MASK) |
| 3847 |
== IN_SELECT_IN_TABLE_IM and |
== IN_SELECT_IN_TABLE_IM and |
| 4421 |
next B; |
next B; |
| 4422 |
} |
} |
| 4423 |
|
|
| 4424 |
## has a p element in scope |
if ($token->{tag_name} ne 'table' or # The Hixie Quirk |
| 4425 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
$self->{document}->manakai_compat_mode ne 'quirks') { |
| 4426 |
if ($_->[1] == P_EL) { |
## has a p element in scope |
| 4427 |
!!!cp ('t344'); |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4428 |
!!!back-token; # <form> |
if ($_->[1] == P_EL) { |
| 4429 |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
!!!cp ('t344'); |
| 4430 |
line => $token->{line}, column => $token->{column}}; |
!!!back-token; # <form> |
| 4431 |
next B; |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
| 4432 |
} elsif ($_->[1] & SCOPING_EL) { |
line => $token->{line}, column => $token->{column}}; |
| 4433 |
!!!cp ('t345'); |
next B; |
| 4434 |
last INSCOPE; |
} elsif ($_->[1] & SCOPING_EL) { |
| 4435 |
} |
!!!cp ('t345'); |
| 4436 |
} # INSCOPE |
last INSCOPE; |
| 4437 |
|
} |
| 4438 |
|
} # INSCOPE |
| 4439 |
|
} |
| 4440 |
|
|
| 4441 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4442 |
if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') { |
if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') { |
| 4807 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4808 |
{type => START_TAG_TOKEN, tag_name => 'hr', |
{type => START_TAG_TOKEN, tag_name => 'hr', |
| 4809 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
|
{type => START_TAG_TOKEN, tag_name => 'p', |
|
|
line => $token->{line}, column => $token->{column}}, |
|
| 4810 |
{type => START_TAG_TOKEN, tag_name => 'label', |
{type => START_TAG_TOKEN, tag_name => 'label', |
| 4811 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4812 |
); |
); |
| 4829 |
#{type => CHARACTER_TOKEN, data => ''}, # SHOULD |
#{type => CHARACTER_TOKEN, data => ''}, # SHOULD |
| 4830 |
{type => END_TAG_TOKEN, tag_name => 'label', |
{type => END_TAG_TOKEN, tag_name => 'label', |
| 4831 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
|
{type => END_TAG_TOKEN, tag_name => 'p', |
|
|
line => $token->{line}, column => $token->{column}}, |
|
| 4832 |
{type => START_TAG_TOKEN, tag_name => 'hr', |
{type => START_TAG_TOKEN, tag_name => 'hr', |
| 4833 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4834 |
{type => END_TAG_TOKEN, tag_name => 'form', |
{type => END_TAG_TOKEN, tag_name => 'form', |
| 4912 |
last INSCOPE; |
last INSCOPE; |
| 4913 |
} |
} |
| 4914 |
} # INSCOPE |
} # INSCOPE |
| 4915 |
|
|
| 4916 |
|
## TODO: <non-ruby><rt> is not allowed. |
| 4917 |
|
|
| 4918 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4919 |
|
|
| 5052 |
} # INSCOPE |
} # INSCOPE |
| 5053 |
|
|
| 5054 |
for (@{$self->{open_elements}}) { |
for (@{$self->{open_elements}}) { |
| 5055 |
unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) { |
unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL || |
| 5056 |
|
$_->[1] == OPTGROUP_EL || |
| 5057 |
|
$_->[1] == OPTION_EL || |
| 5058 |
|
$_->[1] == RUBY_COMPONENT_EL) { |
| 5059 |
!!!cp ('t403'); |
!!!cp ('t403'); |
| 5060 |
!!!parse-error (type => 'not closed', |
!!!parse-error (type => 'not closed', |
| 5061 |
text => $_->[0]->manakai_local_name, |
text => $_->[0]->manakai_local_name, |
| 5452 |
## TODO: script stuff |
## TODO: script stuff |
| 5453 |
} # _tree_construct_main |
} # _tree_construct_main |
| 5454 |
|
|
| 5455 |
|
## XXX: The organization of this method is somewhat out of date, although |
| 5456 |
|
## it still implements what the current spec describes. |
| 5457 |
sub set_inner_html ($$$$;$) { |
sub set_inner_html ($$$$;$) { |
| 5458 |
my $class = shift; |
my $class = shift; |
| 5459 |
my $node = shift; |
my $node = shift; # /context/ |
| 5460 |
#my $s = \$_[0]; |
#my $s = \$_[0]; |
| 5461 |
my $onerror = $_[1]; |
my $onerror = $_[1]; |
| 5462 |
my $get_wrapper = $_[2] || sub ($) { return $_[0] }; |
my $get_wrapper = $_[2] || sub ($) { return $_[0] }; |
| 5464 |
## ISSUE: Should {confident} be true? |
## ISSUE: Should {confident} be true? |
| 5465 |
|
|
| 5466 |
my $nt = $node->node_type; |
my $nt = $node->node_type; |
| 5467 |
if ($nt == 9) { |
if ($nt == 9) { # Document (invoke the algorithm with no /context/ element) |
| 5468 |
# MUST |
# MUST |
| 5469 |
|
|
| 5470 |
## Step 1 # MUST |
## Step 1 # MUST |
| 5479 |
|
|
| 5480 |
## Step 3, 4, 5 # MUST |
## Step 3, 4, 5 # MUST |
| 5481 |
$class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper); |
$class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper); |
| 5482 |
} elsif ($nt == 1) { |
} elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element) |
| 5483 |
## TODO: If non-html element |
## TODO: If non-html element |
| 5484 |
|
|
| 5485 |
## NOTE: Most of this code is copied from |parse_string| |
## NOTE: Most of this code is copied from |parse_string| |
| 5486 |
|
|
| 5487 |
## TODO: Support for $get_wrapper |
## TODO: Support for $get_wrapper |
| 5488 |
|
|
| 5489 |
## Step 1 # MUST |
## F1. Create an HTML document. |
| 5490 |
my $this_doc = $node->owner_document; |
my $this_doc = $node->owner_document; |
| 5491 |
my $doc = $this_doc->implementation->create_document; |
my $doc = $this_doc->implementation->create_document; |
| 5492 |
$doc->manakai_is_html (1); |
$doc->manakai_is_html (1); |
| 5493 |
|
|
| 5494 |
|
## F2. Propagate quirkness flag |
| 5495 |
|
my $node_doc = $node->owner_document; |
| 5496 |
|
$doc->manakai_compat_mode ($node_doc->manakai_compat_mode); |
| 5497 |
|
|
| 5498 |
|
## F3. Create an HTML parser |
| 5499 |
my $p = $class->new; |
my $p = $class->new; |
| 5500 |
$p->{document} = $doc; |
$p->{document} = $doc; |
| 5501 |
|
|
| 5623 |
$p->_initialize_tokenizer; |
$p->_initialize_tokenizer; |
| 5624 |
$p->_initialize_tree_constructor; |
$p->_initialize_tree_constructor; |
| 5625 |
|
|
| 5626 |
## Step 2 |
## F4. If /context/ is not undef... |
| 5627 |
|
|
| 5628 |
|
## F4.1. content model flag |
| 5629 |
my $node_ln = $node->manakai_local_name; |
my $node_ln = $node->manakai_local_name; |
| 5630 |
$p->{content_model} = { |
$p->{content_model} = { |
| 5631 |
title => RCDATA_CONTENT_MODEL, |
title => RCDATA_CONTENT_MODEL, |
| 5645 |
$p->{inner_html_node} = [$node, $el_category->{$node_ln}]; |
$p->{inner_html_node} = [$node, $el_category->{$node_ln}]; |
| 5646 |
## TODO: Foreign element OK? |
## TODO: Foreign element OK? |
| 5647 |
|
|
| 5648 |
## Step 3 |
## F4.2. Root |html| element |
| 5649 |
my $root = $doc->create_element_ns |
my $root = $doc->create_element_ns |
| 5650 |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
| 5651 |
|
|
| 5652 |
## Step 4 # MUST |
## F4.3. |
| 5653 |
$doc->append_child ($root); |
$doc->append_child ($root); |
| 5654 |
|
|
| 5655 |
## Step 5 # MUST |
## F4.4. |
| 5656 |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
| 5657 |
|
|
| 5658 |
undef $p->{head_element}; |
undef $p->{head_element}; |
| 5659 |
undef $p->{head_element_inserted}; |
undef $p->{head_element_inserted}; |
| 5660 |
|
|
| 5661 |
## Step 6 # MUST |
## F4.5. |
| 5662 |
$p->_reset_insertion_mode; |
$p->_reset_insertion_mode; |
| 5663 |
|
|
| 5664 |
## Step 7 # MUST |
## F4.6. |
| 5665 |
my $anode = $node; |
my $anode = $node; |
| 5666 |
AN: while (defined $anode) { |
AN: while (defined $anode) { |
| 5667 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
| 5676 |
} |
} |
| 5677 |
$anode = $anode->parent_node; |
$anode = $anode->parent_node; |
| 5678 |
} # AN |
} # AN |
| 5679 |
|
|
| 5680 |
## Step 9 # MUST |
## F.6. Start the parser. |
| 5681 |
{ |
{ |
| 5682 |
my $self = $p; |
my $self = $p; |
| 5683 |
!!!next-token; |
!!!next-token; |
| 5684 |
} |
} |
| 5685 |
$p->_tree_construction_main; |
$p->_tree_construction_main; |
| 5686 |
|
|
| 5687 |
## Step 10 # MUST |
## F.7. |
| 5688 |
my @cn = @{$node->child_nodes}; |
my @cn = @{$node->child_nodes}; |
| 5689 |
for (@cn) { |
for (@cn) { |
| 5690 |
$node->remove_child ($_); |
$node->remove_child ($_); |