| 525 |
|
|
| 526 |
if ($char_stream) { # if supported |
if ($char_stream) { # if supported |
| 527 |
## "Change the encoding" algorithm: |
## "Change the encoding" algorithm: |
|
|
|
|
## Step 1 |
|
|
if ($charset->{category} & |
|
|
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
|
|
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
|
|
($char_stream, $e_status) = $charset->get_decode_handle |
|
|
($byte_stream, |
|
|
byte_buffer => \ $buffer->{buffer}); |
|
|
} |
|
|
$charset_name = $charset->get_iana_name; |
|
| 528 |
|
|
| 529 |
## Step 2 |
## Step 1 |
| 530 |
if (defined $self->{input_encoding} and |
if (defined $self->{input_encoding} and |
| 531 |
$self->{input_encoding} eq $charset_name) { |
$self->{input_encoding} eq $charset_name) { |
| 532 |
!!!parse-error (type => 'charset label:matching', |
!!!parse-error (type => 'charset label:matching', |
| 536 |
return; |
return; |
| 537 |
} |
} |
| 538 |
|
|
| 539 |
|
## Step 2 (HTML5 revision 3205) |
| 540 |
|
if (defined $self->{input_encoding} and |
| 541 |
|
Message::Charset::Info->get_by_html_name ($self->{input_encoding}) |
| 542 |
|
->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
| 543 |
|
$self->{confident} = 1; |
| 544 |
|
return; |
| 545 |
|
} |
| 546 |
|
|
| 547 |
|
## Step 3 |
| 548 |
|
if ($charset->{category} & |
| 549 |
|
Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) { |
| 550 |
|
$charset = Message::Charset::Info->get_by_html_name ('utf-8'); |
| 551 |
|
($char_stream, $e_status) = $charset->get_decode_handle |
| 552 |
|
($byte_stream, |
| 553 |
|
byte_buffer => \ $buffer->{buffer}); |
| 554 |
|
} |
| 555 |
|
$charset_name = $charset->get_iana_name; |
| 556 |
|
|
| 557 |
!!!parse-error (type => 'charset label detected', |
!!!parse-error (type => 'charset label detected', |
| 558 |
text => $self->{input_encoding}, |
text => $self->{input_encoding}, |
| 559 |
value => $charset_name, |
value => $charset_name, |
| 560 |
level => $self->{level}->{warn}, |
level => $self->{level}->{warn}, |
| 561 |
token => $token); |
token => $token); |
| 562 |
|
|
| 563 |
## Step 3 |
## Step 4 |
| 564 |
# if (can) { |
# if (can) { |
| 565 |
## change the encoding on the fly. |
## change the encoding on the fly. |
| 566 |
#$self->{confident} = 1; |
#$self->{confident} = 1; |
| 567 |
#return; |
#return; |
| 568 |
# } |
# } |
| 569 |
|
|
| 570 |
## Step 4 |
## Step 5 |
| 571 |
throw Whatpm::HTML::RestartParser (); |
throw Whatpm::HTML::RestartParser (); |
| 572 |
} |
} |
| 573 |
}; # $self->{change_encoding} |
}; # $self->{change_encoding} |
| 3837 |
!!!next-token; |
!!!next-token; |
| 3838 |
next B; |
next B; |
| 3839 |
} elsif ({ |
} elsif ({ |
| 3840 |
select => 1, input => 1, textarea => 1, |
select => 1, input => 1, textarea => 1, keygen => 1, |
| 3841 |
}->{$token->{tag_name}} or |
}->{$token->{tag_name}} or |
| 3842 |
(($self->{insertion_mode} & IM_MASK) |
(($self->{insertion_mode} & IM_MASK) |
| 3843 |
== IN_SELECT_IN_TABLE_IM and |
== IN_SELECT_IN_TABLE_IM and |
| 4417 |
next B; |
next B; |
| 4418 |
} |
} |
| 4419 |
|
|
| 4420 |
## has a p element in scope |
if ($token->{tag_name} ne 'table' or # The Hixie Quirk |
| 4421 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
$self->{document}->manakai_compat_mode ne 'quirks') { |
| 4422 |
if ($_->[1] == P_EL) { |
## has a p element in scope |
| 4423 |
!!!cp ('t344'); |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4424 |
!!!back-token; # <form> |
if ($_->[1] == P_EL) { |
| 4425 |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
!!!cp ('t344'); |
| 4426 |
line => $token->{line}, column => $token->{column}}; |
!!!back-token; # <form> |
| 4427 |
next B; |
$token = {type => END_TAG_TOKEN, tag_name => 'p', |
| 4428 |
} elsif ($_->[1] & SCOPING_EL) { |
line => $token->{line}, column => $token->{column}}; |
| 4429 |
!!!cp ('t345'); |
next B; |
| 4430 |
last INSCOPE; |
} elsif ($_->[1] & SCOPING_EL) { |
| 4431 |
} |
!!!cp ('t345'); |
| 4432 |
} # INSCOPE |
last INSCOPE; |
| 4433 |
|
} |
| 4434 |
|
} # INSCOPE |
| 4435 |
|
} |
| 4436 |
|
|
| 4437 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4438 |
if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') { |
if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') { |
| 4803 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4804 |
{type => START_TAG_TOKEN, tag_name => 'hr', |
{type => START_TAG_TOKEN, tag_name => 'hr', |
| 4805 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
|
{type => START_TAG_TOKEN, tag_name => 'p', |
|
|
line => $token->{line}, column => $token->{column}}, |
|
| 4806 |
{type => START_TAG_TOKEN, tag_name => 'label', |
{type => START_TAG_TOKEN, tag_name => 'label', |
| 4807 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4808 |
); |
); |
| 4825 |
#{type => CHARACTER_TOKEN, data => ''}, # SHOULD |
#{type => CHARACTER_TOKEN, data => ''}, # SHOULD |
| 4826 |
{type => END_TAG_TOKEN, tag_name => 'label', |
{type => END_TAG_TOKEN, tag_name => 'label', |
| 4827 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
|
{type => END_TAG_TOKEN, tag_name => 'p', |
|
|
line => $token->{line}, column => $token->{column}}, |
|
| 4828 |
{type => START_TAG_TOKEN, tag_name => 'hr', |
{type => START_TAG_TOKEN, tag_name => 'hr', |
| 4829 |
line => $token->{line}, column => $token->{column}}, |
line => $token->{line}, column => $token->{column}}, |
| 4830 |
{type => END_TAG_TOKEN, tag_name => 'form', |
{type => END_TAG_TOKEN, tag_name => 'form', |
| 4908 |
last INSCOPE; |
last INSCOPE; |
| 4909 |
} |
} |
| 4910 |
} # INSCOPE |
} # INSCOPE |
| 4911 |
|
|
| 4912 |
|
## TODO: <non-ruby><rt> is not allowed. |
| 4913 |
|
|
| 4914 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4915 |
|
|
| 5445 |
## TODO: script stuffs |
## TODO: script stuffs |
| 5446 |
} # _tree_construct_main |
} # _tree_construct_main |
| 5447 |
|
|
| 5448 |
|
## XXX: How this method is organized is somewhat out of date, although |
| 5449 |
|
## it still does what the current spec documents. |
| 5450 |
sub set_inner_html ($$$$;$) { |
sub set_inner_html ($$$$;$) { |
| 5451 |
my $class = shift; |
my $class = shift; |
| 5452 |
my $node = shift; |
my $node = shift; # /context/ |
| 5453 |
#my $s = \$_[0]; |
#my $s = \$_[0]; |
| 5454 |
my $onerror = $_[1]; |
my $onerror = $_[1]; |
| 5455 |
my $get_wrapper = $_[2] || sub ($) { return $_[0] }; |
my $get_wrapper = $_[2] || sub ($) { return $_[0] }; |
| 5457 |
## ISSUE: Should {confident} be true? |
## ISSUE: Should {confident} be true? |
| 5458 |
|
|
| 5459 |
my $nt = $node->node_type; |
my $nt = $node->node_type; |
| 5460 |
if ($nt == 9) { |
if ($nt == 9) { # Document (invoke the algorithm with no /context/ element) |
| 5461 |
# MUST |
# MUST |
| 5462 |
|
|
| 5463 |
## Step 1 # MUST |
## Step 1 # MUST |
| 5472 |
|
|
| 5473 |
## Step 3, 4, 5 # MUST |
## Step 3, 4, 5 # MUST |
| 5474 |
$class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper); |
$class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper); |
| 5475 |
} elsif ($nt == 1) { |
} elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element) |
| 5476 |
## TODO: If non-html element |
## TODO: If non-html element |
| 5477 |
|
|
| 5478 |
## NOTE: Most of this code is copied from |parse_string| |
## NOTE: Most of this code is copied from |parse_string| |
| 5479 |
|
|
| 5480 |
## TODO: Support for $get_wrapper |
## TODO: Support for $get_wrapper |
| 5481 |
|
|
| 5482 |
## Step 1 # MUST |
## F1. Create an HTML document. |
| 5483 |
my $this_doc = $node->owner_document; |
my $this_doc = $node->owner_document; |
| 5484 |
my $doc = $this_doc->implementation->create_document; |
my $doc = $this_doc->implementation->create_document; |
| 5485 |
$doc->manakai_is_html (1); |
$doc->manakai_is_html (1); |
| 5486 |
|
|
| 5487 |
|
## F2. Propagate quirkness flag |
| 5488 |
|
my $node_doc = $node->owner_document; |
| 5489 |
|
$doc->manakai_compat_mode ($node_doc->manakai_compat_mode); |
| 5490 |
|
|
| 5491 |
|
## F3. Create an HTML parser |
| 5492 |
my $p = $class->new; |
my $p = $class->new; |
| 5493 |
$p->{document} = $doc; |
$p->{document} = $doc; |
| 5494 |
|
|
| 5616 |
$p->_initialize_tokenizer; |
$p->_initialize_tokenizer; |
| 5617 |
$p->_initialize_tree_constructor; |
$p->_initialize_tree_constructor; |
| 5618 |
|
|
| 5619 |
## Step 2 |
## F4. If /context/ is not undef... |
| 5620 |
|
|
| 5621 |
|
## F4.1. content model flag |
| 5622 |
my $node_ln = $node->manakai_local_name; |
my $node_ln = $node->manakai_local_name; |
| 5623 |
$p->{content_model} = { |
$p->{content_model} = { |
| 5624 |
title => RCDATA_CONTENT_MODEL, |
title => RCDATA_CONTENT_MODEL, |
| 5638 |
$p->{inner_html_node} = [$node, $el_category->{$node_ln}]; |
$p->{inner_html_node} = [$node, $el_category->{$node_ln}]; |
| 5639 |
## TODO: Foreign element OK? |
## TODO: Foreign element OK? |
| 5640 |
|
|
| 5641 |
## Step 3 |
## F4.2. Root |html| element |
| 5642 |
my $root = $doc->create_element_ns |
my $root = $doc->create_element_ns |
| 5643 |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
('http://www.w3.org/1999/xhtml', [undef, 'html']); |
| 5644 |
|
|
| 5645 |
## Step 4 # MUST |
## F4.3. |
| 5646 |
$doc->append_child ($root); |
$doc->append_child ($root); |
| 5647 |
|
|
| 5648 |
## Step 5 # MUST |
## F4.4. |
| 5649 |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
push @{$p->{open_elements}}, [$root, $el_category->{html}]; |
| 5650 |
|
|
| 5651 |
undef $p->{head_element}; |
undef $p->{head_element}; |
| 5652 |
undef $p->{head_element_inserted}; |
undef $p->{head_element_inserted}; |
| 5653 |
|
|
| 5654 |
## Step 6 # MUST |
## F4.5. |
| 5655 |
$p->_reset_insertion_mode; |
$p->_reset_insertion_mode; |
| 5656 |
|
|
| 5657 |
## Step 7 # MUST |
## F4.6. |
| 5658 |
my $anode = $node; |
my $anode = $node; |
| 5659 |
AN: while (defined $anode) { |
AN: while (defined $anode) { |
| 5660 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
| 5669 |
} |
} |
| 5670 |
$anode = $anode->parent_node; |
$anode = $anode->parent_node; |
| 5671 |
} # AN |
} # AN |
| 5672 |
|
|
| 5673 |
## Step 9 # MUST |
## F.6. Start the parser. |
| 5674 |
{ |
{ |
| 5675 |
my $self = $p; |
my $self = $p; |
| 5676 |
!!!next-token; |
!!!next-token; |
| 5677 |
} |
} |
| 5678 |
$p->_tree_construction_main; |
$p->_tree_construction_main; |
| 5679 |
|
|
| 5680 |
## Step 10 # MUST |
## F.7. |
| 5681 |
my @cn = @{$node->child_nodes}; |
my @cn = @{$node->child_nodes}; |
| 5682 |
for (@cn) { |
for (@cn) { |
| 5683 |
$node->remove_child ($_); |
$node->remove_child ($_); |