| 11 |
## TODO: 1252 parse error (revision 1264) |
## TODO: 1252 parse error (revision 1264) |
| 12 |
## TODO: 8859-11 = 874 (revision 1271) |
## TODO: 8859-11 = 874 (revision 1271) |
| 13 |
|
|
| 14 |
|
require IO::Handle; |
| 15 |
|
|
| 16 |
my $HTML_NS = q<http://www.w3.org/1999/xhtml>; |
my $HTML_NS = q<http://www.w3.org/1999/xhtml>; |
| 17 |
my $MML_NS = q<http://www.w3.org/1998/Math/MathML>; |
my $MML_NS = q<http://www.w3.org/1998/Math/MathML>; |
| 18 |
my $SVG_NS = q<http://www.w3.org/2000/svg>; |
my $SVG_NS = q<http://www.w3.org/2000/svg>; |
| 334 |
}; # $c1_entity_char |
}; # $c1_entity_char |
| 335 |
|
|
| 336 |
sub parse_byte_string ($$$$;$) { |
sub parse_byte_string ($$$$;$) { |
| 337 |
|
my $self = shift; |
| 338 |
|
my $charset_name = shift; |
| 339 |
|
open my $input, '<', ref $_[0] ? $_[0] : \($_[0]); |
| 340 |
|
return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]); |
| 341 |
|
} # parse_byte_string |
| 342 |
|
|
| 343 |
|
sub parse_byte_stream ($$$$;$) { |
| 344 |
my $self = ref $_[0] ? shift : shift->new; |
my $self = ref $_[0] ? shift : shift->new; |
| 345 |
my $charset_name = shift; |
my $charset_name = shift; |
| 346 |
my $bytes_s = ref $_[0] ? $_[0] : \($_[0]); |
my $byte_stream = $_[0]; |
|
my $s; |
|
| 347 |
|
|
| 348 |
my $onerror = $_[2] || sub { |
my $onerror = $_[2] || sub { |
| 349 |
my (%opt) = @_; |
my (%opt) = @_; |
| 354 |
## HTML5 encoding sniffing algorithm |
## HTML5 encoding sniffing algorithm |
| 355 |
require Message::Charset::Info; |
require Message::Charset::Info; |
| 356 |
my $charset; |
my $charset; |
| 357 |
my ($e, $e_status); |
my $buffer; |
| 358 |
|
my ($char_stream, $e_status); |
| 359 |
|
|
| 360 |
SNIFFING: { |
SNIFFING: { |
| 361 |
|
|
| 364 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
| 365 |
|
|
| 366 |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
| 367 |
($e, $e_status) = $charset->get_perl_encoding |
($char_stream, $e_status) = $charset->get_decode_handle |
| 368 |
(allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 369 |
allow_fallback => 1); |
allow_fallback => 1); |
| 370 |
if ($e) { |
if ($char_stream) { |
| 371 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 372 |
last SNIFFING; |
last SNIFFING; |
| 373 |
|
} else { |
| 374 |
|
## TODO: unsupported error |
| 375 |
} |
} |
| 376 |
} |
} |
| 377 |
|
|
| 378 |
## Step 2 |
## Step 2 |
| 379 |
# wait |
my $byte_buffer = ''; |
| 380 |
|
for (1..1024) { |
| 381 |
|
my $char = $byte_stream->getc; |
| 382 |
|
last unless defined $char; |
| 383 |
|
$byte_buffer .= $char; |
| 384 |
|
} ## TODO: timeout |
| 385 |
|
|
| 386 |
## Step 3 |
## Step 3 |
| 387 |
my $head = substr ($$bytes_s, 0, 3); |
if ($byte_buffer =~ /^\xFE\xFF/) { |
|
if ($head =~ /^\xFE\xFF/) { |
|
| 388 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16be'); |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16be'); |
| 389 |
($e, $e_status) = $charset->get_perl_encoding |
($char_stream, $e_status) = $charset->get_decode_handle |
| 390 |
(allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 391 |
allow_fallback => 1); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
| 392 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 393 |
last SNIFFING; |
last SNIFFING; |
| 394 |
} elsif ($head =~ /^\xFF\xFE/) { |
} elsif ($byte_buffer =~ /^\xFF\xFE/) { |
| 395 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16le'); |
$charset = Message::Charset::Info->get_by_iana_name ('utf-16le'); |
| 396 |
($e, $e_status) = $charset->get_perl_encoding |
($char_stream, $e_status) = $charset->get_decode_handle |
| 397 |
(allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 398 |
allow_fallback => 1); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
| 399 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 400 |
last SNIFFING; |
last SNIFFING; |
| 401 |
} elsif ($head eq "\xEF\xBB\xBF") { |
} elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) { |
| 402 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
| 403 |
($e, $e_status) = $charset->get_perl_encoding |
($char_stream, $e_status) = $charset->get_decode_handle |
| 404 |
(allow_error_reporting => 1, |
($byte_stream, allow_error_reporting => 1, |
| 405 |
allow_fallback => 1); |
allow_fallback => 1, byte_buffer => \$byte_buffer); |
| 406 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 407 |
last SNIFFING; |
last SNIFFING; |
| 408 |
} |
} |
| 416 |
## Step 6 |
## Step 6 |
| 417 |
require Whatpm::Charset::UniversalCharDet; |
require Whatpm::Charset::UniversalCharDet; |
| 418 |
$charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string |
$charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string |
| 419 |
(substr ($$bytes_s, 0, 1024)); |
($byte_buffer); |
| 420 |
if (defined $charset_name) { |
if (defined $charset_name) { |
| 421 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
| 422 |
|
|
| 423 |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
## ISSUE: Unsupported encoding is not ignored according to the spec. |
| 424 |
($e, $e_status) = $charset->get_perl_encoding |
require Whatpm::Charset::DecodeHandle; |
| 425 |
(allow_error_reporting => 1, |
$buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new |
| 426 |
allow_fallback => 1); |
($byte_stream); |
| 427 |
if ($e) { |
($char_stream, $e_status) = $charset->get_decode_handle |
| 428 |
|
($buffer, allow_error_reporting => 1, |
| 429 |
|
allow_fallback => 1, byte_buffer => \$byte_buffer); |
| 430 |
|
if ($char_stream) { |
| 431 |
|
$buffer->{buffer} = $byte_buffer; |
| 432 |
!!!parse-error (type => 'sniffing:chardet', ## TODO: type name |
!!!parse-error (type => 'sniffing:chardet', ## TODO: type name |
| 433 |
value => $charset_name, |
value => $charset_name, |
| 434 |
level => $self->{info_level}, |
level => $self->{info_level}, |
| 443 |
$charset = Message::Charset::Info->get_by_iana_name ('windows-1252'); |
$charset = Message::Charset::Info->get_by_iana_name ('windows-1252'); |
| 444 |
## NOTE: We choose |windows-1252| here, since |utf-8| should be |
## NOTE: We choose |windows-1252| here, since |utf-8| should be |
| 445 |
## detectable in the step 6. |
## detectable in the step 6. |
| 446 |
($e, $e_status) = $charset->get_perl_encoding (allow_error_reporting => 1, |
require Whatpm::Charset::DecodeHandle; |
| 447 |
allow_fallback => 1); |
$buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new |
| 448 |
|
($byte_stream); |
| 449 |
|
($char_stream, $e_status) |
| 450 |
|
= $charset->get_decode_handle ($buffer, |
| 451 |
|
allow_error_reporting => 1, |
| 452 |
|
allow_fallback => 1, |
| 453 |
|
byte_buffer => \$byte_buffer); |
| 454 |
|
$buffer->{buffer} = $byte_buffer; |
| 455 |
!!!parse-error (type => 'sniffing:default', ## TODO: type name |
!!!parse-error (type => 'sniffing:default', ## TODO: type name |
| 456 |
value => 'windows-1252', |
value => 'windows-1252', |
| 457 |
level => $self->{info_level}, |
level => $self->{info_level}, |
| 462 |
$self->{input_encoding} = $charset->get_iana_name; |
$self->{input_encoding} = $charset->get_iana_name; |
| 463 |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
| 464 |
!!!parse-error (type => 'chardecode:fallback', ## TODO: type name |
!!!parse-error (type => 'chardecode:fallback', ## TODO: type name |
| 465 |
value => $e->name, |
value => $self->{input_encoding}, |
| 466 |
level => $self->{unsupported_level}, |
level => $self->{unsupported_level}, |
| 467 |
line => 1, column => 1); |
line => 1, column => 1); |
| 468 |
} elsif (not ($e_status & |
} elsif (not ($e_status & |
| 472 |
level => $self->{unsupported_level}, |
level => $self->{unsupported_level}, |
| 473 |
line => 1, column => 1); |
line => 1, column => 1); |
| 474 |
} |
} |
|
$s = \ $e->decode ($$bytes_s); |
|
| 475 |
|
|
| 476 |
$self->{change_encoding} = sub { |
$self->{change_encoding} = sub { |
| 477 |
my $self = shift; |
my $self = shift; |
| 479 |
my $token = shift; |
my $token = shift; |
| 480 |
|
|
| 481 |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
$charset = Message::Charset::Info->get_by_iana_name ($charset_name); |
| 482 |
($e, $e_status) = $charset->get_perl_encoding |
($char_stream, $e_status) = $charset->get_decode_handle |
| 483 |
(allow_error_reporting => 1, allow_fallback => 1); |
($byte_stream, allow_error_reporting => 1, allow_fallback => 1, |
| 484 |
|
byte_buffer => \ $buffer->{buffer}); |
| 485 |
|
|
| 486 |
if ($e) { # if supported |
if ($char_stream) { # if supported |
| 487 |
## "Change the encoding" algorithm: |
## "Change the encoding" algorithm: |
| 488 |
|
|
| 489 |
## Step 1 |
## Step 1 |
| 490 |
if ($charset->{iana_names}->{'utf-16'}) { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8? |
if ($charset->{iana_names}->{'utf-16'}) { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8? |
| 491 |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
$charset = Message::Charset::Info->get_by_iana_name ('utf-8'); |
| 492 |
($e, $e_status) = $charset->get_perl_encoding; |
($char_stream, $e_status) = $charset->get_decode_handle |
| 493 |
|
($byte_stream, |
| 494 |
|
byte_buffer => \ $buffer->{buffer}); |
| 495 |
} |
} |
| 496 |
$charset_name = $charset->get_iana_name; |
$charset_name = $charset->get_iana_name; |
| 497 |
|
|
| 520 |
} |
} |
| 521 |
}; # $self->{change_encoding} |
}; # $self->{change_encoding} |
| 522 |
|
|
| 523 |
|
my $char_onerror = sub { |
| 524 |
|
my (undef, $type, %opt) = @_; |
| 525 |
|
!!!parse-error (%opt, type => $type, |
| 526 |
|
line => $self->{line}, column => $self->{column} + 1); |
| 527 |
|
if ($opt{octets}) { |
| 528 |
|
${$opt{octets}} = "\x{FFFD}"; # relacement character |
| 529 |
|
} |
| 530 |
|
}; |
| 531 |
|
$char_stream->onerror ($char_onerror); |
| 532 |
|
|
| 533 |
my @args = @_; shift @args; # $s |
my @args = @_; shift @args; # $s |
| 534 |
my $return; |
my $return; |
| 535 |
try { |
try { |
| 536 |
$return = $self->parse_char_string ($s, @args); |
$return = $self->parse_char_stream ($char_stream, @args); |
| 537 |
} catch Whatpm::HTML::RestartParser with { |
} catch Whatpm::HTML::RestartParser with { |
| 538 |
## NOTE: Invoked after {change_encoding}. |
## NOTE: Invoked after {change_encoding}. |
| 539 |
|
|
| 540 |
$self->{input_encoding} = $charset->get_iana_name; |
$self->{input_encoding} = $charset->get_iana_name; |
| 541 |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) { |
| 542 |
!!!parse-error (type => 'chardecode:fallback', ## TODO: type name |
!!!parse-error (type => 'chardecode:fallback', ## TODO: type name |
| 543 |
value => $e->name, |
value => $self->{input_encoding}, |
| 544 |
level => $self->{unsupported_level}, |
level => $self->{unsupported_level}, |
| 545 |
line => 1, column => 1); |
line => 1, column => 1); |
| 546 |
} elsif (not ($e_status & |
} elsif (not ($e_status & |
| 550 |
level => $self->{unsupported_level}, |
level => $self->{unsupported_level}, |
| 551 |
line => 1, column => 1); |
line => 1, column => 1); |
| 552 |
} |
} |
|
$s = \ $e->decode ($$bytes_s); |
|
| 553 |
$self->{confident} = 1; |
$self->{confident} = 1; |
| 554 |
$return = $self->parse_char_string ($s, @args); |
$char_stream->onerror ($char_onerror); |
| 555 |
|
$return = $self->parse_char_stream ($char_stream, @args); |
| 556 |
}; |
}; |
| 557 |
return $return; |
return $return; |
| 558 |
} # parse_byte_string |
} # parse_byte_stream |
| 559 |
|
|
| 560 |
## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM |
## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM |
| 561 |
## and the HTML layer MUST ignore it. However, we do strip BOM in |
## and the HTML layer MUST ignore it. However, we do strip BOM in |
| 566 |
## such as |parse_byte_string| in this module, must ensure that it does |
## such as |parse_byte_string| in this module, must ensure that it does |
| 567 |
## strip the BOM and never strip any ZWNBSP. |
## strip the BOM and never strip any ZWNBSP. |
| 568 |
|
|
| 569 |
*parse_char_string = \&parse_string; |
sub parse_char_string ($$$;$) { |
| 570 |
|
my $self = shift; |
| 571 |
|
require utf8; |
| 572 |
|
my $s = ref $_[0] ? $_[0] : \($_[0]); |
| 573 |
|
open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s; |
| 574 |
|
return $self->parse_char_stream ($input, @_[1..$#_]); |
| 575 |
|
} # parse_char_string |
| 576 |
|
*parse_string = \&parse_char_string; |
| 577 |
|
|
| 578 |
sub parse_string ($$$;$) { |
sub parse_char_stream ($$$;$) { |
| 579 |
my $self = ref $_[0] ? shift : shift->new; |
my $self = ref $_[0] ? shift : shift->new; |
| 580 |
my $s = ref $_[0] ? $_[0] : \($_[0]); |
my $input = $_[0]; |
| 581 |
$self->{document} = $_[1]; |
$self->{document} = $_[1]; |
| 582 |
@{$self->{document}->child_nodes} = (); |
@{$self->{document}->child_nodes} = (); |
| 583 |
|
|
| 596 |
pop @{$self->{prev_char}}; |
pop @{$self->{prev_char}}; |
| 597 |
unshift @{$self->{prev_char}}, $self->{next_char}; |
unshift @{$self->{prev_char}}, $self->{next_char}; |
| 598 |
|
|
| 599 |
$self->{next_char} = -1 and return if $i >= length $$s; |
my $char; |
| 600 |
$self->{next_char} = ord substr $$s, $i++, 1; |
if (defined $self->{next_next_char}) { |
| 601 |
|
$char = $self->{next_next_char}; |
| 602 |
|
delete $self->{next_next_char}; |
| 603 |
|
} else { |
| 604 |
|
$char = $input->getc; |
| 605 |
|
} |
| 606 |
|
$self->{next_char} = -1 and return unless defined $char; |
| 607 |
|
$self->{next_char} = ord $char; |
| 608 |
|
|
| 609 |
($self->{line_prev}, $self->{column_prev}) |
($self->{line_prev}, $self->{column_prev}) |
| 610 |
= ($self->{line}, $self->{column}); |
= ($self->{line}, $self->{column}); |
| 616 |
$self->{column} = 0; |
$self->{column} = 0; |
| 617 |
} elsif ($self->{next_char} == 0x000D) { # CR |
} elsif ($self->{next_char} == 0x000D) { # CR |
| 618 |
!!!cp ('j2'); |
!!!cp ('j2'); |
| 619 |
$i++ if substr ($$s, $i, 1) eq "\x0A"; |
my $next = $input->getc; |
| 620 |
|
if (defined $next and $next ne "\x0A") { |
| 621 |
|
$self->{next_next_char} = $next; |
| 622 |
|
} |
| 623 |
$self->{next_char} = 0x000A; # LF # MUST |
$self->{next_char} = 0x000A; # LF # MUST |
| 624 |
$self->{line}++; |
$self->{line}++; |
| 625 |
$self->{column} = 0; |
$self->{column} = 0; |
| 672 |
delete $self->{parse_error}; # remove loop |
delete $self->{parse_error}; # remove loop |
| 673 |
|
|
| 674 |
return $self->{document}; |
return $self->{document}; |
| 675 |
} # parse_string |
} # parse_char_stream |
| 676 |
|
|
| 677 |
sub new ($) { |
sub new ($) { |
| 678 |
my $class = shift; |
my $class = shift; |
| 1048 |
redo A; |
redo A; |
| 1049 |
} else { |
} else { |
| 1050 |
!!!cp (23); |
!!!cp (23); |
| 1051 |
!!!parse-error (type => 'bare stago'); |
!!!parse-error (type => 'bare stago', |
| 1052 |
|
line => $self->{line_prev}, |
| 1053 |
|
column => $self->{column_prev}); |
| 1054 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 1055 |
## reconsume |
## reconsume |
| 1056 |
|
|
| 4154 |
!!!next-token; |
!!!next-token; |
| 4155 |
next B; |
next B; |
| 4156 |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4157 |
!!!cp ('t94'); |
!!!cp ('t93.2'); |
| 4158 |
# |
!!!parse-error (type => 'after head:head', token => $token); ## TODO: error type |
| 4159 |
|
## Ignore the token |
| 4160 |
|
!!!nack ('t93.3'); |
| 4161 |
|
!!!next-token; |
| 4162 |
|
next B; |
| 4163 |
} else { |
} else { |
| 4164 |
!!!cp ('t95'); |
!!!cp ('t95'); |
| 4165 |
!!!parse-error (type => 'in head:head', token => $token); # or in head noscript |
!!!parse-error (type => 'in head:head', token => $token); # or in head noscript |
| 4481 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 4482 |
!!!next-token; |
!!!next-token; |
| 4483 |
next B; |
next B; |
| 4484 |
|
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4485 |
|
!!!cp ('t134.1'); |
| 4486 |
|
!!!parse-error (type => 'unmatched end tag:head', token => $token); |
| 4487 |
|
## Ignore the token |
| 4488 |
|
!!!next-token; |
| 4489 |
|
next B; |
| 4490 |
} else { |
} else { |
| 4491 |
!!!cp ('t135'); |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
|
# |
|
| 4492 |
} |
} |
| 4493 |
} elsif ($token->{tag_name} eq 'noscript') { |
} elsif ($token->{tag_name} eq 'noscript') { |
| 4494 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 4497 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 4498 |
!!!next-token; |
!!!next-token; |
| 4499 |
next B; |
next B; |
| 4500 |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or |
| 4501 |
|
$self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4502 |
!!!cp ('t137'); |
!!!cp ('t137'); |
| 4503 |
!!!parse-error (type => 'unmatched end tag:noscript', token => $token); |
!!!parse-error (type => 'unmatched end tag:noscript', token => $token); |
| 4504 |
## Ignore the token ## ISSUE: An issue in the spec. |
## Ignore the token ## ISSUE: An issue in the spec. |
| 4511 |
} elsif ({ |
} elsif ({ |
| 4512 |
body => 1, html => 1, |
body => 1, html => 1, |
| 4513 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 4514 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM or |
| 4515 |
!!!cp ('t139'); |
$self->{insertion_mode} == IN_HEAD_IM or |
| 4516 |
## As if <head> |
$self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
|
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
|
|
push @{$self->{open_elements}}, |
|
|
[$self->{head_element}, $el_category->{head}]; |
|
|
|
|
|
$self->{insertion_mode} = IN_HEAD_IM; |
|
|
## Reprocess in the "in head" insertion mode... |
|
|
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
|
| 4517 |
!!!cp ('t140'); |
!!!cp ('t140'); |
| 4518 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
| 4519 |
## Ignore the token |
## Ignore the token |
| 4520 |
!!!next-token; |
!!!next-token; |
| 4521 |
next B; |
next B; |
| 4522 |
|
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4523 |
|
!!!cp ('t140.1'); |
| 4524 |
|
!!!parse-error (type => 'unmatched end tag:' . $token->{tag_name}, token => $token); |
| 4525 |
|
## Ignore the token |
| 4526 |
|
!!!next-token; |
| 4527 |
|
next B; |
| 4528 |
} else { |
} else { |
| 4529 |
!!!cp ('t141'); |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 4530 |
} |
} |
| 4531 |
|
} elsif ($token->{tag_name} eq 'p') { |
| 4532 |
# |
!!!cp ('t142'); |
| 4533 |
} elsif ({ |
!!!parse-error (type => 'unmatched end tag:p', token => $token); |
| 4534 |
p => 1, br => 1, |
## Ignore the token |
| 4535 |
}->{$token->{tag_name}}) { |
!!!next-token; |
| 4536 |
|
next B; |
| 4537 |
|
} elsif ($token->{tag_name} eq 'br') { |
| 4538 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 4539 |
!!!cp ('t142'); |
!!!cp ('t142.2'); |
| 4540 |
## As if <head> |
## (before head) as if <head>, (in head) as if </head> |
| 4541 |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
| 4542 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 4543 |
push @{$self->{open_elements}}, |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 4544 |
[$self->{head_element}, $el_category->{head}]; |
|
| 4545 |
|
## Reprocess in the "after head" insertion mode... |
| 4546 |
|
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 4547 |
|
!!!cp ('t143.2'); |
| 4548 |
|
## As if </head> |
| 4549 |
|
pop @{$self->{open_elements}}; |
| 4550 |
|
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 4551 |
|
|
| 4552 |
|
## Reprocess in the "after head" insertion mode... |
| 4553 |
|
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 4554 |
|
!!!cp ('t143.3'); |
| 4555 |
|
## ISSUE: Two parse errors for <head><noscript></br> |
| 4556 |
|
!!!parse-error (type => 'unmatched end tag:br', token => $token); |
| 4557 |
|
## As if </noscript> |
| 4558 |
|
pop @{$self->{open_elements}}; |
| 4559 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 4560 |
|
|
| 4561 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 4562 |
} else { |
## As if </head> |
| 4563 |
!!!cp ('t143'); |
pop @{$self->{open_elements}}; |
| 4564 |
} |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 4565 |
|
|
| 4566 |
# |
## Reprocess in the "after head" insertion mode... |
| 4567 |
} else { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 4568 |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
!!!cp ('t143.4'); |
|
!!!cp ('t144'); |
|
| 4569 |
# |
# |
| 4570 |
} else { |
} else { |
| 4571 |
!!!cp ('t145'); |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
next B; |
|
| 4572 |
} |
} |
| 4573 |
|
|
| 4574 |
|
## ISSUE: does not agree with IE7 - it doesn't ignore </br>. |
| 4575 |
|
!!!parse-error (type => 'unmatched end tag:br', token => $token); |
| 4576 |
|
## Ignore the token |
| 4577 |
|
!!!next-token; |
| 4578 |
|
next B; |
| 4579 |
|
} else { |
| 4580 |
|
!!!cp ('t145'); |
| 4581 |
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
| 4582 |
|
## Ignore the token |
| 4583 |
|
!!!next-token; |
| 4584 |
|
next B; |
| 4585 |
} |
} |
| 4586 |
|
|
| 4587 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |