11 |
## TODO: 1252 parse error (revision 1264) |
## TODO: 1252 parse error (revision 1264) |
12 |
## TODO: 8859-11 = 874 (revision 1271) |
## TODO: 8859-11 = 874 (revision 1271) |
13 |
|
|
14 |
|
require IO::Handle; |
15 |
|
|
16 |
my $HTML_NS = q<http://www.w3.org/1999/xhtml>; |
my $HTML_NS = q<http://www.w3.org/1999/xhtml>; |
17 |
my $MML_NS = q<http://www.w3.org/1998/Math/MathML>; |
my $MML_NS = q<http://www.w3.org/1998/Math/MathML>; |
18 |
my $SVG_NS = q<http://www.w3.org/2000/svg>; |
my $SVG_NS = q<http://www.w3.org/2000/svg>; |
568 |
|
|
569 |
sub parse_char_string ($$$;$) { |
sub parse_char_string ($$$;$) { |
570 |
my $self = shift; |
my $self = shift; |
571 |
open my $input, '<:utf8', ref $_[0] ? $_[0] : \($_[0]); |
require utf8; |
572 |
|
my $s = ref $_[0] ? $_[0] : \($_[0]); |
573 |
|
open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s; |
574 |
return $self->parse_char_stream ($input, @_[1..$#_]); |
return $self->parse_char_stream ($input, @_[1..$#_]); |
575 |
} # parse_char_string |
} # parse_char_string |
576 |
*parse_string = \&parse_char_string; |
*parse_string = \&parse_char_string; |
596 |
pop @{$self->{prev_char}}; |
pop @{$self->{prev_char}}; |
597 |
unshift @{$self->{prev_char}}, $self->{next_char}; |
unshift @{$self->{prev_char}}, $self->{next_char}; |
598 |
|
|
599 |
my $char = $input->getc; |
my $char; |
600 |
|
if (defined $self->{next_next_char}) { |
601 |
|
$char = $self->{next_next_char}; |
602 |
|
delete $self->{next_next_char}; |
603 |
|
} else { |
604 |
|
$char = $input->getc; |
605 |
|
} |
606 |
$self->{next_char} = -1 and return unless defined $char; |
$self->{next_char} = -1 and return unless defined $char; |
607 |
$self->{next_char} = ord $char; |
$self->{next_char} = ord $char; |
608 |
|
|
617 |
} elsif ($self->{next_char} == 0x000D) { # CR |
} elsif ($self->{next_char} == 0x000D) { # CR |
618 |
!!!cp ('j2'); |
!!!cp ('j2'); |
619 |
my $next = $input->getc; |
my $next = $input->getc; |
620 |
if ($next ne "\x0A") { |
if (defined $next and $next ne "\x0A") { |
621 |
$input->ungetc ($next); |
$self->{next_next_char} = $next; |
622 |
} |
} |
623 |
$self->{next_char} = 0x000A; # LF # MUST |
$self->{next_char} = 0x000A; # LF # MUST |
624 |
$self->{line}++; |
$self->{line}++; |
4154 |
!!!next-token; |
!!!next-token; |
4155 |
next B; |
next B; |
4156 |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
4157 |
!!!cp ('t94'); |
!!!cp ('t93.2'); |
4158 |
# |
!!!parse-error (type => 'after head:head', token => $token); ## TODO: error type |
4159 |
|
## Ignore the token |
4160 |
|
!!!nack ('t93.3'); |
4161 |
|
!!!next-token; |
4162 |
|
next B; |
4163 |
} else { |
} else { |
4164 |
!!!cp ('t95'); |
!!!cp ('t95'); |
4165 |
!!!parse-error (type => 'in head:head', token => $token); # or in head noscript |
!!!parse-error (type => 'in head:head', token => $token); # or in head noscript |
4481 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
4482 |
!!!next-token; |
!!!next-token; |
4483 |
next B; |
next B; |
4484 |
|
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
4485 |
|
!!!cp ('t134.1'); |
4486 |
|
!!!parse-error (type => 'unmatched end tag:head', token => $token); |
4487 |
|
## Ignore the token |
4488 |
|
!!!next-token; |
4489 |
|
next B; |
4490 |
} else { |
} else { |
4491 |
!!!cp ('t135'); |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
|
# |
|
4492 |
} |
} |
4493 |
} elsif ($token->{tag_name} eq 'noscript') { |
} elsif ($token->{tag_name} eq 'noscript') { |
4494 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
4497 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
4498 |
!!!next-token; |
!!!next-token; |
4499 |
next B; |
next B; |
4500 |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or |
4501 |
|
$self->{insertion_mode} == AFTER_HEAD_IM) { |
4502 |
!!!cp ('t137'); |
!!!cp ('t137'); |
4503 |
!!!parse-error (type => 'unmatched end tag:noscript', token => $token); |
!!!parse-error (type => 'unmatched end tag:noscript', token => $token); |
4504 |
## Ignore the token ## ISSUE: An issue in the spec. |
## Ignore the token ## ISSUE: An issue in the spec. |
4511 |
} elsif ({ |
} elsif ({ |
4512 |
body => 1, html => 1, |
body => 1, html => 1, |
4513 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
4514 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM or |
4515 |
!!!cp ('t139'); |
$self->{insertion_mode} == IN_HEAD_IM or |
4516 |
## As if <head> |
$self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
|
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
|
|
push @{$self->{open_elements}}, |
|
|
[$self->{head_element}, $el_category->{head}]; |
|
|
|
|
|
$self->{insertion_mode} = IN_HEAD_IM; |
|
|
## Reprocess in the "in head" insertion mode... |
|
|
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
|
4517 |
!!!cp ('t140'); |
!!!cp ('t140'); |
4518 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
4519 |
## Ignore the token |
## Ignore the token |
4520 |
!!!next-token; |
!!!next-token; |
4521 |
next B; |
next B; |
4522 |
|
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
4523 |
|
!!!cp ('t140.1'); |
4524 |
|
!!!parse-error (type => 'unmatched end tag:' . $token->{tag_name}, token => $token); |
4525 |
|
## Ignore the token |
4526 |
|
!!!next-token; |
4527 |
|
next B; |
4528 |
} else { |
} else { |
4529 |
!!!cp ('t141'); |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
4530 |
} |
} |
4531 |
|
} elsif ($token->{tag_name} eq 'p') { |
4532 |
# |
!!!cp ('t142'); |
4533 |
} elsif ({ |
!!!parse-error (type => 'unmatched end tag:p', token => $token); |
4534 |
p => 1, br => 1, |
## Ignore the token |
4535 |
}->{$token->{tag_name}}) { |
!!!next-token; |
4536 |
|
next B; |
4537 |
|
} elsif ($token->{tag_name} eq 'br') { |
4538 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
4539 |
!!!cp ('t142'); |
!!!cp ('t142.2'); |
4540 |
## As if <head> |
## (before head) as if <head>, (in head) as if </head> |
4541 |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
4542 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
4543 |
push @{$self->{open_elements}}, |
$self->{insertion_mode} = AFTER_HEAD_IM; |
4544 |
[$self->{head_element}, $el_category->{head}]; |
|
4545 |
|
## Reprocess in the "after head" insertion mode... |
4546 |
|
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
4547 |
|
!!!cp ('t143.2'); |
4548 |
|
## As if </head> |
4549 |
|
pop @{$self->{open_elements}}; |
4550 |
|
$self->{insertion_mode} = AFTER_HEAD_IM; |
4551 |
|
|
4552 |
|
## Reprocess in the "after head" insertion mode... |
4553 |
|
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
4554 |
|
!!!cp ('t143.3'); |
4555 |
|
## ISSUE: Two parse errors for <head><noscript></br> |
4556 |
|
!!!parse-error (type => 'unmatched end tag:br', token => $token); |
4557 |
|
## As if </noscript> |
4558 |
|
pop @{$self->{open_elements}}; |
4559 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
4560 |
|
|
4561 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
4562 |
} else { |
## As if </head> |
4563 |
!!!cp ('t143'); |
pop @{$self->{open_elements}}; |
4564 |
} |
$self->{insertion_mode} = AFTER_HEAD_IM; |
4565 |
|
|
4566 |
# |
## Reprocess in the "after head" insertion mode... |
4567 |
} else { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
4568 |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
!!!cp ('t143.4'); |
|
!!!cp ('t144'); |
|
4569 |
# |
# |
4570 |
} else { |
} else { |
4571 |
!!!cp ('t145'); |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
next B; |
|
4572 |
} |
} |
4573 |
|
|
4574 |
|
## ISSUE: does not agree with IE7 - it doesn't ignore </br>. |
4575 |
|
!!!parse-error (type => 'unmatched end tag:br', token => $token); |
4576 |
|
## Ignore the token |
4577 |
|
!!!next-token; |
4578 |
|
next B; |
4579 |
|
} else { |
4580 |
|
!!!cp ('t145'); |
4581 |
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token); |
4582 |
|
## Ignore the token |
4583 |
|
!!!next-token; |
4584 |
|
next B; |
4585 |
} |
} |
4586 |
|
|
4587 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |