| 7 |
## doc.write (''); |
## doc.write (''); |
| 8 |
## alert (doc.compatMode); |
## alert (doc.compatMode); |
| 9 |
|
|
| 10 |
|
## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT |
| 11 |
|
## strip BOM and the HTML layer MUST ignore it. Whether we can do it |
| 12 |
|
## is not yet clear. |
| 13 |
|
## Is "{U+FEFF}..." in UTF-16BE/UTF-16LE three or four characters? |
| 14 |
|
## "{U+FEFF}..." in GB18030? |
| 15 |
|
|
| 16 |
my $permitted_slash_tag_name = { |
my $permitted_slash_tag_name = { |
| 17 |
base => 1, |
base => 1, |
| 18 |
link => 1, |
link => 1, |
| 150 |
return $self; |
return $self; |
| 151 |
} # new |
} # new |
| 152 |
|
|
| 153 |
|
sub CM_ENTITY () { 0b001 } # bit 0: "&" in data is markup (the data state then enters 'entity data') |
| 154 |
|
sub CM_LIMITED_MARKUP () { 0b010 } # bit 1: "<" in data is markup, limited form (part of CDATA and RCDATA) |
| 155 |
|
sub CM_FULL_MARKUP () { 0b100 } # bit 2: "<" in data is markup, any form (part of PCDATA only) |
| 156 |
|
|
| 157 |
|
sub PLAINTEXT_CONTENT_MODEL () { 0 } |
| 158 |
|
sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP } |
| 159 |
|
sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP } |
| 160 |
|
sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP } |
| 161 |
|
|
| 162 |
## Implementations MUST act as if state machine in the spec |
## Implementations MUST act as if state machine in the spec |
| 163 |
|
|
| 164 |
sub _initialize_tokenizer ($) { |
sub _initialize_tokenizer ($) { |
| 165 |
my $self = shift; |
my $self = shift; |
| 166 |
$self->{state} = 'data'; # MUST |
$self->{state} = 'data'; # MUST |
| 167 |
$self->{content_model_flag} = 'PCDATA'; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
| 168 |
undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE |
undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE |
| 169 |
undef $self->{current_attribute}; |
undef $self->{current_attribute}; |
| 170 |
undef $self->{last_emitted_start_tag_name}; |
undef $self->{last_emitted_start_tag_name}; |
| 203 |
A: { |
A: { |
| 204 |
if ($self->{state} eq 'data') { |
if ($self->{state} eq 'data') { |
| 205 |
if ($self->{next_input_character} == 0x0026) { # & |
if ($self->{next_input_character} == 0x0026) { # & |
| 206 |
if ($self->{content_model_flag} eq 'PCDATA' or |
if ($self->{content_model} & CM_ENTITY) { # PCDATA | RCDATA |
|
$self->{content_model_flag} eq 'RCDATA') { |
|
| 207 |
$self->{state} = 'entity data'; |
$self->{state} = 'entity data'; |
| 208 |
!!!next-input-character; |
!!!next-input-character; |
| 209 |
redo A; |
redo A; |
| 211 |
# |
# |
| 212 |
} |
} |
| 213 |
} elsif ($self->{next_input_character} == 0x002D) { # - |
} elsif ($self->{next_input_character} == 0x002D) { # - |
| 214 |
if ($self->{content_model_flag} eq 'RCDATA' or |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
|
$self->{content_model_flag} eq 'CDATA') { |
|
| 215 |
unless ($self->{escape}) { |
unless ($self->{escape}) { |
| 216 |
if ($self->{prev_input_character}->[0] == 0x002D and # - |
if ($self->{prev_input_character}->[0] == 0x002D and # - |
| 217 |
$self->{prev_input_character}->[1] == 0x0021 and # ! |
$self->{prev_input_character}->[1] == 0x0021 and # ! |
| 223 |
|
|
| 224 |
# |
# |
| 225 |
} elsif ($self->{next_input_character} == 0x003C) { # < |
} elsif ($self->{next_input_character} == 0x003C) { # < |
| 226 |
if ($self->{content_model_flag} eq 'PCDATA' or |
if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA |
| 227 |
(($self->{content_model_flag} eq 'CDATA' or |
(($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA |
|
$self->{content_model_flag} eq 'RCDATA') and |
|
| 228 |
not $self->{escape})) { |
not $self->{escape})) { |
| 229 |
$self->{state} = 'tag open'; |
$self->{state} = 'tag open'; |
| 230 |
!!!next-input-character; |
!!!next-input-character; |
| 234 |
} |
} |
| 235 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 236 |
if ($self->{escape} and |
if ($self->{escape} and |
| 237 |
($self->{content_model_flag} eq 'RCDATA' or |
($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA |
|
$self->{content_model_flag} eq 'CDATA')) { |
|
| 238 |
if ($self->{prev_input_character}->[0] == 0x002D and # - |
if ($self->{prev_input_character}->[0] == 0x002D and # - |
| 239 |
$self->{prev_input_character}->[1] == 0x002D) { # - |
$self->{prev_input_character}->[1] == 0x002D) { # - |
| 240 |
delete $self->{escape}; |
delete $self->{escape}; |
| 271 |
|
|
| 272 |
redo A; |
redo A; |
| 273 |
} elsif ($self->{state} eq 'tag open') { |
} elsif ($self->{state} eq 'tag open') { |
| 274 |
if ($self->{content_model_flag} eq 'RCDATA' or |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
|
$self->{content_model_flag} eq 'CDATA') { |
|
| 275 |
if ($self->{next_input_character} == 0x002F) { # / |
if ($self->{next_input_character} == 0x002F) { # / |
| 276 |
!!!next-input-character; |
!!!next-input-character; |
| 277 |
$self->{state} = 'close tag open'; |
$self->{state} = 'close tag open'; |
| 284 |
|
|
| 285 |
redo A; |
redo A; |
| 286 |
} |
} |
| 287 |
} elsif ($self->{content_model_flag} eq 'PCDATA') { |
} elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA |
| 288 |
if ($self->{next_input_character} == 0x0021) { # ! |
if ($self->{next_input_character} == 0x0021) { # ! |
| 289 |
$self->{state} = 'markup declaration open'; |
$self->{state} = 'markup declaration open'; |
| 290 |
!!!next-input-character; |
!!!next-input-character; |
| 331 |
redo A; |
redo A; |
| 332 |
} |
} |
| 333 |
} else { |
} else { |
| 334 |
die "$0: $self->{content_model_flag}: Unknown content model flag"; |
die "$0: $self->{content_model} in tag open"; |
| 335 |
} |
} |
| 336 |
} elsif ($self->{state} eq 'close tag open') { |
} elsif ($self->{state} eq 'close tag open') { |
| 337 |
if ($self->{content_model_flag} eq 'RCDATA' or |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
|
$self->{content_model_flag} eq 'CDATA') { |
|
| 338 |
if (defined $self->{last_emitted_start_tag_name}) { |
if (defined $self->{last_emitted_start_tag_name}) { |
| 339 |
|
## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564> |
| 340 |
my @next_char; |
my @next_char; |
| 341 |
TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) { |
TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) { |
| 342 |
push @next_char, $self->{next_input_character}; |
push @next_char, $self->{next_input_character}; |
| 428 |
redo A; |
redo A; |
| 429 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 430 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 431 |
|
$self->{current_token}->{first_start_tag} |
| 432 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 433 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 434 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 435 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 436 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 437 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 438 |
} |
} |
| 455 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 456 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 457 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 458 |
|
$self->{current_token}->{first_start_tag} |
| 459 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 460 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 461 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 462 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 463 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 464 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 465 |
} |
} |
| 503 |
redo A; |
redo A; |
| 504 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 505 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 506 |
|
$self->{current_token}->{first_start_tag} |
| 507 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 508 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 509 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 510 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 511 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 512 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 513 |
} |
} |
| 543 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 544 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 545 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 546 |
|
$self->{current_token}->{first_start_tag} |
| 547 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 548 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 549 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 550 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 551 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 552 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 553 |
} |
} |
| 571 |
my $before_leave = sub { |
my $before_leave = sub { |
| 572 |
if (exists $self->{current_token}->{attributes} # start tag or end tag |
if (exists $self->{current_token}->{attributes} # start tag or end tag |
| 573 |
->{$self->{current_attribute}->{name}}) { # MUST |
->{$self->{current_attribute}->{name}}) { # MUST |
| 574 |
!!!parse-error (type => 'dupulicate attribute'); |
!!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}); |
| 575 |
## Discard $self->{current_attribute} # MUST |
## Discard $self->{current_attribute} # MUST |
| 576 |
} else { |
} else { |
| 577 |
$self->{current_token}->{attributes}->{$self->{current_attribute}->{name}} |
$self->{current_token}->{attributes}->{$self->{current_attribute}->{name}} |
| 596 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 597 |
$before_leave->(); |
$before_leave->(); |
| 598 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 599 |
|
$self->{current_token}->{first_start_tag} |
| 600 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 601 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 602 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 603 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 604 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 605 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 606 |
} |
} |
| 637 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 638 |
$before_leave->(); |
$before_leave->(); |
| 639 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 640 |
|
$self->{current_token}->{first_start_tag} |
| 641 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 642 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 643 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 644 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 645 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 646 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 647 |
} |
} |
| 675 |
redo A; |
redo A; |
| 676 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 677 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 678 |
|
$self->{current_token}->{first_start_tag} |
| 679 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 680 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 681 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 682 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 683 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 684 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 685 |
} |
} |
| 708 |
# |
# |
| 709 |
} else { |
} else { |
| 710 |
!!!parse-error (type => 'nestc'); |
!!!parse-error (type => 'nestc'); |
| 711 |
|
## TODO: Different error type for <aa / bb> than <aa/> |
| 712 |
} |
} |
| 713 |
$self->{state} = 'before attribute name'; |
$self->{state} = 'before attribute name'; |
| 714 |
# next-input-character is already done |
# next-input-character is already done |
| 716 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 717 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 718 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 719 |
|
$self->{current_token}->{first_start_tag} |
| 720 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 721 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 722 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 723 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 724 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 725 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 726 |
} |
} |
| 763 |
redo A; |
redo A; |
| 764 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 765 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 766 |
|
$self->{current_token}->{first_start_tag} |
| 767 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 768 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 769 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 770 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 771 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 772 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 773 |
} |
} |
| 783 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 784 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 785 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 786 |
|
$self->{current_token}->{first_start_tag} |
| 787 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 788 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 789 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 790 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 791 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 792 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 793 |
} |
} |
| 819 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 820 |
!!!parse-error (type => 'unclosed attribute value'); |
!!!parse-error (type => 'unclosed attribute value'); |
| 821 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 822 |
|
$self->{current_token}->{first_start_tag} |
| 823 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 824 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 825 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 826 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 827 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 828 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 829 |
} |
} |
| 855 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 856 |
!!!parse-error (type => 'unclosed attribute value'); |
!!!parse-error (type => 'unclosed attribute value'); |
| 857 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 858 |
|
$self->{current_token}->{first_start_tag} |
| 859 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 860 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 861 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 862 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 863 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 864 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 865 |
} |
} |
| 894 |
redo A; |
redo A; |
| 895 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 896 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 897 |
|
$self->{current_token}->{first_start_tag} |
| 898 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 899 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 900 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 901 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 902 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 903 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 904 |
} |
} |
| 914 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 915 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 916 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} eq 'start tag') { |
| 917 |
|
$self->{current_token}->{first_start_tag} |
| 918 |
|
= not defined $self->{last_emitted_start_tag_name}; |
| 919 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 920 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
| 921 |
$self->{content_model_flag} = 'PCDATA'; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 922 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 923 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 924 |
} |
} |
| 1029 |
} |
} |
| 1030 |
} |
} |
| 1031 |
|
|
| 1032 |
!!!parse-error (type => 'bogus comment open'); |
!!!parse-error (type => 'bogus comment'); |
| 1033 |
$self->{next_input_character} = shift @next_char; |
$self->{next_input_character} = shift @next_char; |
| 1034 |
!!!back-next-input-character (@next_char); |
!!!back-next-input-character (@next_char); |
| 1035 |
$self->{state} = 'bogus comment'; |
$self->{state} = 'bogus comment'; |
| 1088 |
redo A; |
redo A; |
| 1089 |
} else { |
} else { |
| 1090 |
$self->{current_token}->{data} # comment |
$self->{current_token}->{data} # comment |
| 1091 |
.= chr ($self->{next_input_character}); |
.= '-' . chr ($self->{next_input_character}); |
| 1092 |
$self->{state} = 'comment'; |
$self->{state} = 'comment'; |
| 1093 |
!!!next-input-character; |
!!!next-input-character; |
| 1094 |
redo A; |
redo A; |
| 1498 |
|
|
| 1499 |
redo A; |
redo A; |
| 1500 |
} else { |
} else { |
| 1501 |
!!!parse-error (type => 'string after PUBLIC literal'); |
!!!parse-error (type => 'string after SYSTEM'); |
| 1502 |
$self->{state} = 'bogus DOCTYPE'; |
$self->{state} = 'bogus DOCTYPE'; |
| 1503 |
!!!next-input-character; |
!!!next-input-character; |
| 1504 |
redo A; |
redo A; |
| 1647 |
redo X; |
redo X; |
| 1648 |
} elsif (not defined $code) { # no hexadecimal digit |
} elsif (not defined $code) { # no hexadecimal digit |
| 1649 |
!!!parse-error (type => 'bare hcro'); |
!!!parse-error (type => 'bare hcro'); |
| 1650 |
|
!!!back-next-input-character ($x_char, $self->{next_input_character}); |
| 1651 |
$self->{next_input_character} = 0x0023; # # |
$self->{next_input_character} = 0x0023; # # |
|
!!!back-next-input-character ($x_char); |
|
| 1652 |
return undef; |
return undef; |
| 1653 |
} elsif ($self->{next_input_character} == 0x003B) { # ; |
} elsif ($self->{next_input_character} == 0x003B) { # ; |
| 1654 |
!!!next-input-character; |
!!!next-input-character; |
| 1666 |
!!!parse-error (type => 'CR character reference'); |
!!!parse-error (type => 'CR character reference'); |
| 1667 |
$code = 0x000A; |
$code = 0x000A; |
| 1668 |
} elsif (0x80 <= $code and $code <= 0x9F) { |
} elsif (0x80 <= $code and $code <= 0x9F) { |
| 1669 |
!!!parse-error (type => sprintf 'c1 entity:U+%04X', $code); |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
| 1670 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
| 1671 |
} |
} |
| 1672 |
|
|
| 1701 |
!!!parse-error (type => 'CR character reference'); |
!!!parse-error (type => 'CR character reference'); |
| 1702 |
$code = 0x000A; |
$code = 0x000A; |
| 1703 |
} elsif (0x80 <= $code and $code <= 0x9F) { |
} elsif (0x80 <= $code and $code <= 0x9F) { |
| 1704 |
!!!parse-error (type => sprintf 'c1 entity:U+%04X', $code); |
!!!parse-error (type => sprintf 'C1 character reference:U+%04X', $code); |
| 1705 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
| 1706 |
} |
} |
| 1707 |
|
|
| 1720 |
!!!next-input-character; |
!!!next-input-character; |
| 1721 |
|
|
| 1722 |
my $value = $entity_name; |
my $value = $entity_name; |
| 1723 |
my $match; |
my $match = 0; |
| 1724 |
require Whatpm::_NamedEntityList; |
require Whatpm::_NamedEntityList; |
| 1725 |
our $EntityChar; |
our $EntityChar; |
| 1726 |
|
|
| 1740 |
$match = 1; |
$match = 1; |
| 1741 |
!!!next-input-character; |
!!!next-input-character; |
| 1742 |
last; |
last; |
| 1743 |
} elsif (not $in_attr) { |
} else { |
| 1744 |
$value = $EntityChar->{$entity_name}; |
$value = $EntityChar->{$entity_name}; |
| 1745 |
$match = -1; |
$match = -1; |
| 1746 |
} else { |
!!!next-input-character; |
|
$value .= chr $self->{next_input_character}; |
|
| 1747 |
} |
} |
| 1748 |
} else { |
} else { |
| 1749 |
$value .= chr $self->{next_input_character}; |
$value .= chr $self->{next_input_character}; |
| 1750 |
|
$match *= 2; |
| 1751 |
|
!!!next-input-character; |
| 1752 |
} |
} |
|
!!!next-input-character; |
|
| 1753 |
} |
} |
| 1754 |
|
|
| 1755 |
if ($match > 0) { |
if ($match > 0) { |
| 1756 |
return {type => 'character', data => $value}; |
return {type => 'character', data => $value}; |
| 1757 |
} elsif ($match < 0) { |
} elsif ($match < 0) { |
| 1758 |
!!!parse-error (type => 'refc'); |
!!!parse-error (type => 'no refc'); |
| 1759 |
return {type => 'character', data => $value}; |
if ($in_attr and $match < -1) { |
| 1760 |
|
return {type => 'character', data => '&'.$entity_name}; |
| 1761 |
|
} else { |
| 1762 |
|
return {type => 'character', data => $value}; |
| 1763 |
|
} |
| 1764 |
} else { |
} else { |
| 1765 |
!!!parse-error (type => 'bare ero'); |
!!!parse-error (type => 'bare ero'); |
| 1766 |
## NOTE: No characters are consumed in the spec. |
## NOTE: No characters are consumed in the spec. |
| 2025 |
my $root_element; !!!create-element ($root_element, 'html'); |
my $root_element; !!!create-element ($root_element, 'html'); |
| 2026 |
$self->{document}->append_child ($root_element); |
$self->{document}->append_child ($root_element); |
| 2027 |
push @{$self->{open_elements}}, [$root_element, 'html']; |
push @{$self->{open_elements}}, [$root_element, 'html']; |
|
#$phase = 'main'; |
|
| 2028 |
## reprocess |
## reprocess |
| 2029 |
#redo B; |
#redo B; |
| 2030 |
return; |
return; ## Go to the main phase. |
| 2031 |
} # B |
} # B |
| 2032 |
} # _tree_construction_root_element |
} # _tree_construction_root_element |
| 2033 |
|
|
| 2043 |
|
|
| 2044 |
## Step 3 |
## Step 3 |
| 2045 |
S3: { |
S3: { |
| 2046 |
$last = 1 if $self->{open_elements}->[0]->[0] eq $node->[0]; |
## ISSUE: Oops! "If node is the first node in the stack of open |
| 2047 |
if (defined $self->{inner_html_node}) { |
## elements, then set last to true. If the context element of the |
| 2048 |
if ($self->{inner_html_node}->[1] eq 'td' or |
## HTML fragment parsing algorithm is neither a td element nor a |
| 2049 |
$self->{inner_html_node}->[1] eq 'th') { |
## th element, then set node to the context element. (fragment case)": |
| 2050 |
# |
## The second "if" is in the scope of the first "if"!? |
| 2051 |
} else { |
if ($self->{open_elements}->[0]->[0] eq $node->[0]) { |
| 2052 |
$node = $self->{inner_html_node}; |
$last = 1; |
| 2053 |
|
if (defined $self->{inner_html_node}) { |
| 2054 |
|
if ($self->{inner_html_node}->[1] eq 'td' or |
| 2055 |
|
$self->{inner_html_node}->[1] eq 'th') { |
| 2056 |
|
# |
| 2057 |
|
} else { |
| 2058 |
|
$node = $self->{inner_html_node}; |
| 2059 |
|
} |
| 2060 |
} |
} |
| 2061 |
} |
} |
| 2062 |
|
|
| 2103 |
sub _tree_construction_main ($) { |
sub _tree_construction_main ($) { |
| 2104 |
my $self = shift; |
my $self = shift; |
| 2105 |
|
|
| 2106 |
my $phase = 'main'; |
my $previous_insertion_mode; |
| 2107 |
|
|
| 2108 |
my $active_formatting_elements = []; |
my $active_formatting_elements = []; |
| 2109 |
|
|
| 2199 |
$insert->($el); # /context node/->append_child ($el) |
$insert->($el); # /context node/->append_child ($el) |
| 2200 |
|
|
| 2201 |
## Step 3 |
## Step 3 |
| 2202 |
$self->{content_model_flag} = $content_model_flag; # CDATA or RCDATA |
$self->{content_model} = $content_model_flag; # CDATA or RCDATA |
| 2203 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 2204 |
|
|
| 2205 |
## Step 4 |
## Step 4 |
| 2217 |
} |
} |
| 2218 |
|
|
| 2219 |
## Step 6 |
## Step 6 |
| 2220 |
$self->{content_model_flag} = 'PCDATA'; |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
| 2221 |
|
|
| 2222 |
## Step 7 |
## Step 7 |
| 2223 |
if ($token->{type} eq 'end tag' and $token->{tag_name} eq $start_tag_name) { |
if ($token->{type} eq 'end tag' and $token->{tag_name} eq $start_tag_name) { |
| 2224 |
## Ignore the token |
## Ignore the token |
| 2225 |
|
} elsif ($content_model_flag == CDATA_CONTENT_MODEL) { |
| 2226 |
|
!!!parse-error (type => 'in CDATA:#'.$token->{type}); |
| 2227 |
|
} elsif ($content_model_flag == RCDATA_CONTENT_MODEL) { |
| 2228 |
|
!!!parse-error (type => 'in RCDATA:#'.$token->{type}); |
| 2229 |
} else { |
} else { |
| 2230 |
!!!parse-error (type => 'in '.$content_model_flag.':#'.$token->{type}); |
die "$0: $content_model_flag in parse_rcdata"; |
| 2231 |
} |
} |
| 2232 |
!!!next-token; |
!!!next-token; |
| 2233 |
}; # $parse_rcdata |
}; # $parse_rcdata |
| 2238 |
!!!create-element ($script_el, 'script', $token->{attributes}); |
!!!create-element ($script_el, 'script', $token->{attributes}); |
| 2239 |
## TODO: mark as "parser-inserted" |
## TODO: mark as "parser-inserted" |
| 2240 |
|
|
| 2241 |
$self->{content_model_flag} = 'CDATA'; |
$self->{content_model} = CDATA_CONTENT_MODEL; |
| 2242 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 2243 |
|
|
| 2244 |
my $text = ''; |
my $text = ''; |
| 2251 |
$script_el->manakai_append_text ($text); |
$script_el->manakai_append_text ($text); |
| 2252 |
} |
} |
| 2253 |
|
|
| 2254 |
$self->{content_model_flag} = 'PCDATA'; |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
| 2255 |
|
|
| 2256 |
if ($token->{type} eq 'end tag' and |
if ($token->{type} eq 'end tag' and |
| 2257 |
$token->{tag_name} eq 'script') { |
$token->{tag_name} eq 'script') { |
| 2505 |
return; |
return; |
| 2506 |
} elsif ($token->{tag_name} eq 'style') { |
} elsif ($token->{tag_name} eq 'style') { |
| 2507 |
## NOTE: This is an "as if in head" code clone |
## NOTE: This is an "as if in head" code clone |
| 2508 |
$parse_rcdata->('CDATA', $insert); |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert); |
| 2509 |
return; |
return; |
| 2510 |
} elsif ({ |
} elsif ({ |
| 2511 |
base => 1, link => 1, meta => 1, |
base => 1, link => 1, |
| 2512 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2513 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 2514 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 2515 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 2516 |
!!!next-token; |
!!!next-token; |
| 2517 |
## TODO: Extracting |charset| from |meta|. |
return; |
| 2518 |
|
} elsif ($token->{tag_name} eq 'meta') { |
| 2519 |
|
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 2520 |
|
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 2521 |
|
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 2522 |
|
|
| 2523 |
|
unless ($self->{confident}) { |
| 2524 |
|
my $charset; |
| 2525 |
|
if ($token->{attributes}->{charset}) { ## TODO: And if supported |
| 2526 |
|
$charset = $token->{attributes}->{charset}->{value}; |
| 2527 |
|
} |
| 2528 |
|
if ($token->{attributes}->{'http-equiv'}) { |
| 2529 |
|
## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to the definition. |
| 2530 |
|
if ($token->{attributes}->{'http-equiv'}->{value} |
| 2531 |
|
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
| 2532 |
|
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 2533 |
|
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
| 2534 |
|
$charset = defined $1 ? $1 : defined $2 ? $2 : $3; |
| 2535 |
|
} ## TODO: And if supported |
| 2536 |
|
} |
| 2537 |
|
## TODO: Change the encoding |
| 2538 |
|
} |
| 2539 |
|
|
| 2540 |
|
!!!next-token; |
| 2541 |
return; |
return; |
| 2542 |
} elsif ($token->{tag_name} eq 'title') { |
} elsif ($token->{tag_name} eq 'title') { |
| 2543 |
!!!parse-error (type => 'in body:title'); |
!!!parse-error (type => 'in body:title'); |
| 2544 |
## NOTE: This is an "as if in head" code clone |
## NOTE: This is an "as if in head" code clone |
| 2545 |
$parse_rcdata->('RCDATA', $insert); |
$parse_rcdata->(RCDATA_CONTENT_MODEL, sub { |
| 2546 |
|
if (defined $self->{head_element}) { |
| 2547 |
|
$self->{head_element}->append_child ($_[0]); |
| 2548 |
|
} else { |
| 2549 |
|
$insert->($_[0]); |
| 2550 |
|
} |
| 2551 |
|
}); |
| 2552 |
return; |
return; |
| 2553 |
} elsif ($token->{tag_name} eq 'body') { |
} elsif ($token->{tag_name} eq 'body') { |
| 2554 |
!!!parse-error (type => 'in body:body'); |
!!!parse-error (type => 'in body:body'); |
| 2651 |
if ($i != -1) { |
if ($i != -1) { |
| 2652 |
!!!parse-error (type => 'end tag missing:'. |
!!!parse-error (type => 'end tag missing:'. |
| 2653 |
$self->{open_elements}->[-1]->[1]); |
$self->{open_elements}->[-1]->[1]); |
|
## TODO: test |
|
| 2654 |
} |
} |
| 2655 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 2656 |
last LI; |
last LI; |
| 2698 |
if ($i != -1) { |
if ($i != -1) { |
| 2699 |
!!!parse-error (type => 'end tag missing:'. |
!!!parse-error (type => 'end tag missing:'. |
| 2700 |
$self->{open_elements}->[-1]->[1]); |
$self->{open_elements}->[-1]->[1]); |
|
## TODO: test |
|
| 2701 |
} |
} |
| 2702 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 2703 |
last LI; |
last LI; |
| 2738 |
|
|
| 2739 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 2740 |
|
|
| 2741 |
$self->{content_model_flag} = 'PLAINTEXT'; |
$self->{content_model} = PLAINTEXT_CONTENT_MODEL; |
| 2742 |
|
|
| 2743 |
!!!next-token; |
!!!next-token; |
| 2744 |
return; |
return; |
| 2841 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 2842 |
my $node = $self->{open_elements}->[$_]; |
my $node = $self->{open_elements}->[$_]; |
| 2843 |
if ($node->[1] eq 'nobr') { |
if ($node->[1] eq 'nobr') { |
| 2844 |
|
!!!parse-error (type => 'not closed:nobr'); |
| 2845 |
!!!back-token; |
!!!back-token; |
| 2846 |
$token = {type => 'end tag', tag_name => 'nobr'}; |
$token = {type => 'end tag', tag_name => 'nobr'}; |
| 2847 |
return; |
return; |
| 2893 |
return; |
return; |
| 2894 |
} elsif ($token->{tag_name} eq 'xmp') { |
} elsif ($token->{tag_name} eq 'xmp') { |
| 2895 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 2896 |
$parse_rcdata->('CDATA', $insert); |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert); |
| 2897 |
return; |
return; |
| 2898 |
} elsif ($token->{tag_name} eq 'table') { |
} elsif ($token->{tag_name} eq 'table') { |
| 2899 |
## has a p element in scope |
## has a p element in scope |
| 2925 |
!!!parse-error (type => 'image'); |
!!!parse-error (type => 'image'); |
| 2926 |
$token->{tag_name} = 'img'; |
$token->{tag_name} = 'img'; |
| 2927 |
} |
} |
| 2928 |
|
|
| 2929 |
|
## NOTE: There is an "as if <br>" code clone. |
| 2930 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 2931 |
|
|
| 2932 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 3009 |
!!!create-element ($el, $token->{tag_name}, $token->{attributes}); |
!!!create-element ($el, $token->{tag_name}, $token->{attributes}); |
| 3010 |
|
|
| 3011 |
## TODO: $self->{form_element} if defined |
## TODO: $self->{form_element} if defined |
| 3012 |
$self->{content_model_flag} = 'RCDATA'; |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
| 3013 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 3014 |
|
|
| 3015 |
$insert->($el); |
$insert->($el); |
| 3030 |
$el->manakai_append_text ($text); |
$el->manakai_append_text ($text); |
| 3031 |
} |
} |
| 3032 |
|
|
| 3033 |
$self->{content_model_flag} = 'PCDATA'; |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
| 3034 |
|
|
| 3035 |
if ($token->{type} eq 'end tag' and |
if ($token->{type} eq 'end tag' and |
| 3036 |
$token->{tag_name} eq $tag_name) { |
$token->{tag_name} eq $tag_name) { |
| 3046 |
noframes => 1, |
noframes => 1, |
| 3047 |
noscript => 0, ## TODO: 1 if scripting is enabled |
noscript => 0, ## TODO: 1 if scripting is enabled |
| 3048 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3049 |
$parse_rcdata->('CDATA', $insert); |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert); |
| 3050 |
return; |
return; |
| 3051 |
} elsif ($token->{tag_name} eq 'select') { |
} elsif ($token->{tag_name} eq 'select') { |
| 3052 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 3084 |
unless ({ |
unless ({ |
| 3085 |
dd => 1, dt => 1, li => 1, p => 1, td => 1, |
dd => 1, dt => 1, li => 1, p => 1, td => 1, |
| 3086 |
th => 1, tr => 1, body => 1, html => 1, |
th => 1, tr => 1, body => 1, html => 1, |
| 3087 |
|
tbody => 1, tfoot => 1, thead => 1, |
| 3088 |
}->{$_->[1]}) { |
}->{$_->[1]}) { |
| 3089 |
!!!parse-error (type => 'not closed:'.$_->[1]); |
!!!parse-error (type => 'not closed:'.$_->[1]); |
| 3090 |
} |
} |
| 3134 |
li => ($token->{tag_name} ne 'li'), |
li => ($token->{tag_name} ne 'li'), |
| 3135 |
p => ($token->{tag_name} ne 'p'), |
p => ($token->{tag_name} ne 'p'), |
| 3136 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3137 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3138 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3139 |
!!!back-token; |
!!!back-token; |
| 3140 |
$token = {type => 'end tag', |
$token = {type => 'end tag', |
| 3152 |
} # INSCOPE |
} # INSCOPE |
| 3153 |
|
|
| 3154 |
if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) { |
if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) { |
| 3155 |
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
if (defined $i) { |
| 3156 |
|
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
| 3157 |
|
} else { |
| 3158 |
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3159 |
|
} |
| 3160 |
} |
} |
| 3161 |
|
|
| 3162 |
splice @{$self->{open_elements}}, $i if defined $i; |
if (defined $i) { |
| 3163 |
|
splice @{$self->{open_elements}}, $i; |
| 3164 |
|
} elsif ($token->{tag_name} eq 'p') { |
| 3165 |
|
## As if <p>, then reprocess the current token |
| 3166 |
|
my $el; |
| 3167 |
|
!!!create-element ($el, 'p'); |
| 3168 |
|
$insert->($el); |
| 3169 |
|
} |
| 3170 |
$clear_up_to_marker->() |
$clear_up_to_marker->() |
| 3171 |
if { |
if { |
| 3172 |
button => 1, marquee => 1, object => 1, |
button => 1, marquee => 1, object => 1, |
| 3182 |
if ({ |
if ({ |
| 3183 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 3184 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3185 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3186 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3187 |
!!!back-token; |
!!!back-token; |
| 3188 |
$token = {type => 'end tag', |
$token = {type => 'end tag', |
| 3221 |
if ({ |
if ({ |
| 3222 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 3223 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3224 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3225 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3226 |
!!!back-token; |
!!!back-token; |
| 3227 |
$token = {type => 'end tag', |
$token = {type => 'end tag', |
| 3252 |
strong => 1, tt => 1, u => 1, |
strong => 1, tt => 1, u => 1, |
| 3253 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3254 |
$formatting_end_tag->($token->{tag_name}); |
$formatting_end_tag->($token->{tag_name}); |
| 3255 |
## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884> |
return; |
| 3256 |
|
} elsif ($token->{tag_name} eq 'br') { |
| 3257 |
|
!!!parse-error (type => 'unmatched end tag:br'); |
| 3258 |
|
|
| 3259 |
|
## As if <br> |
| 3260 |
|
$reconstruct_active_formatting_elements->($insert_to_current); |
| 3261 |
|
|
| 3262 |
|
my $el; |
| 3263 |
|
!!!create-element ($el, 'br'); |
| 3264 |
|
$insert->($el); |
| 3265 |
|
|
| 3266 |
|
## Ignore the token. |
| 3267 |
|
!!!next-token; |
| 3268 |
return; |
return; |
| 3269 |
} elsif ({ |
} elsif ({ |
| 3270 |
caption => 1, col => 1, colgroup => 1, frame => 1, |
caption => 1, col => 1, colgroup => 1, frame => 1, |
| 3271 |
frameset => 1, head => 1, option => 1, optgroup => 1, |
frameset => 1, head => 1, option => 1, optgroup => 1, |
| 3272 |
tbody => 1, td => 1, tfoot => 1, th => 1, |
tbody => 1, td => 1, tfoot => 1, th => 1, |
| 3273 |
thead => 1, tr => 1, |
thead => 1, tr => 1, |
| 3274 |
area => 1, basefont => 1, bgsound => 1, br => 1, |
area => 1, basefont => 1, bgsound => 1, |
| 3275 |
embed => 1, hr => 1, iframe => 1, image => 1, |
embed => 1, hr => 1, iframe => 1, image => 1, |
| 3276 |
img => 1, input => 1, isindex => 1, noembed => 1, |
img => 1, input => 1, isindex => 1, noembed => 1, |
| 3277 |
noframes => 1, param => 1, select => 1, spacer => 1, |
noframes => 1, param => 1, select => 1, spacer => 1, |
| 3298 |
if ({ |
if ({ |
| 3299 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 3300 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3301 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3302 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3303 |
!!!back-token; |
!!!back-token; |
| 3304 |
$token = {type => 'end tag', |
$token = {type => 'end tag', |
| 3342 |
}; # $in_body |
}; # $in_body |
| 3343 |
|
|
| 3344 |
B: { |
B: { |
| 3345 |
if ($phase eq 'main') { |
if ($token->{type} eq 'DOCTYPE') { |
| 3346 |
if ($token->{type} eq 'DOCTYPE') { |
!!!parse-error (type => 'DOCTYPE in the middle'); |
| 3347 |
!!!parse-error (type => 'in html:#DOCTYPE'); |
## Ignore the token |
| 3348 |
## Ignore the token |
## Stay in the phase |
| 3349 |
## Stay in the phase |
!!!next-token; |
| 3350 |
!!!next-token; |
redo B; |
| 3351 |
redo B; |
} elsif ($token->{type} eq 'end-of-file') { |
| 3352 |
} elsif ($token->{type} eq 'start tag' and |
if ($token->{insertion_mode} ne 'trailing end') { |
|
$token->{tag_name} eq 'html') { |
|
|
## TODO: unless it is the first start tag token, parse-error |
|
|
my $top_el = $self->{open_elements}->[0]->[0]; |
|
|
for my $attr_name (keys %{$token->{attributes}}) { |
|
|
unless ($top_el->has_attribute_ns (undef, $attr_name)) { |
|
|
$top_el->set_attribute_ns |
|
|
(undef, [undef, $attr_name], |
|
|
$token->{attributes}->{$attr_name}->{value}); |
|
|
} |
|
|
} |
|
|
!!!next-token; |
|
|
redo B; |
|
|
} elsif ($token->{type} eq 'end-of-file') { |
|
| 3353 |
## Generate implied end tags |
## Generate implied end tags |
| 3354 |
if ({ |
if ({ |
| 3355 |
dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1, |
dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1, |
| 3356 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3357 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3358 |
!!!back-token; |
!!!back-token; |
| 3359 |
$token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]}; |
$token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]}; |
| 3369 |
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
| 3370 |
} |
} |
| 3371 |
|
|
|
## Stop parsing |
|
|
last B; |
|
|
|
|
| 3372 |
## ISSUE: There is an issue in the spec. |
## ISSUE: There is an issue in the spec. |
| 3373 |
|
} |
| 3374 |
|
|
| 3375 |
|
## Stop parsing |
| 3376 |
|
last B; |
| 3377 |
|
} elsif ($token->{type} eq 'start tag' and |
| 3378 |
|
$token->{tag_name} eq 'html') { |
| 3379 |
|
if ($self->{insertion_mode} eq 'trailing end') { |
| 3380 |
|
## Turn into the main phase |
| 3381 |
|
!!!parse-error (type => 'after html:html'); |
| 3382 |
|
$self->{insertion_mode} = $previous_insertion_mode; |
| 3383 |
|
} |
| 3384 |
|
|
| 3385 |
|
## ISSUE: "aa<html>" is not a parse error. |
| 3386 |
|
## ISSUE: "<html>" in fragment is not a parse error. |
| 3387 |
|
unless ($token->{first_start_tag}) { |
| 3388 |
|
!!!parse-error (type => 'not first start tag'); |
| 3389 |
|
} |
| 3390 |
|
my $top_el = $self->{open_elements}->[0]->[0]; |
| 3391 |
|
for my $attr_name (keys %{$token->{attributes}}) { |
| 3392 |
|
unless ($top_el->has_attribute_ns (undef, $attr_name)) { |
| 3393 |
|
$top_el->set_attribute_ns |
| 3394 |
|
(undef, [undef, $attr_name], |
| 3395 |
|
$token->{attributes}->{$attr_name}->{value}); |
| 3396 |
|
} |
| 3397 |
|
} |
| 3398 |
|
!!!next-token; |
| 3399 |
|
redo B; |
| 3400 |
|
} elsif ($token->{type} eq 'comment') { |
| 3401 |
|
my $comment = $self->{document}->create_comment ($token->{data}); |
| 3402 |
|
if ($self->{insertion_mode} eq 'trailing end') { |
| 3403 |
|
$self->{document}->append_child ($comment); |
| 3404 |
|
} elsif ($self->{insertion_mode} eq 'after body') { |
| 3405 |
|
$self->{open_elements}->[0]->[0]->append_child ($comment); |
| 3406 |
} else { |
} else { |
| 3407 |
if ($self->{insertion_mode} eq 'before head') { |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
| 3408 |
|
} |
| 3409 |
|
!!!next-token; |
| 3410 |
|
redo B; |
| 3411 |
|
} elsif ($self->{insertion_mode} eq 'before head') { |
| 3412 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 3413 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 3414 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 3424 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = 'in head'; |
| 3425 |
## reprocess |
## reprocess |
| 3426 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 3427 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 3428 |
my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {}; |
my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {}; |
| 3429 |
!!!create-element ($self->{head_element}, 'head', $attr); |
!!!create-element ($self->{head_element}, 'head', $attr); |
| 3442 |
} |
} |
| 3443 |
redo B; |
redo B; |
| 3444 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} eq 'end tag') { |
| 3445 |
if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) { |
if ({ |
| 3446 |
|
head => 1, body => 1, html => 1, |
| 3447 |
|
p => 1, br => 1, |
| 3448 |
|
}->{$token->{tag_name}}) { |
| 3449 |
## As if <head> |
## As if <head> |
| 3450 |
!!!create-element ($self->{head_element}, 'head'); |
!!!create-element ($self->{head_element}, 'head'); |
| 3451 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 3475 |
} |
} |
| 3476 |
|
|
| 3477 |
# |
# |
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 3478 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 3479 |
if ({base => ($self->{insertion_mode} eq 'in head' or |
if ({base => ($self->{insertion_mode} eq 'in head' or |
| 3480 |
$self->{insertion_mode} eq 'after head'), |
$self->{insertion_mode} eq 'after head'), |
| 3481 |
link => 1, meta => 1}->{$token->{tag_name}}) { |
link => 1}->{$token->{tag_name}}) { |
| 3482 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is an "as if in head" code clone. |
| 3483 |
if ($self->{insertion_mode} eq 'after head') { |
if ($self->{insertion_mode} eq 'after head') { |
| 3484 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 3486 |
} |
} |
| 3487 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3488 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 3489 |
|
pop @{$self->{open_elements}} |
| 3490 |
|
if $self->{insertion_mode} eq 'after head'; |
| 3491 |
|
!!!next-token; |
| 3492 |
|
redo B; |
| 3493 |
|
} elsif ($token->{tag_name} eq 'meta') { |
| 3494 |
|
## NOTE: There is an "as if in head" code clone. |
| 3495 |
|
if ($self->{insertion_mode} eq 'after head') { |
| 3496 |
|
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 3497 |
|
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 3498 |
|
} |
| 3499 |
|
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3500 |
|
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 3501 |
|
|
| 3502 |
|
unless ($self->{confident}) { |
| 3503 |
|
my $charset; |
| 3504 |
|
if ($token->{attributes}->{charset}) { ## TODO: And if supported |
| 3505 |
|
$charset = $token->{attributes}->{charset}->{value}; |
| 3506 |
|
} |
| 3507 |
|
if ($token->{attributes}->{'http-equiv'}) { |
| 3508 |
|
## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to the definition. |
| 3509 |
|
if ($token->{attributes}->{'http-equiv'}->{value} |
| 3510 |
|
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
| 3511 |
|
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 3512 |
|
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
| 3513 |
|
$charset = defined $1 ? $1 : defined $2 ? $2 : $3; |
| 3514 |
|
} ## TODO: And if supported |
| 3515 |
|
} |
| 3516 |
|
## TODO: Change the encoding |
| 3517 |
|
} |
| 3518 |
|
|
| 3519 |
## TODO: Extracting |charset| from |meta|. |
## TODO: Extracting |charset| from |meta|. |
| 3520 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 3521 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} eq 'after head'; |
| 3528 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 3529 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 3530 |
} |
} |
| 3531 |
$parse_rcdata->('RCDATA', $insert_to_current); |
my $parent = defined $self->{head_element} ? $self->{head_element} |
| 3532 |
|
: $self->{open_elements}->[-1]->[0]; |
| 3533 |
|
$parse_rcdata->(RCDATA_CONTENT_MODEL, |
| 3534 |
|
sub { $parent->append_child ($_[0]) }); |
| 3535 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 3536 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} eq 'after head'; |
| 3537 |
redo B; |
redo B; |
| 3543 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 3544 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 3545 |
} |
} |
| 3546 |
$parse_rcdata->('CDATA', $insert_to_current); |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current); |
| 3547 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 3548 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} eq 'after head'; |
| 3549 |
redo B; |
redo B; |
| 3555 |
!!!next-token; |
!!!next-token; |
| 3556 |
redo B; |
redo B; |
| 3557 |
} elsif ($self->{insertion_mode} eq 'in head noscript') { |
} elsif ($self->{insertion_mode} eq 'in head noscript') { |
| 3558 |
!!!parse-error (type => 'noscript in noscript'); |
!!!parse-error (type => 'in noscript:noscript'); |
| 3559 |
## Ignore the token |
## Ignore the token |
| 3560 |
|
!!!next-token; |
| 3561 |
redo B; |
redo B; |
| 3562 |
} else { |
} else { |
| 3563 |
# |
# |
| 3608 |
!!!next-token; |
!!!next-token; |
| 3609 |
redo B; |
redo B; |
| 3610 |
} elsif ($self->{insertion_mode} eq 'in head' and |
} elsif ($self->{insertion_mode} eq 'in head' and |
| 3611 |
($token->{tag_name} eq 'body' or |
{ |
| 3612 |
$token->{tag_name} eq 'html')) { |
body => 1, html => 1, |
| 3613 |
|
p => 1, br => 1, |
| 3614 |
|
}->{$token->{tag_name}}) { |
| 3615 |
|
# |
| 3616 |
|
} elsif ($self->{insertion_mode} eq 'in head noscript' and |
| 3617 |
|
{ |
| 3618 |
|
p => 1, br => 1, |
| 3619 |
|
}->{$token->{tag_name}}) { |
| 3620 |
# |
# |
| 3621 |
} elsif ($self->{insertion_mode} ne 'after head') { |
} elsif ($self->{insertion_mode} ne 'after head') { |
| 3622 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3655 |
|
|
| 3656 |
!!!next-token; |
!!!next-token; |
| 3657 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
## NOTE: There is a code clone of "comment in body". |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 3658 |
} else { |
} else { |
| 3659 |
$in_body->($insert_to_current); |
$in_body->($insert_to_current); |
| 3660 |
redo B; |
redo B; |
| 3718 |
|
|
| 3719 |
!!!next-token; |
!!!next-token; |
| 3720 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 3721 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 3722 |
if ({ |
if ({ |
| 3723 |
caption => 1, |
caption => 1, |
| 3789 |
if ({ |
if ({ |
| 3790 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 3791 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3792 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3793 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3794 |
!!!back-token; # <table> |
!!!back-token; # <table> |
| 3795 |
$token = {type => 'end tag', tag_name => 'table'}; |
$token = {type => 'end tag', tag_name => 'table'}; |
| 3838 |
if ({ |
if ({ |
| 3839 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 3840 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3841 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3842 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3843 |
!!!back-token; |
!!!back-token; |
| 3844 |
$token = {type => 'end tag', |
$token = {type => 'end tag', |
| 3884 |
|
|
| 3885 |
!!!next-token; |
!!!next-token; |
| 3886 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
## NOTE: This is a code clone of "comment in body". |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 3887 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 3888 |
if ({ |
if ({ |
| 3889 |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
| 3916 |
if ({ |
if ({ |
| 3917 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 3918 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3919 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3920 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3921 |
!!!back-token; # <?> |
!!!back-token; # <?> |
| 3922 |
$token = {type => 'end tag', tag_name => 'caption'}; |
$token = {type => 'end tag', tag_name => 'caption'}; |
| 3967 |
if ({ |
if ({ |
| 3968 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 3969 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 3970 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 3971 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3972 |
!!!back-token; |
!!!back-token; |
| 3973 |
$token = {type => 'end tag', |
$token = {type => 'end tag', |
| 4015 |
if ({ |
if ({ |
| 4016 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 4017 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 4018 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 4019 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 4020 |
!!!back-token; # </table> |
!!!back-token; # </table> |
| 4021 |
$token = {type => 'end tag', tag_name => 'caption'}; |
$token = {type => 'end tag', tag_name => 'caption'}; |
| 4044 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 4045 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 4046 |
## Ignore the token |
## Ignore the token |
| 4047 |
|
!!!next-token; |
| 4048 |
redo B; |
redo B; |
| 4049 |
} else { |
} else { |
| 4050 |
# |
# |
| 4066 |
} |
} |
| 4067 |
|
|
| 4068 |
# |
# |
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 4069 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 4070 |
if ($token->{tag_name} eq 'col') { |
if ($token->{tag_name} eq 'col') { |
| 4071 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 4171 |
|
|
| 4172 |
!!!next-token; |
!!!next-token; |
| 4173 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
## Copied from 'in table' |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 4174 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 4175 |
if ({ |
if ({ |
| 4176 |
tr => 1, |
tr => 1, |
| 4271 |
if ({ |
if ({ |
| 4272 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 4273 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 4274 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 4275 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 4276 |
!!!back-token; # <table> |
!!!back-token; # <table> |
| 4277 |
$token = {type => 'end tag', tag_name => 'table'}; |
$token = {type => 'end tag', tag_name => 'table'}; |
| 4450 |
|
|
| 4451 |
!!!next-token; |
!!!next-token; |
| 4452 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
## Copied from 'in table' |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 4453 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 4454 |
if ($token->{tag_name} eq 'th' or |
if ($token->{tag_name} eq 'th' or |
| 4455 |
$token->{tag_name} eq 'td') { |
$token->{tag_name} eq 'td') { |
| 4534 |
if ({ |
if ({ |
| 4535 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 4536 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 4537 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 4538 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 4539 |
!!!back-token; # <table> |
!!!back-token; # <table> |
| 4540 |
$token = {type => 'end tag', tag_name => 'table'}; |
$token = {type => 'end tag', tag_name => 'table'}; |
| 4709 |
|
|
| 4710 |
!!!next-token; |
!!!next-token; |
| 4711 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
## NOTE: This is a code clone of "comment in body". |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 4712 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 4713 |
if ({ |
if ({ |
| 4714 |
caption => 1, col => 1, colgroup => 1, |
caption => 1, col => 1, colgroup => 1, |
| 4770 |
td => ($token->{tag_name} eq 'th'), |
td => ($token->{tag_name} eq 'th'), |
| 4771 |
th => ($token->{tag_name} eq 'td'), |
th => ($token->{tag_name} eq 'td'), |
| 4772 |
tr => 1, |
tr => 1, |
| 4773 |
|
tbody => 1, tfoot=> 1, thead => 1, |
| 4774 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 4775 |
!!!back-token; |
!!!back-token; |
| 4776 |
$token = {type => 'end tag', |
$token = {type => 'end tag', |
| 4845 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
| 4846 |
!!!next-token; |
!!!next-token; |
| 4847 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 4848 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 4849 |
if ($token->{tag_name} eq 'option') { |
if ($token->{tag_name} eq 'option') { |
| 4850 |
if ($self->{open_elements}->[-1]->[1] eq 'option') { |
if ($self->{open_elements}->[-1]->[1] eq 'option') { |
| 5017 |
} elsif ($self->{insertion_mode} eq 'after body') { |
} elsif ($self->{insertion_mode} eq 'after body') { |
| 5018 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5019 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5020 |
|
my $data = $1; |
| 5021 |
## As if in body |
## As if in body |
| 5022 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 5023 |
|
|
| 5024 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5025 |
|
|
| 5026 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5027 |
!!!next-token; |
!!!next-token; |
| 5030 |
} |
} |
| 5031 |
|
|
| 5032 |
# |
# |
| 5033 |
!!!parse-error (type => 'after body:#'.$token->{type}); |
!!!parse-error (type => 'after body:#character'); |
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[0]->[0]->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
| 5034 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 5035 |
!!!parse-error (type => 'after body:'.$token->{tag_name}); |
!!!parse-error (type => 'after body:'.$token->{tag_name}); |
| 5036 |
# |
# |
| 5042 |
!!!next-token; |
!!!next-token; |
| 5043 |
redo B; |
redo B; |
| 5044 |
} else { |
} else { |
| 5045 |
$phase = 'trailing end'; |
$previous_insertion_mode = $self->{insertion_mode}; |
| 5046 |
|
$self->{insertion_mode} = 'trailing end'; |
| 5047 |
!!!next-token; |
!!!next-token; |
| 5048 |
redo B; |
redo B; |
| 5049 |
} |
} |
| 5051 |
!!!parse-error (type => 'after body:/'.$token->{tag_name}); |
!!!parse-error (type => 'after body:/'.$token->{tag_name}); |
| 5052 |
} |
} |
| 5053 |
} else { |
} else { |
| 5054 |
!!!parse-error (type => 'after body:#'.$token->{type}); |
die "$0: $token->{type}: Unknown token type"; |
| 5055 |
} |
} |
| 5056 |
|
|
| 5057 |
$self->{insertion_mode} = 'in body'; |
$self->{insertion_mode} = 'in body'; |
| 5058 |
## reprocess |
## reprocess |
| 5059 |
redo B; |
redo B; |
| 5060 |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
| 5061 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5062 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5063 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
|
|
|
|
unless (length $token->{data}) { |
|
|
!!!next-token; |
|
|
redo B; |
|
|
} |
|
|
} |
|
| 5064 |
|
|
| 5065 |
# |
unless (length $token->{data}) { |
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
|
| 5066 |
!!!next-token; |
!!!next-token; |
| 5067 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'start tag') { |
|
|
if ($token->{tag_name} eq 'frameset') { |
|
|
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
|
|
!!!next-token; |
|
|
redo B; |
|
|
} elsif ($token->{tag_name} eq 'frame') { |
|
|
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
|
|
pop @{$self->{open_elements}}; |
|
|
!!!next-token; |
|
|
redo B; |
|
|
} elsif ($token->{tag_name} eq 'noframes') { |
|
|
$in_body->($insert_to_current); |
|
|
redo B; |
|
|
} else { |
|
|
# |
|
|
} |
|
|
} elsif ($token->{type} eq 'end tag') { |
|
|
if ($token->{tag_name} eq 'frameset') { |
|
|
if ($self->{open_elements}->[-1]->[1] eq 'html' and |
|
|
@{$self->{open_elements}} == 1) { |
|
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
} else { |
|
|
pop @{$self->{open_elements}}; |
|
|
!!!next-token; |
|
|
} |
|
|
|
|
|
## if not inner_html and |
|
|
if ($self->{open_elements}->[-1]->[1] ne 'frameset') { |
|
|
$self->{insertion_mode} = 'after frameset'; |
|
|
} |
|
|
redo B; |
|
|
} else { |
|
|
# |
|
|
} |
|
|
} else { |
|
|
# |
|
| 5068 |
} |
} |
| 5069 |
|
} |
| 5070 |
if (defined $token->{tag_name}) { |
|
| 5071 |
!!!parse-error (type => 'in frameset:'.$token->{tag_name}); |
!!!parse-error (type => 'in frameset:#character'); |
| 5072 |
|
## Ignore the token |
| 5073 |
|
!!!next-token; |
| 5074 |
|
redo B; |
| 5075 |
|
} elsif ($token->{type} eq 'start tag') { |
| 5076 |
|
if ($token->{tag_name} eq 'frameset') { |
| 5077 |
|
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 5078 |
|
!!!next-token; |
| 5079 |
|
redo B; |
| 5080 |
|
} elsif ($token->{tag_name} eq 'frame') { |
| 5081 |
|
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 5082 |
|
pop @{$self->{open_elements}}; |
| 5083 |
|
!!!next-token; |
| 5084 |
|
redo B; |
| 5085 |
|
} elsif ($token->{tag_name} eq 'noframes') { |
| 5086 |
|
$in_body->($insert_to_current); |
| 5087 |
|
redo B; |
| 5088 |
|
} else { |
| 5089 |
|
!!!parse-error (type => 'in frameset:'.$token->{tag_name}); |
| 5090 |
|
## Ignore the token |
| 5091 |
|
!!!next-token; |
| 5092 |
|
redo B; |
| 5093 |
|
} |
| 5094 |
|
} elsif ($token->{type} eq 'end tag') { |
| 5095 |
|
if ($token->{tag_name} eq 'frameset') { |
| 5096 |
|
if ($self->{open_elements}->[-1]->[1] eq 'html' and |
| 5097 |
|
@{$self->{open_elements}} == 1) { |
| 5098 |
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 5099 |
|
## Ignore the token |
| 5100 |
|
!!!next-token; |
| 5101 |
} else { |
} else { |
| 5102 |
!!!parse-error (type => 'in frameset:#'.$token->{type}); |
pop @{$self->{open_elements}}; |
| 5103 |
|
!!!next-token; |
| 5104 |
|
} |
| 5105 |
|
|
| 5106 |
|
if (not defined $self->{inner_html_node} and |
| 5107 |
|
$self->{open_elements}->[-1]->[1] ne 'frameset') { |
| 5108 |
|
$self->{insertion_mode} = 'after frameset'; |
| 5109 |
} |
} |
| 5110 |
|
redo B; |
| 5111 |
|
} else { |
| 5112 |
|
!!!parse-error (type => 'in frameset:/'.$token->{tag_name}); |
| 5113 |
## Ignore the token |
## Ignore the token |
| 5114 |
!!!next-token; |
!!!next-token; |
| 5115 |
redo B; |
redo B; |
| 5116 |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
} |
| 5117 |
if ($token->{type} eq 'character') { |
} else { |
| 5118 |
|
die "$0: $token->{type}: Unknown token type"; |
| 5119 |
|
} |
| 5120 |
|
} elsif ($self->{insertion_mode} eq 'after frameset') { |
| 5121 |
|
if ($token->{type} eq 'character') { |
| 5122 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5123 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5124 |
|
|
| 5125 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5126 |
!!!next-token; |
!!!next-token; |
| 5128 |
} |
} |
| 5129 |
} |
} |
| 5130 |
|
|
| 5131 |
# |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
| 5132 |
} elsif ($token->{type} eq 'comment') { |
!!!parse-error (type => 'after frameset:#character'); |
| 5133 |
my $comment = $self->{document}->create_comment ($token->{data}); |
|
| 5134 |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
## Ignore the token. |
| 5135 |
!!!next-token; |
if (length $token->{data}) { |
| 5136 |
redo B; |
## reprocess the rest of characters |
| 5137 |
} elsif ($token->{type} eq 'start tag') { |
} else { |
| 5138 |
if ($token->{tag_name} eq 'noframes') { |
!!!next-token; |
| 5139 |
$in_body->($insert_to_current); |
} |
|
redo B; |
|
|
} else { |
|
|
# |
|
|
} |
|
|
} elsif ($token->{type} eq 'end tag') { |
|
|
if ($token->{tag_name} eq 'html') { |
|
|
$phase = 'trailing end'; |
|
|
!!!next-token; |
|
| 5140 |
redo B; |
redo B; |
|
} else { |
|
|
# |
|
| 5141 |
} |
} |
| 5142 |
} else { |
|
| 5143 |
# |
die qq[$0: Character "$token->{data}"]; |
| 5144 |
} |
} elsif ($token->{type} eq 'start tag') { |
| 5145 |
|
if ($token->{tag_name} eq 'noframes') { |
| 5146 |
if (defined $token->{tag_name}) { |
$in_body->($insert_to_current); |
| 5147 |
!!!parse-error (type => 'after frameset:'.$token->{tag_name}); |
redo B; |
| 5148 |
} else { |
} else { |
| 5149 |
!!!parse-error (type => 'after frameset:#'.$token->{type}); |
!!!parse-error (type => 'after frameset:'.$token->{tag_name}); |
|
} |
|
| 5150 |
## Ignore the token |
## Ignore the token |
| 5151 |
!!!next-token; |
!!!next-token; |
| 5152 |
redo B; |
redo B; |
| 5153 |
|
} |
| 5154 |
## ISSUE: An issue in spec there |
} elsif ($token->{type} eq 'end tag') { |
| 5155 |
|
if ($token->{tag_name} eq 'html') { |
| 5156 |
|
$previous_insertion_mode = $self->{insertion_mode}; |
| 5157 |
|
$self->{insertion_mode} = 'trailing end'; |
| 5158 |
|
!!!next-token; |
| 5159 |
|
redo B; |
| 5160 |
} else { |
} else { |
| 5161 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
!!!parse-error (type => 'after frameset:/'.$token->{tag_name}); |
| 5162 |
|
## Ignore the token |
| 5163 |
|
!!!next-token; |
| 5164 |
|
redo B; |
| 5165 |
} |
} |
| 5166 |
|
} else { |
| 5167 |
|
die "$0: $token->{type}: Unknown token type"; |
| 5168 |
} |
} |
| 5169 |
} elsif ($phase eq 'trailing end') { |
|
| 5170 |
|
## ISSUE: An issue in spec here |
| 5171 |
|
} elsif ($self->{insertion_mode} eq 'trailing end') { |
| 5172 |
## states in the main stage is preserved yet # MUST |
## states in the main stage is preserved yet # MUST |
| 5173 |
|
|
| 5174 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} eq 'character') { |
|
!!!parse-error (type => 'after html:#DOCTYPE'); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
redo B; |
|
|
} elsif ($token->{type} eq 'comment') { |
|
|
my $comment = $self->{document}->create_comment ($token->{data}); |
|
|
$self->{document}->append_child ($comment); |
|
|
!!!next-token; |
|
|
redo B; |
|
|
} elsif ($token->{type} eq 'character') { |
|
| 5175 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5176 |
my $data = $1; |
my $data = $1; |
| 5177 |
## As if in the main phase. |
## As if in the main phase. |
| 5178 |
## NOTE: The insertion mode in the main phase |
## NOTE: The insertion mode in the main phase |
| 5179 |
## just before the phase has been changed to the trailing |
## just before the phase has been changed to the trailing |
| 5180 |
## end phase is either "after body" or "after frameset". |
## end phase is either "after body" or "after frameset". |
| 5181 |
$reconstruct_active_formatting_elements->($insert_to_current) |
$reconstruct_active_formatting_elements->($insert_to_current); |
|
if $phase eq 'main'; |
|
| 5182 |
|
|
| 5183 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
| 5184 |
|
|
| 5189 |
} |
} |
| 5190 |
|
|
| 5191 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
| 5192 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
| 5193 |
## reprocess |
## reprocess |
| 5194 |
redo B; |
redo B; |
| 5195 |
} elsif ($token->{type} eq 'start tag' or |
} elsif ($token->{type} eq 'start tag') { |
|
$token->{type} eq 'end tag') { |
|
| 5196 |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
| 5197 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
| 5198 |
|
## reprocess |
| 5199 |
|
redo B; |
| 5200 |
|
} elsif ($token->{type} eq 'end tag') { |
| 5201 |
|
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
| 5202 |
|
$self->{insertion_mode} = $previous_insertion_mode; |
| 5203 |
## reprocess |
## reprocess |
| 5204 |
redo B; |
redo B; |
|
} elsif ($token->{type} eq 'end-of-file') { |
|
|
## Stop parsing |
|
|
last B; |
|
| 5205 |
} else { |
} else { |
| 5206 |
die "$0: $token->{type}: Unknown token"; |
die "$0: $token->{type}: Unknown token"; |
| 5207 |
} |
} |
| 5208 |
|
} else { |
| 5209 |
|
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 5210 |
} |
} |
| 5211 |
} # B |
} # B |
| 5212 |
|
|
| 5294 |
|
|
| 5295 |
## Step 2 |
## Step 2 |
| 5296 |
my $node_ln = $node->local_name; |
my $node_ln = $node->local_name; |
| 5297 |
$p->{content_model_flag} = { |
$p->{content_model} = { |
| 5298 |
title => 'RCDATA', |
title => RCDATA_CONTENT_MODEL, |
| 5299 |
textarea => 'RCDATA', |
textarea => RCDATA_CONTENT_MODEL, |
| 5300 |
style => 'CDATA', |
style => CDATA_CONTENT_MODEL, |
| 5301 |
script => 'CDATA', |
script => CDATA_CONTENT_MODEL, |
| 5302 |
xmp => 'CDATA', |
xmp => CDATA_CONTENT_MODEL, |
| 5303 |
iframe => 'CDATA', |
iframe => CDATA_CONTENT_MODEL, |
| 5304 |
noembed => 'CDATA', |
noembed => CDATA_CONTENT_MODEL, |
| 5305 |
noframes => 'CDATA', |
noframes => CDATA_CONTENT_MODEL, |
| 5306 |
noscript => 'CDATA', |
noscript => CDATA_CONTENT_MODEL, |
| 5307 |
plaintext => 'PLAINTEXT', |
plaintext => PLAINTEXT_CONTENT_MODEL, |
| 5308 |
}->{$node_ln} || 'PCDATA'; |
}->{$node_ln}; |
| 5309 |
## ISSUE: What is "the name of the element"? local name? |
$p->{content_model} = PCDATA_CONTENT_MODEL |
| 5310 |
|
unless defined $p->{content_model}; |
| 5311 |
|
## ISSUE: What is "the name of the element"? local name? |
| 5312 |
|
|
| 5313 |
$p->{inner_html_node} = [$node, $node_ln]; |
$p->{inner_html_node} = [$node, $node_ln]; |
| 5314 |
|
|