| 194 |
h6 => HEADING_EL, |
h6 => HEADING_EL, |
| 195 |
head => MISC_SPECIAL_EL, |
head => MISC_SPECIAL_EL, |
| 196 |
header => MISC_SPECIAL_EL, |
header => MISC_SPECIAL_EL, |
| 197 |
|
hgroup => MISC_SPECIAL_EL, |
| 198 |
hr => MISC_SPECIAL_EL, |
hr => MISC_SPECIAL_EL, |
| 199 |
html => HTML_EL, |
html => HTML_EL, |
| 200 |
i => FORMATTING_EL, |
i => FORMATTING_EL, |
| 789 |
sub new ($) { |
sub new ($) { |
| 790 |
my $class = shift; |
my $class = shift; |
| 791 |
my $self = bless { |
my $self = bless { |
| 792 |
level => {must => 'm', |
level => { |
| 793 |
should => 's', |
must => 'm', |
| 794 |
warn => 'w', |
should => 's', |
| 795 |
info => 'i', |
obc => 's', ## Obsolete but conforming, # XXX distinguish from "should" |
| 796 |
uncertain => 'u'}, |
warn => 'w', |
| 797 |
|
info => 'i', |
| 798 |
|
uncertain => 'u', |
| 799 |
|
}, |
| 800 |
}, $class; |
}, $class; |
| 801 |
$self->{set_nc} = sub { |
$self->{set_nc} = sub { |
| 802 |
$self->{nc} = -1; |
$self->{nc} = -1; |
| 872 |
$self->{document}->manakai_is_html (1); # MUST |
$self->{document}->manakai_is_html (1); # MUST |
| 873 |
$self->{document}->set_user_data (manakai_source_line => 1); |
$self->{document}->set_user_data (manakai_source_line => 1); |
| 874 |
$self->{document}->set_user_data (manakai_source_column => 1); |
$self->{document}->set_user_data (manakai_source_column => 1); |
| 875 |
|
|
| 876 |
|
$self->{frameset_ok} = 1; |
| 877 |
} # _initialize_tree_constructor |
} # _initialize_tree_constructor |
| 878 |
|
|
| 879 |
sub _terminate_tree_constructor ($) { |
sub _terminate_tree_constructor ($) { |
| 930 |
my $doctype = $self->{document}->create_document_type_definition |
my $doctype = $self->{document}->create_document_type_definition |
| 931 |
($doctype_name); |
($doctype_name); |
| 932 |
|
|
| 933 |
$doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive |
$doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive. |
| 934 |
if ($doctype_name ne 'html') { |
if ($doctype_name ne 'html') { |
| 935 |
!!!cp ('t1'); |
!!!cp ('t1'); |
| 936 |
!!!parse-error (type => 'not HTML5', token => $token); |
!!!parse-error (type => 'not HTML5', token => $token); |
| 937 |
} elsif (defined $token->{pubid}) { |
} elsif (defined $token->{pubid}) { |
| 938 |
!!!cp ('t2'); |
## Obsolete permitted DOCTYPEs (case-sensitive) |
| 939 |
## XXX Obsolete permitted DOCTYPEs |
my $xsysid = { |
| 940 |
!!!parse-error (type => 'not HTML5', token => $token); |
'-//W3C//DTD HTML 4.0//EN' => 'http://www.w3.org/TR/REC-html40/strict.dtd', |
| 941 |
|
'-//W3C//DTD HTML 4.01//EN' => 'http://www.w3.org/TR/html4/strict.dtd', |
| 942 |
|
'-//W3C//DTD XHTML 1.0 Strict//EN' => 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd', |
| 943 |
|
'-//W3C//DTD XHTML 1.1//EN' => 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd', |
| 944 |
|
}->{$token->{pubid}}; |
| 945 |
|
if (defined $xsysid and |
| 946 |
|
(not defined $token->{sysid} or $token->{sysid} eq $xsysid)) { |
| 947 |
|
!!!cp ('t2'); |
| 948 |
|
!!!parse-error (type => 'obs DOCTYPE', token => $token, |
| 949 |
|
level => $self->{level}->{obc}); ## XXX error type |
| 950 |
|
} else { |
| 951 |
|
!!!cp ('t2.1'); |
| 952 |
|
!!!parse-error (type => 'not HTML5', token => $token); |
| 953 |
|
} |
| 954 |
} elsif (defined $token->{sysid}) { |
} elsif (defined $token->{sysid}) { |
| 955 |
if ($token->{sysid} eq 'about:legacy-compat') { |
if ($token->{sysid} eq 'about:legacy-compat') { |
| 956 |
!!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat"> |
!!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat"> |
| 980 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
| 981 |
} elsif (defined $token->{pubid}) { |
} elsif (defined $token->{pubid}) { |
| 982 |
my $pubid = $token->{pubid}; |
my $pubid = $token->{pubid}; |
| 983 |
$pubid =~ tr/a-z/A-z/; |
$pubid =~ tr/a-z/A-Z/; ## ASCII case-insensitive. |
| 984 |
my $prefix = [ |
my $prefix = [ |
| 985 |
"+//SILMARIL//DTD HTML PRO V0R11 19970101//", |
"+//SILMARIL//DTD HTML PRO V0R11 19970101//", |
| 986 |
"-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//", |
"-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//", |
| 1072 |
} |
} |
| 1073 |
if (defined $token->{sysid}) { |
if (defined $token->{sysid}) { |
| 1074 |
my $sysid = $token->{sysid}; |
my $sysid = $token->{sysid}; |
| 1075 |
$sysid =~ tr/A-Z/a-z/; |
$sysid =~ tr/A-Z/a-z/; ## ASCII case-insensitive. |
| 1076 |
if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") { |
if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") { |
| 1077 |
## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is |
## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| |
| 1078 |
## marked as quirks. |
## is signaled as in quirks mode! |
| 1079 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
| 1080 |
!!!cp ('t11'); |
!!!cp ('t11'); |
| 1081 |
} else { |
} else { |
| 1950 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
| 1951 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 1952 |
!!!cp ('t87.1'); |
!!!cp ('t87.1'); |
| 1953 |
|
|
| 1954 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
| 1955 |
|
|
| 1956 |
|
if ($token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) { |
| 1957 |
|
delete $self->{frameset_ok}; |
| 1958 |
|
} |
| 1959 |
|
|
| 1960 |
!!!next-token; |
!!!next-token; |
| 1961 |
next B; |
next B; |
| 1962 |
} elsif ($token->{type} == START_TAG_TOKEN) { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 2151 |
## As if <body> |
## As if <body> |
| 2152 |
!!!insert-element ('body',, $token); |
!!!insert-element ('body',, $token); |
| 2153 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 2154 |
## reprocess |
## The "frameset-ok" flag is left unchanged in this case. |
| 2155 |
|
## Reprocess the token. |
| 2156 |
next B; |
next B; |
| 2157 |
} elsif ($token->{type} == START_TAG_TOKEN) { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 2158 |
if ($token->{tag_name} eq 'head') { |
if ($token->{tag_name} eq 'head') { |
| 2445 |
next B; |
next B; |
| 2446 |
} elsif ($token->{tag_name} eq 'body' or |
} elsif ($token->{tag_name} eq 'body' or |
| 2447 |
$token->{tag_name} eq 'frameset') { |
$token->{tag_name} eq 'frameset') { |
| 2448 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2449 |
!!!cp ('t122'); |
!!!cp ('t122'); |
| 2450 |
## As if </noscript> |
## As if </noscript> |
| 2451 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2452 |
!!!parse-error (type => 'in noscript', |
!!!parse-error (type => 'in noscript', |
| 2453 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 2454 |
|
|
| 2455 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2456 |
## As if </head> |
## As if </head> |
| 2457 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2458 |
|
|
| 2459 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2460 |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2461 |
!!!cp ('t124'); |
!!!cp ('t124'); |
| 2462 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2463 |
|
|
| 2464 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2465 |
} else { |
} else { |
| 2466 |
!!!cp ('t125'); |
!!!cp ('t125'); |
| 2467 |
} |
} |
| 2468 |
|
|
| 2469 |
## "after head" insertion mode |
## "after head" insertion mode |
| 2470 |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element ($token->{tag_name}, $token->{attributes}, $token); |
| 2471 |
if ($token->{tag_name} eq 'body') { |
if ($token->{tag_name} eq 'body') { |
| 2472 |
!!!cp ('t126'); |
!!!cp ('t126'); |
| 2473 |
$self->{insertion_mode} = IN_BODY_IM; |
delete $self->{frameset_ok}; |
| 2474 |
} elsif ($token->{tag_name} eq 'frameset') { |
$self->{insertion_mode} = IN_BODY_IM; |
| 2475 |
!!!cp ('t127'); |
} elsif ($token->{tag_name} eq 'frameset') { |
| 2476 |
$self->{insertion_mode} = IN_FRAMESET_IM; |
!!!cp ('t127'); |
| 2477 |
} else { |
$self->{insertion_mode} = IN_FRAMESET_IM; |
| 2478 |
die "$0: tag name: $self->{tag_name}"; |
} else { |
| 2479 |
} |
die "$0: tag name: $self->{tag_name}"; |
| 2480 |
!!!nack ('t127.1'); |
} |
| 2481 |
!!!next-token; |
!!!nack ('t127.1'); |
| 2482 |
next B; |
!!!next-token; |
| 2483 |
} else { |
next B; |
| 2484 |
!!!cp ('t128'); |
} else { |
| 2485 |
# |
!!!cp ('t128'); |
| 2486 |
} |
# |
| 2487 |
|
} |
| 2488 |
|
|
| 2489 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2490 |
!!!cp ('t129'); |
!!!cp ('t129'); |
| 2508 |
!!!cp ('t131'); |
!!!cp ('t131'); |
| 2509 |
} |
} |
| 2510 |
|
|
| 2511 |
## "after head" insertion mode |
## "after head" insertion mode |
| 2512 |
## As if <body> |
## As if <body> |
| 2513 |
!!!insert-element ('body',, $token); |
!!!insert-element ('body',, $token); |
| 2514 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 2515 |
## reprocess |
## The "frameset-ok" flag is not changed in this case. |
| 2516 |
!!!ack-later; |
## Reprocess the token. |
| 2517 |
next B; |
!!!ack-later; |
| 2518 |
} elsif ($token->{type} == END_TAG_TOKEN) { |
next B; |
| 2519 |
if ($token->{tag_name} eq 'head') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 2520 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
## "Before head", "in head", and "after head" insertion modes |
| 2521 |
!!!cp ('t132'); |
## ignore most of end tags. Exceptions are "body", "html", |
| 2522 |
## As if <head> |
## and "br" end tags. "Before head" and "in head" insertion |
| 2523 |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
## modes also recognize "head" end tag. "In head noscript" |
| 2524 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
## insertion modes ignore end tags except for "noscript" and |
| 2525 |
push @{$self->{open_elements}}, |
## "br". |
|
[$self->{head_element}, $el_category->{head}]; |
|
| 2526 |
|
|
| 2527 |
## Reprocess in the "in head" insertion mode... |
if ($token->{tag_name} eq 'head') { |
| 2528 |
pop @{$self->{open_elements}}; |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2529 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
!!!cp ('t132'); |
| 2530 |
!!!next-token; |
## As if <head> |
| 2531 |
next B; |
!!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token); |
| 2532 |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2533 |
!!!cp ('t133'); |
push @{$self->{open_elements}}, |
| 2534 |
## As if </noscript> |
[$self->{head_element}, $el_category->{head}]; |
| 2535 |
pop @{$self->{open_elements}}; |
|
| 2536 |
!!!parse-error (type => 'in noscript:/', |
## Reprocess in the "in head" insertion mode... |
| 2537 |
text => 'head', token => $token); |
pop @{$self->{open_elements}}; |
| 2538 |
|
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2539 |
## Reprocess in the "in head" insertion mode... |
!!!next-token; |
| 2540 |
pop @{$self->{open_elements}}; |
next B; |
| 2541 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2542 |
!!!next-token; |
!!!cp ('t133'); |
| 2543 |
next B; |
# |
| 2544 |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2545 |
!!!cp ('t134'); |
!!!cp ('t134'); |
| 2546 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2547 |
$self->{insertion_mode} = AFTER_HEAD_IM; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2548 |
!!!next-token; |
!!!next-token; |
| 2549 |
next B; |
next B; |
| 2550 |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2551 |
!!!cp ('t134.1'); |
!!!cp ('t134.1'); |
| 2552 |
!!!parse-error (type => 'unmatched end tag', text => 'head', |
# |
| 2553 |
token => $token); |
} else { |
| 2554 |
## Ignore the token |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 2555 |
!!!next-token; |
} |
| 2556 |
next B; |
} elsif ($token->{tag_name} eq 'noscript') { |
| 2557 |
} else { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2558 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
!!!cp ('t136'); |
| 2559 |
} |
pop @{$self->{open_elements}}; |
| 2560 |
} elsif ($token->{tag_name} eq 'noscript') { |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2561 |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
!!!next-token; |
| 2562 |
!!!cp ('t136'); |
next B; |
| 2563 |
pop @{$self->{open_elements}}; |
} else { |
| 2564 |
$self->{insertion_mode} = IN_HEAD_IM; |
!!!cp ('t138'); |
| 2565 |
!!!next-token; |
# |
| 2566 |
next B; |
} |
| 2567 |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or |
} elsif ({ |
| 2568 |
$self->{insertion_mode} == AFTER_HEAD_IM) { |
body => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM), |
| 2569 |
!!!cp ('t137'); |
html => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM), |
| 2570 |
!!!parse-error (type => 'unmatched end tag', |
br => 1, |
| 2571 |
text => 'noscript', token => $token); |
}->{$token->{tag_name}}) { |
|
## Ignore the token ## ISSUE: An issue in the spec. |
|
|
!!!next-token; |
|
|
next B; |
|
|
} else { |
|
|
!!!cp ('t138'); |
|
|
# |
|
|
} |
|
|
} elsif ({ |
|
|
body => 1, html => 1, |
|
|
}->{$token->{tag_name}}) { |
|
|
## TODO: This branch is entirely redundant. |
|
|
if ($self->{insertion_mode} == BEFORE_HEAD_IM or |
|
|
$self->{insertion_mode} == IN_HEAD_IM or |
|
|
$self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
|
|
!!!cp ('t140'); |
|
|
!!!parse-error (type => 'unmatched end tag', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
next B; |
|
|
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
|
|
!!!cp ('t140.1'); |
|
|
!!!parse-error (type => 'unmatched end tag', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
next B; |
|
|
} else { |
|
|
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
|
|
} |
|
|
} elsif ($token->{tag_name} eq 'p') { |
|
|
!!!cp ('t142'); |
|
|
!!!parse-error (type => 'unmatched end tag', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
|
!!!next-token; |
|
|
next B; |
|
|
} elsif ($token->{tag_name} eq 'br') { |
|
| 2572 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2573 |
!!!cp ('t142.2'); |
!!!cp ('t142.2'); |
| 2574 |
## (before head) as if <head>, (in head) as if </head> |
## (before head) as if <head>, (in head) as if </head> |
| 2588 |
!!!cp ('t143.3'); |
!!!cp ('t143.3'); |
| 2589 |
## NOTE: Two parse errors for <head><noscript></br> |
## NOTE: Two parse errors for <head><noscript></br> |
| 2590 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 2591 |
text => 'br', token => $token); |
text => $token->{tag_name}, token => $token); |
| 2592 |
## As if </noscript> |
## As if </noscript> |
| 2593 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2594 |
$self->{insertion_mode} = IN_HEAD_IM; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2606 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 2607 |
} |
} |
| 2608 |
|
|
| 2609 |
# |
## "after head" insertion mode |
| 2610 |
} else { ## Other end tags |
## As if <body> |
| 2611 |
!!!cp ('t145'); |
!!!insert-element ('body',, $token); |
| 2612 |
!!!parse-error (type => 'unmatched end tag', |
$self->{insertion_mode} = IN_BODY_IM; |
| 2613 |
text => $token->{tag_name}, token => $token); |
## The "frameset-ok" flag is left unchanged in this case. |
| 2614 |
## Ignore the token |
## Reprocess the token. |
| 2615 |
!!!next-token; |
next B; |
| 2616 |
next B; |
} |
|
} |
|
|
|
|
|
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
|
|
!!!cp ('t146'); |
|
|
## As if </noscript> |
|
|
pop @{$self->{open_elements}}; |
|
|
!!!parse-error (type => 'in noscript:/', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
|
|
|
## Reprocess in the "in head" insertion mode... |
|
|
## As if </head> |
|
|
pop @{$self->{open_elements}}; |
|
|
|
|
|
## Reprocess in the "after head" insertion mode... |
|
|
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
|
|
!!!cp ('t147'); |
|
|
## As if </head> |
|
|
pop @{$self->{open_elements}}; |
|
|
|
|
|
## Reprocess in the "after head" insertion mode... |
|
|
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
|
|
## ISSUE: This case cannot be reached? |
|
|
!!!cp ('t148'); |
|
|
!!!parse-error (type => 'unmatched end tag', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
## Ignore the token ## ISSUE: An issue in the spec. |
|
|
!!!next-token; |
|
|
next B; |
|
|
} else { |
|
|
!!!cp ('t149'); |
|
|
} |
|
| 2617 |
|
|
| 2618 |
## "after head" insertion mode |
## End tags are ignored by default. |
| 2619 |
## As if <body> |
!!!cp ('t145'); |
| 2620 |
!!!insert-element ('body',, $token); |
!!!parse-error (type => 'unmatched end tag', |
| 2621 |
$self->{insertion_mode} = IN_BODY_IM; |
text => $token->{tag_name}, token => $token); |
| 2622 |
## reprocess |
## Ignore the token. |
| 2623 |
|
!!!next-token; |
| 2624 |
next B; |
next B; |
| 2625 |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 2626 |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2674 |
## NOTE: As if <body> |
## NOTE: As if <body> |
| 2675 |
!!!insert-element ('body',, $token); |
!!!insert-element ('body',, $token); |
| 2676 |
$self->{insertion_mode} = IN_BODY_IM; |
$self->{insertion_mode} = IN_BODY_IM; |
| 2677 |
## NOTE: Reprocess. |
## The "frameset-ok" flag is left unchanged in this case. |
| 2678 |
|
## Reprocess the token. |
| 2679 |
next B; |
next B; |
| 2680 |
} else { |
} else { |
| 2681 |
die "$0: $token->{type}: Unknown token type"; |
die "$0: $token->{type}: Unknown token type"; |
| 2682 |
} |
} |
| 2683 |
} elsif ($self->{insertion_mode} & BODY_IMS) { |
} elsif ($self->{insertion_mode} & BODY_IMS) { |
| 2684 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 2685 |
!!!cp ('t150'); |
!!!cp ('t150'); |
| 2686 |
## NOTE: There is a code clone of "character in body". |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 2687 |
$reconstruct_active_formatting_elements->($insert_to_current); |
|
| 2688 |
|
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
|
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
|
| 2689 |
|
|
| 2690 |
!!!next-token; |
if ($token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) { |
| 2691 |
next B; |
delete $self->{frameset_ok}; |
| 2692 |
} elsif ($token->{type} == START_TAG_TOKEN) { |
} |
| 2693 |
|
|
| 2694 |
|
!!!next-token; |
| 2695 |
|
next B; |
| 2696 |
|
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 2697 |
if ({ |
if ({ |
| 2698 |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
| 2699 |
td => 1, tfoot => 1, th => 1, thead => 1, tr => 1, |
td => 1, tfoot => 1, th => 1, thead => 1, tr => 1, |
| 4369 |
!!!nack ('t343.1'); |
!!!nack ('t343.1'); |
| 4370 |
!!!next-token; |
!!!next-token; |
| 4371 |
next B; |
next B; |
| 4372 |
|
} elsif ($token->{tag_name} eq 'frameset') { |
| 4373 |
|
!!!parse-error (type => 'in body', text => $token->{tag_name}, |
| 4374 |
|
token => $token); |
| 4375 |
|
|
| 4376 |
|
if (@{$self->{open_elements}} == 1 or |
| 4377 |
|
not ($self->{open_elements}->[1]->[1] == BODY_EL)) { |
| 4378 |
|
!!!cp ('t343.2'); |
| 4379 |
|
## Ignore the token. |
| 4380 |
|
} elsif (not $self->{frameset_ok}) { |
| 4381 |
|
!!!cp ('t343.3'); |
| 4382 |
|
## Ignore the token. |
| 4383 |
|
} else { |
| 4384 |
|
!!!cp ('t343.4'); |
| 4385 |
|
|
| 4386 |
|
## 1. Remove the second element. |
| 4387 |
|
my $body = $self->{open_elements}->[1]->[0]; |
| 4388 |
|
my $body_parent = $body->parent_node; |
| 4389 |
|
$body_parent->remove_child ($body) if $body_parent; |
| 4390 |
|
|
| 4391 |
|
## 2. Pop nodes. |
| 4392 |
|
splice @{$self->{open_elements}}, 1; |
| 4393 |
|
|
| 4394 |
|
## 3. Insert. |
| 4395 |
|
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4396 |
|
|
| 4397 |
|
## 4. Switch. |
| 4398 |
|
$self->{insertion_mode} = IN_FRAMESET_IM; |
| 4399 |
|
} |
| 4400 |
|
|
| 4401 |
|
!!!nack ('t343.5'); |
| 4402 |
|
!!!next-token; |
| 4403 |
|
next B; |
| 4404 |
} elsif ({ |
} elsif ({ |
| 4405 |
## NOTE: Start tags for non-phrasing flow content elements |
## NOTE: Start tags for non-phrasing flow content elements |
| 4406 |
|
|
| 4409 |
center => 1, datagrid => 1, details => 1, dialog => 1, |
center => 1, datagrid => 1, details => 1, dialog => 1, |
| 4410 |
dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1, |
dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1, |
| 4411 |
footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, |
footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, |
| 4412 |
h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1, |
h6 => 1, header => 1, hgroup => 1, |
| 4413 |
|
menu => 1, nav => 1, ol => 1, p => 1, |
| 4414 |
section => 1, ul => 1, |
section => 1, ul => 1, |
| 4415 |
## NOTE: As normal, but drops leading newline |
## NOTE: As normal, but drops leading newline |
| 4416 |
pre => 1, listing => 1, |
pre => 1, listing => 1, |
| 4476 |
} else { |
} else { |
| 4477 |
!!!cp ('t348'); |
!!!cp ('t348'); |
| 4478 |
} |
} |
| 4479 |
|
|
| 4480 |
|
delete $self->{frameset_ok}; |
| 4481 |
} elsif ($token->{tag_name} eq 'form') { |
} elsif ($token->{tag_name} eq 'form') { |
| 4482 |
!!!cp ('t347.1'); |
!!!cp ('t347.1'); |
| 4483 |
$self->{form_element} = $self->{open_elements}->[-1]->[0]; |
$self->{form_element} = $self->{open_elements}->[-1]->[0]; |
| 4487 |
} elsif ($token->{tag_name} eq 'table') { |
} elsif ($token->{tag_name} eq 'table') { |
| 4488 |
!!!cp ('t382'); |
!!!cp ('t382'); |
| 4489 |
push @{$open_tables}, [$self->{open_elements}->[-1]->[0]]; |
push @{$open_tables}, [$self->{open_elements}->[-1]->[0]]; |
| 4490 |
|
|
| 4491 |
|
delete $self->{frameset_ok}; |
| 4492 |
|
|
| 4493 |
$self->{insertion_mode} = IN_TABLE_IM; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 4494 |
|
|
| 4497 |
} elsif ($token->{tag_name} eq 'hr') { |
} elsif ($token->{tag_name} eq 'hr') { |
| 4498 |
!!!cp ('t386'); |
!!!cp ('t386'); |
| 4499 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 4500 |
|
|
| 4501 |
!!!nack ('t386.1'); |
!!!ack ('t386.1'); |
| 4502 |
|
|
| 4503 |
|
delete $self->{frameset_ok}; |
| 4504 |
|
|
| 4505 |
!!!next-token; |
!!!next-token; |
| 4506 |
} else { |
} else { |
| 4507 |
!!!nack ('t347.1'); |
!!!nack ('t347.1'); |
| 4524 |
## Interpreted as <li><foo><li/></foo></li> (non-conforming): |
## Interpreted as <li><foo><li/></foo></li> (non-conforming): |
| 4525 |
## div (Fx, S) |
## div (Fx, S) |
| 4526 |
|
|
| 4527 |
|
## 1. Frameset-ng |
| 4528 |
|
delete $self->{frameset_ok}; |
| 4529 |
|
|
| 4530 |
my $non_optional; |
my $non_optional; |
| 4531 |
my $i = -1; |
my $i = -1; |
| 4532 |
|
|
| 4533 |
## 1. |
## 2. |
| 4534 |
for my $node (reverse @{$self->{open_elements}}) { |
for my $node (reverse @{$self->{open_elements}}) { |
| 4535 |
if ($node->[1] == LI_EL) { |
if ($node->[1] == LI_EL) { |
| 4536 |
## 2. (a) As if </li> |
## 3. (a) As if </li> |
| 4537 |
{ |
{ |
| 4538 |
## If no </li> - not applied |
## If no </li> - not applied |
| 4539 |
# |
# |
| 4557 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 4558 |
} |
} |
| 4559 |
|
|
| 4560 |
last; ## 2. (b) goto 5. |
last; ## 3. (b) goto 6. |
| 4561 |
} elsif ( |
} elsif ( |
| 4562 |
## NOTE: not "formatting" and not "phrasing" |
## NOTE: not "formatting" and not "phrasing" |
| 4563 |
($node->[1] & SPECIAL_EL or |
($node->[1] & SPECIAL_EL or |
| 4565 |
## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|. |
## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|. |
| 4566 |
(not $node->[1] & ADDRESS_DIV_P_EL) |
(not $node->[1] & ADDRESS_DIV_P_EL) |
| 4567 |
) { |
) { |
| 4568 |
## 3. |
## 4. |
| 4569 |
!!!cp ('t357'); |
!!!cp ('t357'); |
| 4570 |
last; ## goto 5. |
last; ## goto 6. |
| 4571 |
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) { |
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) { |
| 4572 |
!!!cp ('t358'); |
!!!cp ('t358'); |
| 4573 |
# |
# |
| 4576 |
$non_optional ||= $node; |
$non_optional ||= $node; |
| 4577 |
# |
# |
| 4578 |
} |
} |
| 4579 |
## 4. |
## 5. |
| 4580 |
## goto 2. |
## goto 3. |
| 4581 |
$i--; |
$i--; |
| 4582 |
} |
} |
| 4583 |
|
|
| 4584 |
## 5. (a) has a |p| element in scope |
## 6. (a) has a |p| element in scope |
| 4585 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4586 |
if ($_->[1] == P_EL) { |
if ($_->[1] == P_EL) { |
| 4587 |
!!!cp ('t353'); |
!!!cp ('t353'); |
| 4598 |
} |
} |
| 4599 |
} # INSCOPE |
} # INSCOPE |
| 4600 |
|
|
| 4601 |
## 5. (b) insert |
## 6. (b) insert |
| 4602 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4603 |
!!!nack ('t359.1'); |
!!!nack ('t359.1'); |
| 4604 |
!!!next-token; |
!!!next-token; |
| 4607 |
$token->{tag_name} eq 'dd') { |
$token->{tag_name} eq 'dd') { |
| 4608 |
## NOTE: As normal, but imply </dt> or </dd> when ... |
## NOTE: As normal, but imply </dt> or </dd> when ... |
| 4609 |
|
|
| 4610 |
|
## 1. Frameset-ng |
| 4611 |
|
delete $self->{frameset_ok}; |
| 4612 |
|
|
| 4613 |
my $non_optional; |
my $non_optional; |
| 4614 |
my $i = -1; |
my $i = -1; |
| 4615 |
|
|
| 4616 |
## 1. |
## 2. |
| 4617 |
for my $node (reverse @{$self->{open_elements}}) { |
for my $node (reverse @{$self->{open_elements}}) { |
| 4618 |
if ($node->[1] == DTDD_EL) { |
if ($node->[1] == DTDD_EL) { |
| 4619 |
## 2. (a) As if </li> |
## 3. (a) As if </li> |
| 4620 |
{ |
{ |
| 4621 |
## If no </li> - not applied |
## If no </li> - not applied |
| 4622 |
# |
# |
| 4640 |
splice @{$self->{open_elements}}, $i; |
splice @{$self->{open_elements}}, $i; |
| 4641 |
} |
} |
| 4642 |
|
|
| 4643 |
last; ## 2. (b) goto 5. |
last; ## 3. (b) goto 6. |
| 4644 |
} elsif ( |
} elsif ( |
| 4645 |
## NOTE: not "formatting" and not "phrasing" |
## NOTE: not "formatting" and not "phrasing" |
| 4646 |
($node->[1] & SPECIAL_EL or |
($node->[1] & SPECIAL_EL or |
| 4649 |
|
|
| 4650 |
(not $node->[1] & ADDRESS_DIV_P_EL) |
(not $node->[1] & ADDRESS_DIV_P_EL) |
| 4651 |
) { |
) { |
| 4652 |
## 3. |
## 4. |
| 4653 |
!!!cp ('t357.1'); |
!!!cp ('t357.1'); |
| 4654 |
last; ## goto 5. |
last; ## goto 6. |
| 4655 |
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) { |
} elsif ($node->[1] & END_TAG_OPTIONAL_EL) { |
| 4660 |
$non_optional ||= $node; |
$non_optional ||= $node; |
| 4661 |
# |
# |
| 4662 |
} |
} |
| 4663 |
## 4. |
## 5. |
| 4664 |
## goto 2. |
## goto 3. |
| 4665 |
$i--; |
$i--; |
| 4666 |
} |
} |
| 4667 |
|
|
| 4668 |
## 5. (a) has a |p| element in scope |
## 6. (a) has a |p| element in scope |
| 4669 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4670 |
if ($_->[1] == P_EL) { |
if ($_->[1] == P_EL) { |
| 4671 |
!!!cp ('t353.1'); |
!!!cp ('t353.1'); |
| 4679 |
} |
} |
| 4680 |
} # INSCOPE |
} # INSCOPE |
| 4681 |
|
|
| 4682 |
## 5. (b) insert |
## 6. (b) insert |
| 4683 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
| 4684 |
!!!nack ('t359.2'); |
!!!nack ('t359.2'); |
| 4685 |
!!!next-token; |
!!!next-token; |
| 4799 |
|
|
| 4800 |
push @$active_formatting_elements, ['#marker', '']; |
push @$active_formatting_elements, ['#marker', '']; |
| 4801 |
|
|
| 4802 |
|
delete $self->{frameset_ok}; |
| 4803 |
|
|
| 4804 |
!!!nack ('t379.1'); |
!!!nack ('t379.1'); |
| 4805 |
!!!next-token; |
!!!next-token; |
| 4806 |
next B; |
next B; |
| 4814 |
if ($token->{tag_name} eq 'xmp') { |
if ($token->{tag_name} eq 'xmp') { |
| 4815 |
!!!cp ('t381'); |
!!!cp ('t381'); |
| 4816 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 4817 |
|
|
| 4818 |
|
delete $self->{frameset_ok}; |
| 4819 |
|
} elsif ($token->{tag_name} eq 'iframe') { |
| 4820 |
|
!!!cp ('t381.1'); |
| 4821 |
|
delete $self->{frameset_ok}; |
| 4822 |
} else { |
} else { |
| 4823 |
!!!cp ('t399'); |
!!!cp ('t399'); |
| 4824 |
} |
} |
| 4897 |
## 4., 6. Insertion mode |
## 4., 6. Insertion mode |
| 4898 |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
| 4899 |
|
|
| 4900 |
## XXX: 5. frameset-ok flag |
## 5. Frameset-ng. |
| 4901 |
|
delete $self->{frameset_ok}; |
| 4902 |
|
|
| 4903 |
!!!nack ('t392.1'); |
!!!nack ('t392.1'); |
| 4904 |
!!!next-token; |
!!!next-token; |
| 4990 |
next B; |
next B; |
| 4991 |
} elsif ({ |
} elsif ({ |
| 4992 |
caption => 1, col => 1, colgroup => 1, frame => 1, |
caption => 1, col => 1, colgroup => 1, frame => 1, |
| 4993 |
frameset => 1, head => 1, |
head => 1, |
| 4994 |
tbody => 1, td => 1, tfoot => 1, th => 1, |
tbody => 1, td => 1, tfoot => 1, th => 1, |
| 4995 |
thead => 1, tr => 1, |
thead => 1, tr => 1, |
| 4996 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 5027 |
applet => 1, marquee => 1, object => 1, |
applet => 1, marquee => 1, object => 1, |
| 5028 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 5029 |
!!!cp ('t380'); |
!!!cp ('t380'); |
| 5030 |
|
|
| 5031 |
push @$active_formatting_elements, ['#marker', '']; |
push @$active_formatting_elements, ['#marker', '']; |
| 5032 |
|
|
| 5033 |
|
delete $self->{frameset_ok}; |
| 5034 |
|
|
| 5035 |
!!!nack ('t380.1'); |
!!!nack ('t380.1'); |
| 5036 |
} elsif ({ |
} elsif ({ |
| 5037 |
b => 1, big => 1, em => 1, font => 1, i => 1, |
b => 1, big => 1, em => 1, font => 1, i => 1, |
| 5052 |
keygen => 1, |
keygen => 1, |
| 5053 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 5054 |
!!!cp ('t388.1'); |
!!!cp ('t388.1'); |
| 5055 |
|
|
| 5056 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 5057 |
|
|
| 5058 |
|
delete $self->{frameset_ok}; |
| 5059 |
|
|
| 5060 |
!!!ack ('t388.3'); |
!!!ack ('t388.3'); |
| 5061 |
} elsif ($token->{tag_name} eq 'select') { |
} elsif ($token->{tag_name} eq 'select') { |
| 5062 |
## TODO: associate with $self->{form_element} if defined |
## TODO: associate with $self->{form_element} if defined |
| 5063 |
|
|
| 5064 |
|
delete $self->{frameset_ok}; |
| 5065 |
|
|
| 5066 |
if ($self->{insertion_mode} & TABLE_IMS or |
if ($self->{insertion_mode} & TABLE_IMS or |
| 5067 |
$self->{insertion_mode} & BODY_TABLE_IMS or |
$self->{insertion_mode} & BODY_TABLE_IMS or |
| 5068 |
($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) { |
($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) { |
| 5081 |
next B; |
next B; |
| 5082 |
} |
} |
| 5083 |
} elsif ($token->{type} == END_TAG_TOKEN) { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 5084 |
if ($token->{tag_name} eq 'body') { |
if ($token->{tag_name} eq 'body' or $token->{tag_name} eq 'html') { |
| 5085 |
|
|
| 5086 |
## 1. If not "have an element in scope": |
## 1. If not "have an element in scope": |
| 5087 |
## "has a |body| element in scope" |
## "has a |body| element in scope" |
| 5098 |
} |
} |
| 5099 |
} |
} |
| 5100 |
|
|
| 5101 |
## NOTE: |<marquee></body>|, |<svg><foreignobject></body>| |
## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|, |
| 5102 |
|
## and fragment cases. |
| 5103 |
|
|
| 5104 |
!!!parse-error (type => 'unmatched end tag', |
!!!parse-error (type => 'unmatched end tag', |
| 5105 |
text => $token->{tag_name}, token => $token); |
text => $token->{tag_name}, token => $token); |
| 5106 |
## NOTE: Ignore the token. |
## Ignore the token. (</body> or </html>) |
| 5107 |
!!!next-token; |
!!!next-token; |
| 5108 |
next B; |
next B; |
| 5109 |
} # INSCOPE |
} # INSCOPE |
| 5126 |
|
|
| 5127 |
## 3. Switch the insertion mode. |
## 3. Switch the insertion mode. |
| 5128 |
$self->{insertion_mode} = AFTER_BODY_IM; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 5129 |
!!!next-token; |
if ($token->{tag_name} eq 'body') { |
|
next B; |
|
|
} elsif ($token->{tag_name} eq 'html') { |
|
|
## TODO: Update this code. It seems that the code below is not |
|
|
## up-to-date, though it has same effect as speced. |
|
|
if (@{$self->{open_elements}} > 1 and |
|
|
$self->{open_elements}->[1]->[1] == BODY_EL) { |
|
|
unless ($self->{open_elements}->[-1]->[1] == BODY_EL) { |
|
|
!!!cp ('t406'); |
|
|
!!!parse-error (type => 'not closed', |
|
|
text => $self->{open_elements}->[1]->[0] |
|
|
->manakai_local_name, |
|
|
token => $token); |
|
|
} else { |
|
|
!!!cp ('t407'); |
|
|
} |
|
|
$self->{insertion_mode} = AFTER_BODY_IM; |
|
|
## reprocess |
|
|
next B; |
|
|
} else { |
|
|
!!!cp ('t408'); |
|
|
!!!parse-error (type => 'unmatched end tag', |
|
|
text => $token->{tag_name}, token => $token); |
|
|
## Ignore the token |
|
| 5130 |
!!!next-token; |
!!!next-token; |
| 5131 |
next B; |
} else { # html |
| 5132 |
|
## Reprocess. |
| 5133 |
} |
} |
| 5134 |
|
next B; |
| 5135 |
} elsif ({ |
} elsif ({ |
| 5136 |
## NOTE: End tags for non-phrasing flow content elements |
## NOTE: End tags for non-phrasing flow content elements |
| 5137 |
|
|
| 5139 |
address => 1, article => 1, aside => 1, blockquote => 1, |
address => 1, article => 1, aside => 1, blockquote => 1, |
| 5140 |
center => 1, datagrid => 1, details => 1, dialog => 1, |
center => 1, datagrid => 1, details => 1, dialog => 1, |
| 5141 |
dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1, |
dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1, |
| 5142 |
footer => 1, header => 1, listing => 1, menu => 1, nav => 1, |
footer => 1, header => 1, hgroup => 1, |
| 5143 |
|
listing => 1, menu => 1, nav => 1, |
| 5144 |
ol => 1, pre => 1, section => 1, ul => 1, |
ol => 1, pre => 1, section => 1, ul => 1, |
| 5145 |
|
|
| 5146 |
## NOTE: As normal, but ... optional tags |
## NOTE: As normal, but ... optional tags |