| 705 |
# |
# |
| 706 |
} else { |
} else { |
| 707 |
!!!parse-error (type => 'nestc'); |
!!!parse-error (type => 'nestc'); |
| 708 |
|
## TODO: Different error type for <aa / bb> than <aa/> |
| 709 |
} |
} |
| 710 |
$self->{state} = 'before attribute name'; |
$self->{state} = 'before attribute name'; |
| 711 |
# next-input-character is already done |
# next-input-character is already done |
| 1085 |
redo A; |
redo A; |
| 1086 |
} else { |
} else { |
| 1087 |
$self->{current_token}->{data} # comment |
$self->{current_token}->{data} # comment |
| 1088 |
.= chr ($self->{next_input_character}); |
.= '-' . chr ($self->{next_input_character}); |
| 1089 |
$self->{state} = 'comment'; |
$self->{state} = 'comment'; |
| 1090 |
!!!next-input-character; |
!!!next-input-character; |
| 1091 |
redo A; |
redo A; |
| 2018 |
my $root_element; !!!create-element ($root_element, 'html'); |
my $root_element; !!!create-element ($root_element, 'html'); |
| 2019 |
$self->{document}->append_child ($root_element); |
$self->{document}->append_child ($root_element); |
| 2020 |
push @{$self->{open_elements}}, [$root_element, 'html']; |
push @{$self->{open_elements}}, [$root_element, 'html']; |
|
#$phase = 'main'; |
|
| 2021 |
## reprocess |
## reprocess |
| 2022 |
#redo B; |
#redo B; |
| 2023 |
return; |
return; ## Go to the main phase. |
| 2024 |
} # B |
} # B |
| 2025 |
} # _tree_construction_root_element |
} # _tree_construction_root_element |
| 2026 |
|
|
| 2096 |
sub _tree_construction_main ($) { |
sub _tree_construction_main ($) { |
| 2097 |
my $self = shift; |
my $self = shift; |
| 2098 |
|
|
| 2099 |
my $phase = 'main'; |
my $previous_insertion_mode; |
| 2100 |
|
|
| 2101 |
my $active_formatting_elements = []; |
my $active_formatting_elements = []; |
| 2102 |
|
|
| 2497 |
$parse_rcdata->('CDATA', $insert); |
$parse_rcdata->('CDATA', $insert); |
| 2498 |
return; |
return; |
| 2499 |
} elsif ({ |
} elsif ({ |
| 2500 |
base => 1, link => 1, meta => 1, |
base => 1, link => 1, |
| 2501 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2502 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 2503 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 2504 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 2505 |
!!!next-token; |
!!!next-token; |
| 2506 |
## TODO: Extracting |charset| from |meta|. |
return; |
| 2507 |
|
} elsif ($token->{tag_name} eq 'meta') { |
| 2508 |
|
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 2509 |
|
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 2510 |
|
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 2511 |
|
|
| 2512 |
|
unless ($self->{confident}) { |
| 2513 |
|
my $charset; |
| 2514 |
|
if ($token->{attributes}->{charset}) { ## TODO: And if supported |
| 2515 |
|
$charset = $token->{attributes}->{charset}->{value}; |
| 2516 |
|
} |
| 2517 |
|
if ($token->{attributes}->{'http-equiv'}) { |
| 2518 |
|
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
| 2519 |
|
if ($token->{attributes}->{'http-equiv'}->{value} |
| 2520 |
|
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
| 2521 |
|
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 2522 |
|
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
| 2523 |
|
$charset = defined $1 ? $1 : defined $2 ? $2 : $3; |
| 2524 |
|
} ## TODO: And if supported |
| 2525 |
|
} |
| 2526 |
|
## TODO: Change the encoding |
| 2527 |
|
} |
| 2528 |
|
|
| 2529 |
|
!!!next-token; |
| 2530 |
return; |
return; |
| 2531 |
} elsif ($token->{tag_name} eq 'title') { |
} elsif ($token->{tag_name} eq 'title') { |
| 2532 |
!!!parse-error (type => 'in body:title'); |
!!!parse-error (type => 'in body:title'); |
| 3141 |
} # INSCOPE |
} # INSCOPE |
| 3142 |
|
|
| 3143 |
if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) { |
if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) { |
| 3144 |
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
if (defined $i) { |
| 3145 |
|
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); |
| 3146 |
|
} else { |
| 3147 |
|
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3148 |
|
} |
| 3149 |
} |
} |
| 3150 |
|
|
| 3151 |
if (defined $i) { |
if (defined $i) { |
| 3331 |
}; # $in_body |
}; # $in_body |
| 3332 |
|
|
| 3333 |
B: { |
B: { |
| 3334 |
if ($phase eq 'main') { |
if ($self->{insertion_mode} ne 'trailing end') { |
| 3335 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} eq 'DOCTYPE') { |
| 3336 |
!!!parse-error (type => 'in html:#DOCTYPE'); |
!!!parse-error (type => 'in html:#DOCTYPE'); |
| 3337 |
## Ignore the token |
## Ignore the token |
| 3460 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} eq 'start tag') { |
| 3461 |
if ({base => ($self->{insertion_mode} eq 'in head' or |
if ({base => ($self->{insertion_mode} eq 'in head' or |
| 3462 |
$self->{insertion_mode} eq 'after head'), |
$self->{insertion_mode} eq 'after head'), |
| 3463 |
link => 1, meta => 1}->{$token->{tag_name}}) { |
link => 1}->{$token->{tag_name}}) { |
| 3464 |
|
## NOTE: There is a "as if in head" code clone. |
| 3465 |
|
if ($self->{insertion_mode} eq 'after head') { |
| 3466 |
|
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 3467 |
|
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 3468 |
|
} |
| 3469 |
|
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3470 |
|
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 3471 |
|
pop @{$self->{open_elements}} |
| 3472 |
|
if $self->{insertion_mode} eq 'after head'; |
| 3473 |
|
!!!next-token; |
| 3474 |
|
redo B; |
| 3475 |
|
} elsif ($token->{tag_name} eq 'meta') { |
| 3476 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 3477 |
if ($self->{insertion_mode} eq 'after head') { |
if ($self->{insertion_mode} eq 'after head') { |
| 3478 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 3480 |
} |
} |
| 3481 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3482 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 3483 |
|
|
| 3484 |
|
unless ($self->{confident}) { |
| 3485 |
|
my $charset; |
| 3486 |
|
if ($token->{attributes}->{charset}) { ## TODO: And if supported |
| 3487 |
|
$charset = $token->{attributes}->{charset}->{value}; |
| 3488 |
|
} |
| 3489 |
|
if ($token->{attributes}->{'http-equiv'}) { |
| 3490 |
|
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
| 3491 |
|
if ($token->{attributes}->{'http-equiv'}->{value} |
| 3492 |
|
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
| 3493 |
|
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 3494 |
|
([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) { |
| 3495 |
|
$charset = defined $1 ? $1 : defined $2 ? $2 : $3; |
| 3496 |
|
} ## TODO: And if supported |
| 3497 |
|
} |
| 3498 |
|
## TODO: Change the encoding |
| 3499 |
|
} |
| 3500 |
|
|
| 3501 |
## TODO: Extracting |charset| from |meta|. |
## TODO: Extracting |charset| from |meta|. |
| 3502 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 3503 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} eq 'after head'; |
| 5041 |
} elsif ($self->{insertion_mode} eq 'after body') { |
} elsif ($self->{insertion_mode} eq 'after body') { |
| 5042 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5043 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5044 |
|
my $data = $1; |
| 5045 |
## As if in body |
## As if in body |
| 5046 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 5047 |
|
|
| 5048 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5049 |
|
|
| 5050 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5051 |
!!!next-token; |
!!!next-token; |
| 5071 |
!!!next-token; |
!!!next-token; |
| 5072 |
redo B; |
redo B; |
| 5073 |
} else { |
} else { |
| 5074 |
$phase = 'trailing end'; |
$previous_insertion_mode = $self->{insertion_mode}; |
| 5075 |
|
$self->{insertion_mode} = 'trailing end'; |
| 5076 |
!!!next-token; |
!!!next-token; |
| 5077 |
redo B; |
redo B; |
| 5078 |
} |
} |
| 5089 |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
| 5090 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5091 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5092 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5093 |
|
|
| 5094 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5095 |
!!!next-token; |
!!!next-token; |
| 5154 |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
| 5155 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5156 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5157 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5158 |
|
|
| 5159 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5160 |
!!!next-token; |
!!!next-token; |
| 5162 |
} |
} |
| 5163 |
} |
} |
| 5164 |
|
|
| 5165 |
# |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
| 5166 |
|
!!!parse-error (type => 'after frameset:#character'); |
| 5167 |
|
|
| 5168 |
|
## Ignore the token. |
| 5169 |
|
if (length $token->{data}) { |
| 5170 |
|
## reprocess the rest of characters |
| 5171 |
|
} else { |
| 5172 |
|
!!!next-token; |
| 5173 |
|
} |
| 5174 |
|
redo B; |
| 5175 |
|
} |
| 5176 |
} elsif ($token->{type} eq 'comment') { |
} elsif ($token->{type} eq 'comment') { |
| 5177 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
| 5178 |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
| 5187 |
} |
} |
| 5188 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} eq 'end tag') { |
| 5189 |
if ($token->{tag_name} eq 'html') { |
if ($token->{tag_name} eq 'html') { |
| 5190 |
$phase = 'trailing end'; |
$previous_insertion_mode = $self->{insertion_mode}; |
| 5191 |
|
$self->{insertion_mode} = 'trailing end'; |
| 5192 |
!!!next-token; |
!!!next-token; |
| 5193 |
redo B; |
redo B; |
| 5194 |
} else { |
} else { |
| 5195 |
# |
# |
| 5196 |
} |
} |
| 5197 |
} else { |
} else { |
| 5198 |
# |
die "$0: $token->{type}: Unknown token type"; |
| 5199 |
} |
} |
| 5200 |
|
|
| 5201 |
## Report the unexpected token before it is ignored.  Tag tokens carry
## a |tag_name| and are reported by name; any other token is reported
## by its type.
if (defined $token->{tag_name}) {
  ## 'end tag' is a token type, not a tag name, so the "/" prefix must
  ## be chosen from |type| (as the 'after html:' error does).
  !!!parse-error (type => 'after frameset:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name});
} else {
  !!!parse-error (type => 'after frameset:#'.$token->{type});
}
| 5202 |
## Ignore the token |
## Ignore the token |
| 5203 |
!!!next-token; |
!!!next-token; |
| 5204 |
redo B; |
redo B; |
| 5208 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 5209 |
} |
} |
| 5210 |
} |
} |
| 5211 |
} elsif ($phase eq 'trailing end') { |
} elsif ($self->{insertion_mode} eq 'trailing end') { |
| 5212 |
## states in the main stage is preserved yet # MUST |
## states in the main stage is preserved yet # MUST |
| 5213 |
|
|
| 5214 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} eq 'DOCTYPE') { |
| 5228 |
## NOTE: The insertion mode in the main phase |
## NOTE: The insertion mode in the main phase |
| 5229 |
## just before the phase has been changed to the trailing |
## just before the phase has been changed to the trailing |
| 5230 |
## end phase is either "after body" or "after frameset". |
## end phase is either "after body" or "after frameset". |
| 5231 |
$reconstruct_active_formatting_elements->($insert_to_current) |
$reconstruct_active_formatting_elements->($insert_to_current); |
|
if $phase eq 'main'; |
|
| 5232 |
|
|
| 5233 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
| 5234 |
|
|
| 5239 |
} |
} |
| 5240 |
|
|
| 5241 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
| 5242 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
| 5243 |
## reprocess |
## reprocess |
| 5244 |
redo B; |
redo B; |
| 5245 |
} elsif ($token->{type} eq 'start tag' or |
} elsif ($token->{type} eq 'start tag' or |
| 5246 |
$token->{type} eq 'end tag') { |
$token->{type} eq 'end tag') { |
| 5247 |
!!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name}); |
!!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name}); |
| 5248 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
| 5249 |
## reprocess |
## reprocess |
| 5250 |
redo B; |
redo B; |
| 5251 |
} elsif ($token->{type} eq 'end-of-file') { |
} elsif ($token->{type} eq 'end-of-file') { |