| 2018 |
my $root_element; !!!create-element ($root_element, 'html'); |
my $root_element; !!!create-element ($root_element, 'html'); |
| 2019 |
$self->{document}->append_child ($root_element); |
$self->{document}->append_child ($root_element); |
| 2020 |
push @{$self->{open_elements}}, [$root_element, 'html']; |
push @{$self->{open_elements}}, [$root_element, 'html']; |
|
#$phase = 'main'; |
|
| 2021 |
## reprocess |
## reprocess |
| 2022 |
#redo B; |
#redo B; |
| 2023 |
return; |
return; ## Go to the main phase. |
| 2024 |
} # B |
} # B |
| 2025 |
} # _tree_construction_root_element |
} # _tree_construction_root_element |
| 2026 |
|
|
| 2096 |
sub _tree_construction_main ($) { |
sub _tree_construction_main ($) { |
| 2097 |
my $self = shift; |
my $self = shift; |
| 2098 |
|
|
| 2099 |
my $phase = 'main'; |
my $previous_insertion_mode; |
| 2100 |
|
|
| 2101 |
my $active_formatting_elements = []; |
my $active_formatting_elements = []; |
| 2102 |
|
|
| 2497 |
$parse_rcdata->('CDATA', $insert); |
$parse_rcdata->('CDATA', $insert); |
| 2498 |
return; |
return; |
| 2499 |
} elsif ({ |
} elsif ({ |
| 2500 |
base => 1, link => 1, meta => 1, |
base => 1, link => 1, |
| 2501 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2502 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 2503 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 2504 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 2505 |
!!!next-token; |
!!!next-token; |
|
## TODO: Extracting |charset| from |meta|. |
|
| 2506 |
return; |
return; |
| 2507 |
} elsif ($token->{tag_name} eq 'meta') { |
} elsif ($token->{tag_name} eq 'meta') { |
| 2508 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
| 2515 |
$charset = $token->{attributes}->{charset}->{value}; |
$charset = $token->{attributes}->{charset}->{value}; |
| 2516 |
} |
} |
| 2517 |
if ($token->{attributes}->{'http-equiv'}) { |
if ($token->{attributes}->{'http-equiv'}) { |
| 2518 |
|
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
| 2519 |
if ($token->{attributes}->{'http-equiv'}->{value} |
if ($token->{attributes}->{'http-equiv'}->{value} |
| 2520 |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
| 2521 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 2527 |
} |
} |
| 2528 |
|
|
| 2529 |
!!!next-token; |
!!!next-token; |
|
## TODO: Extracting |charset| from |meta|. |
|
| 2530 |
return; |
return; |
| 2531 |
} elsif ($token->{tag_name} eq 'title') { |
} elsif ($token->{tag_name} eq 'title') { |
| 2532 |
!!!parse-error (type => 'in body:title'); |
!!!parse-error (type => 'in body:title'); |
| 3331 |
}; # $in_body |
}; # $in_body |
| 3332 |
|
|
| 3333 |
B: { |
B: { |
| 3334 |
if ($phase eq 'main') { |
if ($self->{insertion_mode} ne 'trailing end') { |
| 3335 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} eq 'DOCTYPE') { |
| 3336 |
!!!parse-error (type => 'in html:#DOCTYPE'); |
!!!parse-error (type => 'in html:#DOCTYPE'); |
| 3337 |
## Ignore the token |
## Ignore the token |
| 3468 |
} |
} |
| 3469 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3470 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
|
## TODO: Extracting |charset| from |meta|. |
|
| 3471 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 3472 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} eq 'after head'; |
| 3473 |
!!!next-token; |
!!!next-token; |
| 3487 |
$charset = $token->{attributes}->{charset}->{value}; |
$charset = $token->{attributes}->{charset}->{value}; |
| 3488 |
} |
} |
| 3489 |
if ($token->{attributes}->{'http-equiv'}) { |
if ($token->{attributes}->{'http-equiv'}) { |
| 3490 |
|
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
| 3491 |
if ($token->{attributes}->{'http-equiv'}->{value} |
if ($token->{attributes}->{'http-equiv'}->{value} |
| 3492 |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
| 3493 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
| 5041 |
} elsif ($self->{insertion_mode} eq 'after body') { |
} elsif ($self->{insertion_mode} eq 'after body') { |
| 5042 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5043 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5044 |
|
my $data = $1; |
| 5045 |
## As if in body |
## As if in body |
| 5046 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 5047 |
|
|
| 5048 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5049 |
|
|
| 5050 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5051 |
!!!next-token; |
!!!next-token; |
| 5071 |
!!!next-token; |
!!!next-token; |
| 5072 |
redo B; |
redo B; |
| 5073 |
} else { |
} else { |
| 5074 |
$phase = 'trailing end'; |
$previous_insertion_mode = $self->{insertion_mode}; |
| 5075 |
|
$self->{insertion_mode} = 'trailing end'; |
| 5076 |
!!!next-token; |
!!!next-token; |
| 5077 |
redo B; |
redo B; |
| 5078 |
} |
} |
| 5089 |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
| 5090 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5091 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5092 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5093 |
|
|
| 5094 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5095 |
!!!next-token; |
!!!next-token; |
| 5154 |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
| 5155 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
| 5156 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 5157 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 5158 |
|
|
| 5159 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 5160 |
!!!next-token; |
!!!next-token; |
| 5162 |
} |
} |
| 5163 |
} |
} |
| 5164 |
|
|
| 5165 |
# |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
| 5166 |
|
!!!parse-error (type => 'after frameset:#character'); |
| 5167 |
|
|
| 5168 |
|
## Ignore the token. |
| 5169 |
|
if (length $token->{data}) { |
| 5170 |
|
## reprocess the rest of characters |
| 5171 |
|
} else { |
| 5172 |
|
!!!next-token; |
| 5173 |
|
} |
| 5174 |
|
redo B; |
| 5175 |
|
} |
| 5176 |
} elsif ($token->{type} eq 'comment') { |
} elsif ($token->{type} eq 'comment') { |
| 5177 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
| 5178 |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
| 5187 |
} |
} |
| 5188 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} eq 'end tag') { |
| 5189 |
if ($token->{tag_name} eq 'html') { |
if ($token->{tag_name} eq 'html') { |
| 5190 |
$phase = 'trailing end'; |
$previous_insertion_mode = $self->{insertion_mode}; |
| 5191 |
|
$self->{insertion_mode} = 'trailing end'; |
| 5192 |
!!!next-token; |
!!!next-token; |
| 5193 |
redo B; |
redo B; |
| 5194 |
} else { |
} else { |
| 5195 |
# |
# |
| 5196 |
} |
} |
| 5197 |
} else { |
} else { |
| 5198 |
# |
die "$0: $token->{type}: Unknown token type"; |
| 5199 |
} |
} |
| 5200 |
|
|
| 5201 |
if (defined $token->{tag_name}) { |
!!!parse-error (type => 'after frameset:'.($token->{tag_name} eq 'end tag' ? '/' : '').$token->{tag_name}); |
|
!!!parse-error (type => 'after frameset:'.($token->{tag_name} eq 'end tag' ? '/' : '').$token->{tag_name}); |
|
|
} else { |
|
|
!!!parse-error (type => 'after frameset:#'.$token->{type}); |
|
|
} |
|
| 5202 |
## Ignore the token |
## Ignore the token |
| 5203 |
!!!next-token; |
!!!next-token; |
| 5204 |
redo B; |
redo B; |
| 5208 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
| 5209 |
} |
} |
| 5210 |
} |
} |
| 5211 |
} elsif ($phase eq 'trailing end') { |
} elsif ($self->{insertion_mode} eq 'trailing end') { |
| 5212 |
## states in the main stage is preserved yet # MUST |
## states in the main stage is preserved yet # MUST |
| 5213 |
|
|
| 5214 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} eq 'DOCTYPE') { |
| 5228 |
## NOTE: The insertion mode in the main phase |
## NOTE: The insertion mode in the main phase |
| 5229 |
## just before the phase has been changed to the trailing |
## just before the phase has been changed to the trailing |
| 5230 |
## end phase is either "after body" or "after frameset". |
## end phase is either "after body" or "after frameset". |
| 5231 |
$reconstruct_active_formatting_elements->($insert_to_current) |
$reconstruct_active_formatting_elements->($insert_to_current); |
|
if $phase eq 'main'; |
|
| 5232 |
|
|
| 5233 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
| 5234 |
|
|
| 5239 |
} |
} |
| 5240 |
|
|
| 5241 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
| 5242 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
| 5243 |
## reprocess |
## reprocess |
| 5244 |
redo B; |
redo B; |
| 5245 |
} elsif ($token->{type} eq 'start tag' or |
} elsif ($token->{type} eq 'start tag' or |
| 5246 |
$token->{type} eq 'end tag') { |
$token->{type} eq 'end tag') { |
| 5247 |
!!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name}); |
!!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name}); |
| 5248 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
| 5249 |
## reprocess |
## reprocess |
| 5250 |
redo B; |
redo B; |
| 5251 |
} elsif ($token->{type} eq 'end-of-file') { |
} elsif ($token->{type} eq 'end-of-file') { |