2018 |
my $root_element; !!!create-element ($root_element, 'html'); |
my $root_element; !!!create-element ($root_element, 'html'); |
2019 |
$self->{document}->append_child ($root_element); |
$self->{document}->append_child ($root_element); |
2020 |
push @{$self->{open_elements}}, [$root_element, 'html']; |
push @{$self->{open_elements}}, [$root_element, 'html']; |
|
#$phase = 'main'; |
|
2021 |
## reprocess |
## reprocess |
2022 |
#redo B; |
#redo B; |
2023 |
return; |
return; ## Go to the main phase. |
2024 |
} # B |
} # B |
2025 |
} # _tree_construction_root_element |
} # _tree_construction_root_element |
2026 |
|
|
2096 |
sub _tree_construction_main ($) { |
sub _tree_construction_main ($) { |
2097 |
my $self = shift; |
my $self = shift; |
2098 |
|
|
2099 |
my $phase = 'main'; |
my $previous_insertion_mode; |
2100 |
|
|
2101 |
my $active_formatting_elements = []; |
my $active_formatting_elements = []; |
2102 |
|
|
2497 |
$parse_rcdata->('CDATA', $insert); |
$parse_rcdata->('CDATA', $insert); |
2498 |
return; |
return; |
2499 |
} elsif ({ |
} elsif ({ |
2500 |
base => 1, link => 1, meta => 1, |
base => 1, link => 1, |
2501 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
2502 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
2503 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
2504 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
2505 |
!!!next-token; |
!!!next-token; |
|
## TODO: Extracting |charset| from |meta|. |
|
2506 |
return; |
return; |
2507 |
} elsif ($token->{tag_name} eq 'meta') { |
} elsif ($token->{tag_name} eq 'meta') { |
2508 |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
## NOTE: This is an "as if in head" code clone, only "-t" differs |
2515 |
$charset = $token->{attributes}->{charset}->{value}; |
$charset = $token->{attributes}->{charset}->{value}; |
2516 |
} |
} |
2517 |
if ($token->{attributes}->{'http-equiv'}) { |
if ($token->{attributes}->{'http-equiv'}) { |
2518 |
|
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
2519 |
if ($token->{attributes}->{'http-equiv'}->{value} |
if ($token->{attributes}->{'http-equiv'}->{value} |
2520 |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
2521 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
2527 |
} |
} |
2528 |
|
|
2529 |
!!!next-token; |
!!!next-token; |
|
## TODO: Extracting |charset| from |meta|. |
|
2530 |
return; |
return; |
2531 |
} elsif ($token->{tag_name} eq 'title') { |
} elsif ($token->{tag_name} eq 'title') { |
2532 |
!!!parse-error (type => 'in body:title'); |
!!!parse-error (type => 'in body:title'); |
3331 |
}; # $in_body |
}; # $in_body |
3332 |
|
|
3333 |
B: { |
B: { |
3334 |
if ($phase eq 'main') { |
if ($self->{insertion_mode} ne 'trailing end') { |
3335 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} eq 'DOCTYPE') { |
3336 |
!!!parse-error (type => 'in html:#DOCTYPE'); |
!!!parse-error (type => 'in html:#DOCTYPE'); |
3337 |
## Ignore the token |
## Ignore the token |
3468 |
} |
} |
3469 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
3470 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
|
## TODO: Extracting |charset| from |meta|. |
|
3471 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
3472 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} eq 'after head'; |
3473 |
!!!next-token; |
!!!next-token; |
3487 |
$charset = $token->{attributes}->{charset}->{value}; |
$charset = $token->{attributes}->{charset}->{value}; |
3488 |
} |
} |
3489 |
if ($token->{attributes}->{'http-equiv'}) { |
if ($token->{attributes}->{'http-equiv'}) { |
3490 |
|
## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition. |
3491 |
if ($token->{attributes}->{'http-equiv'}->{value} |
if ($token->{attributes}->{'http-equiv'}->{value} |
3492 |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
=~ /\A[^;]*;[\x09-\x0D\x20]*charset[\x09-\x0D\x20]*= |
3493 |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
[\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'| |
5041 |
} elsif ($self->{insertion_mode} eq 'after body') { |
} elsif ($self->{insertion_mode} eq 'after body') { |
5042 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
5043 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
5044 |
|
my $data = $1; |
5045 |
## As if in body |
## As if in body |
5046 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
5047 |
|
|
5048 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
5049 |
|
|
5050 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
5051 |
!!!next-token; |
!!!next-token; |
5071 |
!!!next-token; |
!!!next-token; |
5072 |
redo B; |
redo B; |
5073 |
} else { |
} else { |
5074 |
$phase = 'trailing end'; |
$previous_insertion_mode = $self->{insertion_mode}; |
5075 |
|
$self->{insertion_mode} = 'trailing end'; |
5076 |
!!!next-token; |
!!!next-token; |
5077 |
redo B; |
redo B; |
5078 |
} |
} |
5089 |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
} elsif ($self->{insertion_mode} eq 'in frameset') { |
5090 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
5091 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
5092 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
5093 |
|
|
5094 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
5095 |
!!!next-token; |
!!!next-token; |
5154 |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
5155 |
if ($token->{type} eq 'character') { |
if ($token->{type} eq 'character') { |
5156 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
5157 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
5158 |
|
|
5159 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
5160 |
!!!next-token; |
!!!next-token; |
5162 |
} |
} |
5163 |
} |
} |
5164 |
|
|
5165 |
# |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
5166 |
|
!!!parse-error (type => 'after frameset:#character'); |
5167 |
|
|
5168 |
|
## Ignore the token. |
5169 |
|
if (length $token->{data}) { |
5170 |
|
## reprocess the rest of characters |
5171 |
|
} else { |
5172 |
|
!!!next-token; |
5173 |
|
} |
5174 |
|
redo B; |
5175 |
|
} |
5176 |
} elsif ($token->{type} eq 'comment') { |
} elsif ($token->{type} eq 'comment') { |
5177 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
5178 |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
5187 |
} |
} |
5188 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} eq 'end tag') { |
5189 |
if ($token->{tag_name} eq 'html') { |
if ($token->{tag_name} eq 'html') { |
5190 |
$phase = 'trailing end'; |
$previous_insertion_mode = $self->{insertion_mode}; |
5191 |
|
$self->{insertion_mode} = 'trailing end'; |
5192 |
!!!next-token; |
!!!next-token; |
5193 |
redo B; |
redo B; |
5194 |
} else { |
} else { |
5195 |
# |
# |
5196 |
} |
} |
5197 |
} else { |
} else { |
5198 |
# |
die "$0: $token->{type}: Unknown token type"; |
5199 |
} |
} |
5200 |
|
|
5201 |
if (defined $token->{tag_name}) { |
!!!parse-error (type => 'after frameset:'.($token->{tag_name} eq 'end tag' ? '/' : '').$token->{tag_name}); |
|
!!!parse-error (type => 'after frameset:'.($token->{tag_name} eq 'end tag' ? '/' : '').$token->{tag_name}); |
|
|
} else { |
|
|
!!!parse-error (type => 'after frameset:#'.$token->{type}); |
|
|
} |
|
5202 |
## Ignore the token |
## Ignore the token |
5203 |
!!!next-token; |
!!!next-token; |
5204 |
redo B; |
redo B; |
5208 |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
die "$0: $self->{insertion_mode}: Unknown insertion mode"; |
5209 |
} |
} |
5210 |
} |
} |
5211 |
} elsif ($phase eq 'trailing end') { |
} elsif ($self->{insertion_mode} eq 'trailing end') { |
5212 |
## states in the main stage is preserved yet # MUST |
## states in the main stage is preserved yet # MUST |
5213 |
|
|
5214 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} eq 'DOCTYPE') { |
5228 |
## NOTE: The insertion mode in the main phase |
## NOTE: The insertion mode in the main phase |
5229 |
## just before the phase has been changed to the trailing |
## just before the phase has been changed to the trailing |
5230 |
## end phase is either "after body" or "after frameset". |
## end phase is either "after body" or "after frameset". |
5231 |
$reconstruct_active_formatting_elements->($insert_to_current) |
$reconstruct_active_formatting_elements->($insert_to_current); |
|
if $phase eq 'main'; |
|
5232 |
|
|
5233 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($data); |
5234 |
|
|
5239 |
} |
} |
5240 |
|
|
5241 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
5242 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
5243 |
## reprocess |
## reprocess |
5244 |
redo B; |
redo B; |
5245 |
} elsif ($token->{type} eq 'start tag' or |
} elsif ($token->{type} eq 'start tag' or |
5246 |
$token->{type} eq 'end tag') { |
$token->{type} eq 'end tag') { |
5247 |
!!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name}); |
!!!parse-error (type => 'after html:'.($token->{type} eq 'end tag' ? '/' : '').$token->{tag_name}); |
5248 |
$phase = 'main'; |
$self->{insertion_mode} = $previous_insertion_mode; |
5249 |
## reprocess |
## reprocess |
5250 |
redo B; |
redo B; |
5251 |
} elsif ($token->{type} eq 'end-of-file') { |
} elsif ($token->{type} eq 'end-of-file') { |