918 |
## NOTE: "in foreign content" insertion mode is special; it is combined |
## NOTE: "in foreign content" insertion mode is special; it is combined |
919 |
## with the secondary insertion mode. In this parser, they are stored |
## with the secondary insertion mode. In this parser, they are stored |
920 |
## together in the bit-or'ed form. |
## together in the bit-or'ed form. |
921 |
|
## Insertion-mode flag for "in CDATA/RCDATA" (binary 1 followed by
## twelve zeros, i.e. 4096).  It is OR-ed into $self->{insertion_mode}
## when a CDATA/RCDATA element is opened, tested with `&` in the main
## loop, and cleared with `&= ~` when the element is closed.  The empty
## prototype is the usual Perl constant-sub form.
sub IN_CDATA_RCDATA_IM () { 0b1000000000000 } |
922 |
|
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is |
923 |
|
## combined with the original insertion mode. In this parser, |
924 |
|
## they are stored together in the bit-or'ed form. |
925 |
|
|
926 |
## NOTE: "initial" and "before html" insertion modes have no constants. |
## NOTE: "initial" and "before html" insertion modes have no constants. |
927 |
|
|
3979 |
|
|
3980 |
## Step 1 |
## Step 1 |
3981 |
my $start_tag_name = $token->{tag_name}; |
my $start_tag_name = $token->{tag_name}; |
3982 |
my $el; |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
|
!!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token); |
|
3983 |
|
|
3984 |
## Step 2 |
## Step 2 |
|
$insert->($el); |
|
|
|
|
|
## Step 3 |
|
3985 |
$self->{content_model} = $content_model_flag; # CDATA or RCDATA |
$self->{content_model} = $content_model_flag; # CDATA or RCDATA |
3986 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
3987 |
|
|
3988 |
## Step 4 |
## Step 3, 4 |
3989 |
my $text = ''; |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
|
!!!nack ('t40.1'); |
|
|
!!!next-token; |
|
|
while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing |
|
|
!!!cp ('t40'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} |
|
3990 |
|
|
3991 |
## Step 5 |
!!!nack ('t40.1'); |
|
if (length $text) { |
|
|
!!!cp ('t41'); |
|
|
my $text = $self->{document}->create_text_node ($text); |
|
|
$el->append_child ($text); |
|
|
} |
|
|
|
|
|
## Step 6 |
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
|
|
|
|
## Step 7 |
|
|
if ($token->{type} == END_TAG_TOKEN and |
|
|
$token->{tag_name} eq $start_tag_name) { |
|
|
!!!cp ('t42'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
## NOTE: An end-of-file token. |
|
|
if ($content_model_flag == CDATA_CONTENT_MODEL) { |
|
|
!!!cp ('t43'); |
|
|
!!!parse-error (type => 'in CDATA:#eof', token => $token); |
|
|
} elsif ($content_model_flag == RCDATA_CONTENT_MODEL) { |
|
|
!!!cp ('t44'); |
|
|
!!!parse-error (type => 'in RCDATA:#eof', token => $token); |
|
|
} else { |
|
|
die "$0: $content_model_flag in parse_rcdata"; |
|
|
} |
|
|
} |
|
3992 |
!!!next-token; |
!!!next-token; |
3993 |
}; # $parse_rcdata |
}; # $parse_rcdata |
3994 |
|
|
3995 |
my $script_start_tag = sub () { |
my $script_start_tag = sub () { |
3996 |
|
## Step 1 |
3997 |
my $script_el; |
my $script_el; |
3998 |
!!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token); |
!!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token); |
3999 |
|
|
4000 |
|
## Step 2 |
4001 |
## TODO: mark as "parser-inserted" |
## TODO: mark as "parser-inserted" |
4002 |
|
|
4003 |
|
## Step 3 |
4004 |
|
## TODO: Mark as "already executed", if ... |
4005 |
|
|
4006 |
|
## Step 4 |
4007 |
|
$insert->($script_el); |
4008 |
|
|
4009 |
|
## ISSUE: $script_el is not put into the stack |
4010 |
|
push @{$self->{open_elements}}, [$script_el, $el_category->{script}]; |
4011 |
|
|
4012 |
|
## Step 5 |
4013 |
$self->{content_model} = CDATA_CONTENT_MODEL; |
$self->{content_model} = CDATA_CONTENT_MODEL; |
4014 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
|
|
|
|
my $text = ''; |
|
|
!!!nack ('t45.1'); |
|
|
!!!next-token; |
|
|
while ($token->{type} == CHARACTER_TOKEN) { |
|
|
!!!cp ('t45'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} # stop if non-character token or tokenizer stops tokenising |
|
|
if (length $text) { |
|
|
!!!cp ('t46'); |
|
|
$script_el->manakai_append_text ($text); |
|
|
} |
|
|
|
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
4015 |
|
|
4016 |
if ($token->{type} == END_TAG_TOKEN and |
## Step 6-7 |
4017 |
$token->{tag_name} eq 'script') { |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
|
!!!cp ('t47'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
!!!cp ('t48'); |
|
|
!!!parse-error (type => 'in CDATA:#eof', token => $token); |
|
|
## ISSUE: And ignore? |
|
|
## TODO: mark as "already executed" |
|
|
} |
|
|
|
|
|
if (defined $self->{inner_html_node}) { |
|
|
!!!cp ('t49'); |
|
|
## TODO: mark as "already executed" |
|
|
} else { |
|
|
!!!cp ('t50'); |
|
|
## TODO: $old_insertion_point = current insertion point |
|
|
## TODO: insertion point = just before the next input character |
|
4018 |
|
|
4019 |
$insert->($script_el); |
!!!nack ('t40.2'); |
|
|
|
|
## TODO: insertion point = $old_insertion_point (might be "undefined") |
|
|
|
|
|
## TODO: if there is a script that will execute as soon as the parser resumes, then... |
|
|
} |
|
|
|
|
4020 |
!!!next-token; |
!!!next-token; |
4021 |
}; # $script_start_tag |
}; # $script_start_tag |
4022 |
|
|
4367 |
} |
} |
4368 |
!!!next-token; |
!!!next-token; |
4369 |
next B; |
next B; |
4370 |
|
} elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) { |
4371 |
|
if ($token->{type} == CHARACTER_TOKEN) { |
4372 |
|
$token->{data} =~ s/^\x0A// if $self->{ignore_newline}; |
4373 |
|
delete $self->{ignore_newline}; |
4374 |
|
|
4375 |
|
if (length $token->{data}) { |
4376 |
|
!!!cp ('t43'); |
4377 |
|
$self->{open_elements}->[-1]->[0]->manakai_append_text |
4378 |
|
($token->{data}); |
4379 |
|
} else { |
4380 |
|
!!!cp ('t43.1'); |
4381 |
|
} |
4382 |
|
!!!next-token; |
4383 |
|
next B; |
4384 |
|
} elsif ($token->{type} == END_TAG_TOKEN) { |
4385 |
|
delete $self->{ignore_newline}; |
4386 |
|
|
4387 |
|
if ($token->{tag_name} eq 'script') { |
4388 |
|
!!!cp ('t50'); |
4389 |
|
|
4390 |
|
## Para 1-2 |
4391 |
|
my $script = pop @{$self->{open_elements}}; |
4392 |
|
|
4393 |
|
## Para 3 |
4394 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
4395 |
|
|
4396 |
|
## Para 4 |
4397 |
|
## TODO: $old_insertion_point = $current_insertion_point; |
4398 |
|
## TODO: $current_insertion_point = just before $self->{nc}; |
4399 |
|
|
4400 |
|
## Para 5 |
4401 |
|
## TODO: Run the $script->[0]. |
4402 |
|
|
4403 |
|
## Para 6 |
4404 |
|
## TODO: $current_insertion_point = $old_insertion_point; |
4405 |
|
|
4406 |
|
## Para 7 |
4407 |
|
## TODO: if ($pending_external_script) { |
4408 |
|
## TODO: ... |
4409 |
|
## TODO: } |
4410 |
|
|
4411 |
|
!!!next-token; |
4412 |
|
next B; |
4413 |
|
} else { |
4414 |
|
!!!cp ('t42'); |
4415 |
|
|
4416 |
|
pop @{$self->{open_elements}}; |
4417 |
|
|
4418 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
4419 |
|
!!!next-token; |
4420 |
|
next B; |
4421 |
|
} |
4422 |
|
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
4423 |
|
delete $self->{ignore_newline}; |
4424 |
|
|
4425 |
|
!!!cp ('t44'); |
4426 |
|
!!!parse-error (type => 'not closed', |
4427 |
|
text => $self->{open_elements}->[-1]->[0] |
4428 |
|
->manakai_local_name, |
4429 |
|
token => $token); |
4430 |
|
|
4431 |
|
#if ($self->{open_elements}->[-1]->[1] & SCRIPT_EL) { |
4432 |
|
# ## TODO: Mark as "already executed" |
4433 |
|
#} |
4434 |
|
|
4435 |
|
pop @{$self->{open_elements}}; |
4436 |
|
|
4437 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
4438 |
|
## Reprocess. |
4439 |
|
next B; |
4440 |
|
} else { |
4441 |
|
die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type"; |
4442 |
|
} |
4443 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
4444 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
4445 |
!!!cp ('t87.1'); |
!!!cp ('t87.1'); |
4841 |
|
|
4842 |
## NOTE: There is an "as if in head" code clone. |
## NOTE: There is an "as if in head" code clone. |
4843 |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
4844 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
4845 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
4846 |
|
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
4847 |
next B; |
next B; |
4848 |
} elsif ($token->{tag_name} eq 'style' or |
} elsif ($token->{tag_name} eq 'style' or |
4849 |
$token->{tag_name} eq 'noframes') { |
$token->{tag_name} eq 'noframes') { |
4861 |
!!!cp ('t115'); |
!!!cp ('t115'); |
4862 |
} |
} |
4863 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
4864 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
4865 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
4866 |
|
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
4867 |
next B; |
next B; |
4868 |
} elsif ($token->{tag_name} eq 'noscript') { |
} elsif ($token->{tag_name} eq 'noscript') { |
4869 |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
4870 |
!!!cp ('t116'); |
!!!cp ('t116'); |
4871 |
## NOTE: and scripting is disabled |
## NOTE: and scripting is disabled |
4909 |
|
|
4910 |
## NOTE: There is an "as if in head" code clone. |
## NOTE: There is an "as if in head" code clone. |
4911 |
$script_start_tag->(); |
$script_start_tag->(); |
4912 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
4913 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1 # <head> |
4914 |
|
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
4915 |
next B; |
next B; |
4916 |
} elsif ($token->{tag_name} eq 'body' or |
} elsif ($token->{tag_name} eq 'body' or |
4917 |
$token->{tag_name} eq 'frameset') { |
$token->{tag_name} eq 'frameset') { |
7419 |
next B; |
next B; |
7420 |
} |
} |
7421 |
} elsif ($token->{tag_name} eq 'textarea') { |
} elsif ($token->{tag_name} eq 'textarea') { |
7422 |
my $tag_name = $token->{tag_name}; |
## Step 1 |
7423 |
my $el; |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
|
!!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token); |
|
7424 |
|
|
7425 |
|
## Step 2 |
7426 |
## TODO: $self->{form_element} if defined |
## TODO: $self->{form_element} if defined |
7427 |
|
|
7428 |
|
## Step 3 |
7429 |
|
$self->{ignore_newline} = 1; |
7430 |
|
|
7431 |
|
## Step 4 |
7432 |
|
## ISSUE: This step is wrong. (r2302 enbugged) |
7433 |
|
|
7434 |
|
## Step 5 |
7435 |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
7436 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
7437 |
|
|
7438 |
$insert->($el); |
## Step 6-7 |
7439 |
|
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
7440 |
my $text = ''; |
|
7441 |
!!!nack ('t392.1'); |
!!!nack ('t392.1'); |
7442 |
!!!next-token; |
!!!next-token; |
|
if ($token->{type} == CHARACTER_TOKEN) { |
|
|
$token->{data} =~ s/^\x0A//; |
|
|
unless (length $token->{data}) { |
|
|
!!!cp ('t392'); |
|
|
!!!next-token; |
|
|
} else { |
|
|
!!!cp ('t393'); |
|
|
} |
|
|
} else { |
|
|
!!!cp ('t394'); |
|
|
} |
|
|
while ($token->{type} == CHARACTER_TOKEN) { |
|
|
!!!cp ('t395'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} |
|
|
if (length $text) { |
|
|
!!!cp ('t396'); |
|
|
$el->manakai_append_text ($text); |
|
|
} |
|
|
|
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
|
|
|
|
if ($token->{type} == END_TAG_TOKEN and |
|
|
$token->{tag_name} eq $tag_name) { |
|
|
!!!cp ('t397'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
!!!cp ('t398'); |
|
|
!!!parse-error (type => 'in RCDATA:#eof', token => $token); |
|
|
} |
|
|
!!!next-token; |
|
7443 |
next B; |
next B; |
7444 |
} elsif ($token->{tag_name} eq 'optgroup' or |
} elsif ($token->{tag_name} eq 'optgroup' or |
7445 |
$token->{tag_name} eq 'option') { |
$token->{tag_name} eq 'option') { |