| 918 |
## NOTE: "in foreign content" insertion mode is special; it is combined |
## NOTE: "in foreign content" insertion mode is special; it is combined |
| 919 |
## with the secondary insertion mode. In this parser, they are stored |
## with the secondary insertion mode. In this parser, they are stored |
| 920 |
## together in the bit-or'ed form. |
## together in the bit-or'ed form. |
| 921 |
|
sub IN_CDATA_RCDATA_IM () { 0b1000000000000 } |
| 922 |
|
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is |
| 923 |
|
## combined with the original insertion mode. In this parser, |
| 924 |
|
## they are stored together in the bit-or'ed form. |
| 925 |
|
|
| 926 |
## NOTE: "initial" and "before html" insertion modes have no constants. |
## NOTE: "initial" and "before html" insertion modes have no constants. |
| 927 |
|
|
| 3979 |
|
|
| 3980 |
## Step 1 |
## Step 1 |
| 3981 |
my $start_tag_name = $token->{tag_name}; |
my $start_tag_name = $token->{tag_name}; |
| 3982 |
my $el; |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
|
!!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token); |
|
| 3983 |
|
|
| 3984 |
## Step 2 |
## Step 2 |
|
$insert->($el); |
|
|
|
|
|
## Step 3 |
|
| 3985 |
$self->{content_model} = $content_model_flag; # CDATA or RCDATA |
$self->{content_model} = $content_model_flag; # CDATA or RCDATA |
| 3986 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 3987 |
|
|
| 3988 |
## Step 4 |
## Step 3, 4 |
| 3989 |
my $text = ''; |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
|
!!!nack ('t40.1'); |
|
|
!!!next-token; |
|
|
while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing |
|
|
!!!cp ('t40'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} |
|
| 3990 |
|
|
| 3991 |
## Step 5 |
!!!nack ('t40.1'); |
|
if (length $text) { |
|
|
!!!cp ('t41'); |
|
|
my $text = $self->{document}->create_text_node ($text); |
|
|
$el->append_child ($text); |
|
|
} |
|
|
|
|
|
## Step 6 |
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
|
|
|
|
## Step 7 |
|
|
if ($token->{type} == END_TAG_TOKEN and |
|
|
$token->{tag_name} eq $start_tag_name) { |
|
|
!!!cp ('t42'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
## NOTE: An end-of-file token. |
|
|
if ($content_model_flag == CDATA_CONTENT_MODEL) { |
|
|
!!!cp ('t43'); |
|
|
!!!parse-error (type => 'in CDATA:#eof', token => $token); |
|
|
} elsif ($content_model_flag == RCDATA_CONTENT_MODEL) { |
|
|
!!!cp ('t44'); |
|
|
!!!parse-error (type => 'in RCDATA:#eof', token => $token); |
|
|
} else { |
|
|
die "$0: $content_model_flag in parse_rcdata"; |
|
|
} |
|
|
} |
|
| 3992 |
!!!next-token; |
!!!next-token; |
| 3993 |
}; # $parse_rcdata |
}; # $parse_rcdata |
| 3994 |
|
|
| 3995 |
my $script_start_tag = sub () { |
my $script_start_tag = sub () { |
| 3996 |
|
## Step 1 |
| 3997 |
my $script_el; |
my $script_el; |
| 3998 |
!!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token); |
!!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token); |
| 3999 |
|
|
| 4000 |
|
## Step 2 |
| 4001 |
## TODO: mark as "parser-inserted" |
## TODO: mark as "parser-inserted" |
| 4002 |
|
|
| 4003 |
|
## Step 3 |
| 4004 |
|
## TODO: Mark as "already executed", if ... |
| 4005 |
|
|
| 4006 |
|
## Step 4 |
| 4007 |
|
$insert->($script_el); |
| 4008 |
|
|
| 4009 |
|
## ISSUE: $script_el is not put into the stack |
| 4010 |
|
push @{$self->{open_elements}}, [$script_el, $el_category->{script}]; |
| 4011 |
|
|
| 4012 |
|
## Step 5 |
| 4013 |
$self->{content_model} = CDATA_CONTENT_MODEL; |
$self->{content_model} = CDATA_CONTENT_MODEL; |
| 4014 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
|
|
|
|
my $text = ''; |
|
|
!!!nack ('t45.1'); |
|
|
!!!next-token; |
|
|
while ($token->{type} == CHARACTER_TOKEN) { |
|
|
!!!cp ('t45'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} # stop if non-character token or tokenizer stops tokenising |
|
|
if (length $text) { |
|
|
!!!cp ('t46'); |
|
|
$script_el->manakai_append_text ($text); |
|
|
} |
|
|
|
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
| 4015 |
|
|
| 4016 |
if ($token->{type} == END_TAG_TOKEN and |
## Step 6-7 |
| 4017 |
$token->{tag_name} eq 'script') { |
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
|
!!!cp ('t47'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
!!!cp ('t48'); |
|
|
!!!parse-error (type => 'in CDATA:#eof', token => $token); |
|
|
## ISSUE: And ignore? |
|
|
## TODO: mark as "already executed" |
|
|
} |
|
|
|
|
|
if (defined $self->{inner_html_node}) { |
|
|
!!!cp ('t49'); |
|
|
## TODO: mark as "already executed" |
|
|
} else { |
|
|
!!!cp ('t50'); |
|
|
## TODO: $old_insertion_point = current insertion point |
|
|
## TODO: insertion point = just before the next input character |
|
| 4018 |
|
|
| 4019 |
$insert->($script_el); |
!!!nack ('t40.2'); |
|
|
|
|
## TODO: insertion point = $old_insertion_point (might be "undefined") |
|
|
|
|
|
## TODO: if there is a script that will execute as soon as the parser resumes, then... |
|
|
} |
|
|
|
|
| 4020 |
!!!next-token; |
!!!next-token; |
| 4021 |
}; # $script_start_tag |
}; # $script_start_tag |
| 4022 |
|
|
| 4367 |
} |
} |
| 4368 |
!!!next-token; |
!!!next-token; |
| 4369 |
next B; |
next B; |
| 4370 |
|
} elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) { |
| 4371 |
|
if ($token->{type} == CHARACTER_TOKEN) { |
| 4372 |
|
$token->{data} =~ s/^\x0A// if $self->{ignore_newline}; |
| 4373 |
|
delete $self->{ignore_newline}; |
| 4374 |
|
|
| 4375 |
|
if (length $token->{data}) { |
| 4376 |
|
!!!cp ('t43'); |
| 4377 |
|
$self->{open_elements}->[-1]->[0]->manakai_append_text |
| 4378 |
|
($token->{data}); |
| 4379 |
|
} else { |
| 4380 |
|
!!!cp ('t43.1'); |
| 4381 |
|
} |
| 4382 |
|
!!!next-token; |
| 4383 |
|
next B; |
| 4384 |
|
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 4385 |
|
delete $self->{ignore_newline}; |
| 4386 |
|
|
| 4387 |
|
if ($token->{tag_name} eq 'script') { |
| 4388 |
|
!!!cp ('t50'); |
| 4389 |
|
|
| 4390 |
|
## Para 1-2 |
| 4391 |
|
my $script = pop @{$self->{open_elements}}; |
| 4392 |
|
|
| 4393 |
|
## Para 3 |
| 4394 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
| 4395 |
|
|
| 4396 |
|
## Para 4 |
| 4397 |
|
## TODO: $old_insertion_point = $current_insertion_point; |
| 4398 |
|
## TODO: $current_insertion_point = just before $self->{nc}; |
| 4399 |
|
|
| 4400 |
|
## Para 5 |
| 4401 |
|
## TODO: Run the $script->[0]. |
| 4402 |
|
|
| 4403 |
|
## Para 6 |
| 4404 |
|
## TODO: $current_insertion_point = $old_insertion_point; |
| 4405 |
|
|
| 4406 |
|
## Para 7 |
| 4407 |
|
## TODO: if ($pending_external_script) { |
| 4408 |
|
## TODO: ... |
| 4409 |
|
## TODO: } |
| 4410 |
|
|
| 4411 |
|
!!!next-token; |
| 4412 |
|
next B; |
| 4413 |
|
} else { |
| 4414 |
|
!!!cp ('t42'); |
| 4415 |
|
|
| 4416 |
|
pop @{$self->{open_elements}}; |
| 4417 |
|
|
| 4418 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
| 4419 |
|
!!!next-token; |
| 4420 |
|
next B; |
| 4421 |
|
} |
| 4422 |
|
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 4423 |
|
delete $self->{ignore_newline}; |
| 4424 |
|
|
| 4425 |
|
!!!cp ('t44'); |
| 4426 |
|
!!!parse-error (type => 'not closed', |
| 4427 |
|
text => $self->{open_elements}->[-1]->[0] |
| 4428 |
|
->manakai_local_name, |
| 4429 |
|
token => $token); |
| 4430 |
|
|
| 4431 |
|
#if ($self->{open_elements}->[-1]->[1] & SCRIPT_EL) { |
| 4432 |
|
# ## TODO: Mark as "already executed" |
| 4433 |
|
#} |
| 4434 |
|
|
| 4435 |
|
pop @{$self->{open_elements}}; |
| 4436 |
|
|
| 4437 |
|
$self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; |
| 4438 |
|
## Reprocess. |
| 4439 |
|
next B; |
| 4440 |
|
} else { |
| 4441 |
|
die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type"; |
| 4442 |
|
} |
| 4443 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { |
| 4444 |
if ($token->{type} == CHARACTER_TOKEN) { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4445 |
!!!cp ('t87.1'); |
!!!cp ('t87.1'); |
| 4841 |
|
|
| 4842 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 4843 |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
$parse_rcdata->(RCDATA_CONTENT_MODEL); |
| 4844 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
| 4845 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
| 4846 |
|
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
| 4847 |
next B; |
next B; |
| 4848 |
} elsif ($token->{tag_name} eq 'style' or |
} elsif ($token->{tag_name} eq 'style' or |
| 4849 |
$token->{tag_name} eq 'noframes') { |
$token->{tag_name} eq 'noframes') { |
| 4861 |
!!!cp ('t115'); |
!!!cp ('t115'); |
| 4862 |
} |
} |
| 4863 |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
$parse_rcdata->(CDATA_CONTENT_MODEL); |
| 4864 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
| 4865 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1, () # <head> |
| 4866 |
|
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
| 4867 |
next B; |
next B; |
| 4868 |
} elsif ($token->{tag_name} eq 'noscript') { |
} elsif ($token->{tag_name} eq 'noscript') { |
| 4869 |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
| 4870 |
!!!cp ('t116'); |
!!!cp ('t116'); |
| 4871 |
## NOTE: and scripting is disabled |
## NOTE: and scripting is disabled |
| 4909 |
|
|
| 4910 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 4911 |
$script_start_tag->(); |
$script_start_tag->(); |
| 4912 |
pop @{$self->{open_elements}} # <head> |
## ISSUE: A spec bug [Bug 6038] |
| 4913 |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
splice @{$self->{open_elements}}, -2, 1 # <head> |
| 4914 |
|
if ($self->{insertion_mode} & AFTER_HEAD_IM) == AFTER_HEAD_IM; |
| 4915 |
next B; |
next B; |
| 4916 |
} elsif ($token->{tag_name} eq 'body' or |
} elsif ($token->{tag_name} eq 'body' or |
| 4917 |
$token->{tag_name} eq 'frameset') { |
$token->{tag_name} eq 'frameset') { |
| 7419 |
next B; |
next B; |
| 7420 |
} |
} |
| 7421 |
} elsif ($token->{tag_name} eq 'textarea') { |
} elsif ($token->{tag_name} eq 'textarea') { |
| 7422 |
my $tag_name = $token->{tag_name}; |
## Step 1 |
| 7423 |
my $el; |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token); |
|
!!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token); |
|
| 7424 |
|
|
| 7425 |
|
## Step 2 |
| 7426 |
## TODO: $self->{form_element} if defined |
## TODO: $self->{form_element} if defined |
| 7427 |
|
|
| 7428 |
|
## Step 3 |
| 7429 |
|
$self->{ignore_newline} = 1; |
| 7430 |
|
|
| 7431 |
|
## Step 4 |
| 7432 |
|
## ISSUE: This step is wrong. (r2302 enbugged) |
| 7433 |
|
|
| 7434 |
|
## Step 5 |
| 7435 |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
$self->{content_model} = RCDATA_CONTENT_MODEL; |
| 7436 |
delete $self->{escape}; # MUST |
delete $self->{escape}; # MUST |
| 7437 |
|
|
| 7438 |
$insert->($el); |
## Step 6-7 |
| 7439 |
|
$self->{insertion_mode} |= IN_CDATA_RCDATA_IM; |
| 7440 |
my $text = ''; |
|
| 7441 |
!!!nack ('t392.1'); |
!!!nack ('t392.1'); |
| 7442 |
!!!next-token; |
!!!next-token; |
|
if ($token->{type} == CHARACTER_TOKEN) { |
|
|
$token->{data} =~ s/^\x0A//; |
|
|
unless (length $token->{data}) { |
|
|
!!!cp ('t392'); |
|
|
!!!next-token; |
|
|
} else { |
|
|
!!!cp ('t393'); |
|
|
} |
|
|
} else { |
|
|
!!!cp ('t394'); |
|
|
} |
|
|
while ($token->{type} == CHARACTER_TOKEN) { |
|
|
!!!cp ('t395'); |
|
|
$text .= $token->{data}; |
|
|
!!!next-token; |
|
|
} |
|
|
if (length $text) { |
|
|
!!!cp ('t396'); |
|
|
$el->manakai_append_text ($text); |
|
|
} |
|
|
|
|
|
$self->{content_model} = PCDATA_CONTENT_MODEL; |
|
|
|
|
|
if ($token->{type} == END_TAG_TOKEN and |
|
|
$token->{tag_name} eq $tag_name) { |
|
|
!!!cp ('t397'); |
|
|
## Ignore the token |
|
|
} else { |
|
|
!!!cp ('t398'); |
|
|
!!!parse-error (type => 'in RCDATA:#eof', token => $token); |
|
|
} |
|
|
!!!next-token; |
|
| 7443 |
next B; |
next B; |
| 7444 |
} elsif ($token->{tag_name} eq 'optgroup' or |
} elsif ($token->{tag_name} eq 'optgroup' or |
| 7445 |
$token->{tag_name} eq 'option') { |
$token->{tag_name} eq 'option') { |