| 159 |
## Content model values, each built by OR-ing two CM_* flags (the CM_*
## flags themselves are defined outside this excerpt).  The tokenizer
## stores one of these values in $self->{content_model}.
## NOTE(review): judging by the flag names, RCDATA permits entities with
## limited markup and PCDATA permits entities with full markup -- confirm
## against the CM_* definitions.
sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP } |
sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP } |
| 160 |
sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP } |
sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP } |
| 161 |
|
|
| 162 |
|
## Token type codes.  These small integers are the values stored in a
## token's ->{type} field by the tokenizer and compared numerically
## (with `==`) by the code that consumes the tokens.  The empty
## prototype `()` lets perl treat each sub as an inlinable constant.
sub DOCTYPE_TOKEN () { 1 } |
| 163 |
|
sub COMMENT_TOKEN () { 2 } |
| 164 |
|
sub START_TAG_TOKEN () { 3 } |
| 165 |
|
sub END_TAG_TOKEN () { 4 } |
| 166 |
|
sub END_OF_FILE_TOKEN () { 5 } |
| 167 |
|
sub CHARACTER_TOKEN () { 6 } |
| 168 |
|
|
| 169 |
|
## Insertion mode category bits ("IMS").  Each constant contributes one
## distinct bit, starting at 0b100; the two lowest bits (0b11) are left
## unused here and are OR-ed in by the individual *_IM constants to
## tell apart the modes that share a category.
sub AFTER_HTML_IMS () { 0b100 } |
| 170 |
|
sub HEAD_IMS () { 0b1000 } |
| 171 |
|
sub BODY_IMS () { 0b10000 } |
| 172 |
|
## Own bit plus the BODY_IMS bit, so a value built from this constant
## also has the BODY_IMS bit set.
## NOTE(review): presumably so that a `& BODY_IMS` test also matches
## these modes -- confirm at the use sites.
sub BODY_TABLE_IMS () { 0b100000 | BODY_IMS } |
| 173 |
|
sub TABLE_IMS () { 0b1000000 } |
| 174 |
|
## Own bit plus the TABLE_IMS bit (same layering as BODY_TABLE_IMS).
sub ROW_IMS () { 0b10000000 | TABLE_IMS } |
| 175 |
|
sub BODY_AFTER_IMS () { 0b100000000 } |
| 176 |
|
sub FRAME_IMS () { 0b1000000000 } |
| 177 |
|
|
| 178 |
|
## Concrete insertion mode values ("IM").  Most are a category constant
## (*_IMS) OR-ed with a two-bit discriminator (0b00 .. 0b11).  A value
## of this kind is what gets stored in $self->{insertion_mode} (for
## example BEFORE_HEAD_IM).

## The two "after html" modes combine two category bits and carry no
## discriminator.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS } |
| 179 |
|
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS } |
| 180 |
|
## Head-related modes: HEAD_IMS plus discriminator 0b00 .. 0b11.
sub IN_HEAD_IM () { HEAD_IMS | 0b00 } |
| 181 |
|
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 } |
| 182 |
|
sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 } |
| 183 |
|
sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 } |
| 184 |
|
## Numerically identical to the bare BODY_IMS category bit.
sub IN_BODY_IM () { BODY_IMS } |
| 185 |
|
sub IN_CELL_IM () { BODY_TABLE_IMS | 0b01 } |
| 186 |
|
sub IN_CAPTION_IM () { BODY_TABLE_IMS | 0b10 } |
| 187 |
|
sub IN_ROW_IM () { ROW_IMS | 0b01 } |
| 188 |
|
sub IN_TABLE_BODY_IM () { ROW_IMS | 0b10 } |
| 189 |
|
## Numerically identical to the bare TABLE_IMS category bit.
sub IN_TABLE_IM () { TABLE_IMS } |
| 190 |
|
## Numerically identical to the bare BODY_AFTER_IMS category bit.
sub AFTER_BODY_IM () { BODY_AFTER_IMS } |
| 191 |
|
sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 } |
| 192 |
|
sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 } |
| 193 |
|
## These two modes carry no category bit at all; only the low two
## bits are set.
sub IN_SELECT_IM () { 0b01 } |
| 194 |
|
sub IN_COLUMN_GROUP_IM () { 0b10 } |
| 195 |
|
|
| 196 |
## Implementations MUST act as if they used the state machine in the spec |
## Implementations MUST act as if they used the state machine in the spec |
| 197 |
|
|
| 198 |
sub _initialize_tokenizer ($) { |
sub _initialize_tokenizer ($) { |
| 211 |
} # _initialize_tokenizer |
} # _initialize_tokenizer |
| 212 |
|
|
| 213 |
## A token has: |
## A token has: |
| 214 |
## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment', |
## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN, |
| 215 |
## 'character', or 'end-of-file' |
## CHARACTER_TOKEN, or END_OF_FILE_TOKEN |
| 216 |
## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name)) |
## ->{name} (DOCTYPE_TOKEN) |
| 217 |
## ->{public_identifier} (DOCTYPE) |
## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN) |
| 218 |
## ->{system_identifier} (DOCTYPE) |
## ->{public_identifier} (DOCTYPE_TOKEN) |
| 219 |
## ->{correct} == 1 or 0 (DOCTYPE) |
## ->{system_identifier} (DOCTYPE_TOKEN) |
| 220 |
## ->{attributes} isa HASH (start tag, end tag) |
## ->{correct} == 1 or 0 (DOCTYPE_TOKEN) |
| 221 |
## ->{data} (comment, character) |
## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN) |
| 222 |
|
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
| 223 |
|
|
| 224 |
## Emitted token MUST immediately be handled by the tree construction state. |
## Emitted token MUST immediately be handled by the tree construction state. |
| 225 |
|
|
| 278 |
|
|
| 279 |
# |
# |
| 280 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 281 |
!!!emit ({type => 'end-of-file'}); |
!!!emit ({type => END_OF_FILE_TOKEN}); |
| 282 |
last A; ## TODO: ok? |
last A; ## TODO: ok? |
| 283 |
} |
} |
| 284 |
# Anything else |
# Anything else |
| 285 |
my $token = {type => 'character', |
my $token = {type => CHARACTER_TOKEN, |
| 286 |
data => chr $self->{next_input_character}}; |
data => chr $self->{next_input_character}}; |
| 287 |
## Stay in the data state |
## Stay in the data state |
| 288 |
!!!next-input-character; |
!!!next-input-character; |
| 299 |
# next-input-character is already done |
# next-input-character is already done |
| 300 |
|
|
| 301 |
unless (defined $token) { |
unless (defined $token) { |
| 302 |
!!!emit ({type => 'character', data => '&'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '&'}); |
| 303 |
} else { |
} else { |
| 304 |
!!!emit ($token); |
!!!emit ($token); |
| 305 |
} |
} |
| 315 |
## reconsume |
## reconsume |
| 316 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 317 |
|
|
| 318 |
!!!emit ({type => 'character', data => '<'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '<'}); |
| 319 |
|
|
| 320 |
redo A; |
redo A; |
| 321 |
} |
} |
| 331 |
} elsif (0x0041 <= $self->{next_input_character} and |
} elsif (0x0041 <= $self->{next_input_character} and |
| 332 |
$self->{next_input_character} <= 0x005A) { # A..Z |
$self->{next_input_character} <= 0x005A) { # A..Z |
| 333 |
$self->{current_token} |
$self->{current_token} |
| 334 |
= {type => 'start tag', |
= {type => START_TAG_TOKEN, |
| 335 |
tag_name => chr ($self->{next_input_character} + 0x0020)}; |
tag_name => chr ($self->{next_input_character} + 0x0020)}; |
| 336 |
$self->{state} = 'tag name'; |
$self->{state} = 'tag name'; |
| 337 |
!!!next-input-character; |
!!!next-input-character; |
| 338 |
redo A; |
redo A; |
| 339 |
} elsif (0x0061 <= $self->{next_input_character} and |
} elsif (0x0061 <= $self->{next_input_character} and |
| 340 |
$self->{next_input_character} <= 0x007A) { # a..z |
$self->{next_input_character} <= 0x007A) { # a..z |
| 341 |
$self->{current_token} = {type => 'start tag', |
$self->{current_token} = {type => START_TAG_TOKEN, |
| 342 |
tag_name => chr ($self->{next_input_character})}; |
tag_name => chr ($self->{next_input_character})}; |
| 343 |
$self->{state} = 'tag name'; |
$self->{state} = 'tag name'; |
| 344 |
!!!next-input-character; |
!!!next-input-character; |
| 348 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 349 |
!!!next-input-character; |
!!!next-input-character; |
| 350 |
|
|
| 351 |
!!!emit ({type => 'character', data => '<>'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '<>'}); |
| 352 |
|
|
| 353 |
redo A; |
redo A; |
| 354 |
} elsif ($self->{next_input_character} == 0x003F) { # ? |
} elsif ($self->{next_input_character} == 0x003F) { # ? |
| 361 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 362 |
## reconsume |
## reconsume |
| 363 |
|
|
| 364 |
!!!emit ({type => 'character', data => '<'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '<'}); |
| 365 |
|
|
| 366 |
redo A; |
redo A; |
| 367 |
} |
} |
| 385 |
!!!back-next-input-character (@next_char); |
!!!back-next-input-character (@next_char); |
| 386 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 387 |
|
|
| 388 |
!!!emit ({type => 'character', data => '</'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '</'}); |
| 389 |
|
|
| 390 |
redo A; |
redo A; |
| 391 |
} |
} |
| 403 |
$self->{next_input_character} = shift @next_char; # reconsume |
$self->{next_input_character} = shift @next_char; # reconsume |
| 404 |
!!!back-next-input-character (@next_char); |
!!!back-next-input-character (@next_char); |
| 405 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 406 |
!!!emit ({type => 'character', data => '</'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '</'}); |
| 407 |
redo A; |
redo A; |
| 408 |
} else { |
} else { |
| 409 |
$self->{next_input_character} = shift @next_char; |
$self->{next_input_character} = shift @next_char; |
| 414 |
## No start tag token has ever been emitted |
## No start tag token has ever been emitted |
| 415 |
# next-input-character is already done |
# next-input-character is already done |
| 416 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 417 |
!!!emit ({type => 'character', data => '</'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '</'}); |
| 418 |
redo A; |
redo A; |
| 419 |
} |
} |
| 420 |
} |
} |
| 421 |
|
|
| 422 |
if (0x0041 <= $self->{next_input_character} and |
if (0x0041 <= $self->{next_input_character} and |
| 423 |
$self->{next_input_character} <= 0x005A) { # A..Z |
$self->{next_input_character} <= 0x005A) { # A..Z |
| 424 |
$self->{current_token} = {type => 'end tag', |
$self->{current_token} = {type => END_TAG_TOKEN, |
| 425 |
tag_name => chr ($self->{next_input_character} + 0x0020)}; |
tag_name => chr ($self->{next_input_character} + 0x0020)}; |
| 426 |
$self->{state} = 'tag name'; |
$self->{state} = 'tag name'; |
| 427 |
!!!next-input-character; |
!!!next-input-character; |
| 428 |
redo A; |
redo A; |
| 429 |
} elsif (0x0061 <= $self->{next_input_character} and |
} elsif (0x0061 <= $self->{next_input_character} and |
| 430 |
$self->{next_input_character} <= 0x007A) { # a..z |
$self->{next_input_character} <= 0x007A) { # a..z |
| 431 |
$self->{current_token} = {type => 'end tag', |
$self->{current_token} = {type => END_TAG_TOKEN, |
| 432 |
tag_name => chr ($self->{next_input_character})}; |
tag_name => chr ($self->{next_input_character})}; |
| 433 |
$self->{state} = 'tag name'; |
$self->{state} = 'tag name'; |
| 434 |
!!!next-input-character; |
!!!next-input-character; |
| 443 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 444 |
# reconsume |
# reconsume |
| 445 |
|
|
| 446 |
!!!emit ({type => 'character', data => '</'}); |
!!!emit ({type => CHARACTER_TOKEN, data => '</'}); |
| 447 |
|
|
| 448 |
redo A; |
redo A; |
| 449 |
} else { |
} else { |
| 462 |
!!!next-input-character; |
!!!next-input-character; |
| 463 |
redo A; |
redo A; |
| 464 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 465 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 466 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 467 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 468 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 469 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 470 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 471 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 472 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 489 |
redo A; |
redo A; |
| 490 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 491 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 492 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 493 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 494 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 495 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 496 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 497 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 498 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 499 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 510 |
} elsif ($self->{next_input_character} == 0x002F) { # / |
} elsif ($self->{next_input_character} == 0x002F) { # / |
| 511 |
!!!next-input-character; |
!!!next-input-character; |
| 512 |
if ($self->{next_input_character} == 0x003E and # > |
if ($self->{next_input_character} == 0x003E and # > |
| 513 |
$self->{current_token}->{type} eq 'start tag' and |
$self->{current_token}->{type} == START_TAG_TOKEN and |
| 514 |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
| 515 |
# permitted slash |
# permitted slash |
| 516 |
# |
# |
| 537 |
!!!next-input-character; |
!!!next-input-character; |
| 538 |
redo A; |
redo A; |
| 539 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 540 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 541 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 542 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 543 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 544 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 545 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 546 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 547 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 565 |
} elsif ($self->{next_input_character} == 0x002F) { # / |
} elsif ($self->{next_input_character} == 0x002F) { # / |
| 566 |
!!!next-input-character; |
!!!next-input-character; |
| 567 |
if ($self->{next_input_character} == 0x003E and # > |
if ($self->{next_input_character} == 0x003E and # > |
| 568 |
$self->{current_token}->{type} eq 'start tag' and |
$self->{current_token}->{type} == START_TAG_TOKEN and |
| 569 |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
| 570 |
# permitted slash |
# permitted slash |
| 571 |
# |
# |
| 577 |
redo A; |
redo A; |
| 578 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 579 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 580 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 581 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 582 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 583 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 584 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 585 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 586 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 587 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 630 |
redo A; |
redo A; |
| 631 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 632 |
$before_leave->(); |
$before_leave->(); |
| 633 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 634 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 635 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 636 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 637 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 638 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 639 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 640 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 658 |
$before_leave->(); |
$before_leave->(); |
| 659 |
!!!next-input-character; |
!!!next-input-character; |
| 660 |
if ($self->{next_input_character} == 0x003E and # > |
if ($self->{next_input_character} == 0x003E and # > |
| 661 |
$self->{current_token}->{type} eq 'start tag' and |
$self->{current_token}->{type} == START_TAG_TOKEN and |
| 662 |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
| 663 |
# permitted slash |
# permitted slash |
| 664 |
# |
# |
| 671 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 672 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 673 |
$before_leave->(); |
$before_leave->(); |
| 674 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 675 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 676 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 677 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 678 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 679 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 680 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 681 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 709 |
!!!next-input-character; |
!!!next-input-character; |
| 710 |
redo A; |
redo A; |
| 711 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 712 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 713 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 714 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 715 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 716 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 717 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 718 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 719 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 737 |
} elsif ($self->{next_input_character} == 0x002F) { # / |
} elsif ($self->{next_input_character} == 0x002F) { # / |
| 738 |
!!!next-input-character; |
!!!next-input-character; |
| 739 |
if ($self->{next_input_character} == 0x003E and # > |
if ($self->{next_input_character} == 0x003E and # > |
| 740 |
$self->{current_token}->{type} eq 'start tag' and |
$self->{current_token}->{type} == START_TAG_TOKEN and |
| 741 |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
$permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) { |
| 742 |
# permitted slash |
# permitted slash |
| 743 |
# |
# |
| 750 |
redo A; |
redo A; |
| 751 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 752 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 753 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 754 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 755 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 756 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 757 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 758 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 759 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 760 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 797 |
!!!next-input-character; |
!!!next-input-character; |
| 798 |
redo A; |
redo A; |
| 799 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 800 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 801 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 802 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 803 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 804 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 805 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 806 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 807 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 817 |
redo A; |
redo A; |
| 818 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 819 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 820 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 821 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 822 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 823 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 824 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 825 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 826 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 827 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 853 |
redo A; |
redo A; |
| 854 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 855 |
!!!parse-error (type => 'unclosed attribute value'); |
!!!parse-error (type => 'unclosed attribute value'); |
| 856 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 857 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 858 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 859 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 860 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 861 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 862 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 863 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 889 |
redo A; |
redo A; |
| 890 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 891 |
!!!parse-error (type => 'unclosed attribute value'); |
!!!parse-error (type => 'unclosed attribute value'); |
| 892 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 893 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 894 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 895 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 896 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 897 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 898 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 899 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 928 |
!!!next-input-character; |
!!!next-input-character; |
| 929 |
redo A; |
redo A; |
| 930 |
} elsif ($self->{next_input_character} == 0x003E) { # > |
} elsif ($self->{next_input_character} == 0x003E) { # > |
| 931 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 932 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 933 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 934 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 935 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 936 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 937 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 938 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 948 |
redo A; |
redo A; |
| 949 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 950 |
!!!parse-error (type => 'unclosed tag'); |
!!!parse-error (type => 'unclosed tag'); |
| 951 |
if ($self->{current_token}->{type} eq 'start tag') { |
if ($self->{current_token}->{type} == START_TAG_TOKEN) { |
| 952 |
$self->{current_token}->{first_start_tag} |
$self->{current_token}->{first_start_tag} |
| 953 |
= not defined $self->{last_emitted_start_tag_name}; |
= not defined $self->{last_emitted_start_tag_name}; |
| 954 |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
$self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name}; |
| 955 |
} elsif ($self->{current_token}->{type} eq 'end tag') { |
} elsif ($self->{current_token}->{type} == END_TAG_TOKEN) { |
| 956 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
$self->{content_model} = PCDATA_CONTENT_MODEL; # MUST |
| 957 |
if ($self->{current_token}->{attributes}) { |
if ($self->{current_token}->{attributes}) { |
| 958 |
!!!parse-error (type => 'end tag attribute'); |
!!!parse-error (type => 'end tag attribute'); |
| 988 |
} elsif ($self->{state} eq 'bogus comment') { |
} elsif ($self->{state} eq 'bogus comment') { |
| 989 |
## (only happen if PCDATA state) |
## (only happen if PCDATA state) |
| 990 |
|
|
| 991 |
my $token = {type => 'comment', data => ''}; |
my $token = {type => COMMENT_TOKEN, data => ''}; |
| 992 |
|
|
| 993 |
BC: { |
BC: { |
| 994 |
if ($self->{next_input_character} == 0x003E) { # > |
if ($self->{next_input_character} == 0x003E) { # > |
| 1021 |
!!!next-input-character; |
!!!next-input-character; |
| 1022 |
push @next_char, $self->{next_input_character}; |
push @next_char, $self->{next_input_character}; |
| 1023 |
if ($self->{next_input_character} == 0x002D) { # - |
if ($self->{next_input_character} == 0x002D) { # - |
| 1024 |
$self->{current_token} = {type => 'comment', data => ''}; |
$self->{current_token} = {type => COMMENT_TOKEN, data => ''}; |
| 1025 |
$self->{state} = 'comment start'; |
$self->{state} = 'comment start'; |
| 1026 |
!!!next-input-character; |
!!!next-input-character; |
| 1027 |
redo A; |
redo A; |
| 1224 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 1225 |
!!!next-input-character; |
!!!next-input-character; |
| 1226 |
|
|
| 1227 |
!!!emit ({type => 'DOCTYPE'}); # incorrect |
!!!emit ({type => DOCTYPE_TOKEN}); # incorrect |
| 1228 |
|
|
| 1229 |
redo A; |
redo A; |
| 1230 |
} elsif ($self->{next_input_character} == -1) { |
} elsif ($self->{next_input_character} == -1) { |
| 1232 |
$self->{state} = 'data'; |
$self->{state} = 'data'; |
| 1233 |
## reconsume |
## reconsume |
| 1234 |
|
|
| 1235 |
!!!emit ({type => 'DOCTYPE'}); # incorrect |
!!!emit ({type => DOCTYPE_TOKEN}); # incorrect |
| 1236 |
|
|
| 1237 |
redo A; |
redo A; |
| 1238 |
} else { |
} else { |
| 1239 |
$self->{current_token} |
$self->{current_token} |
| 1240 |
= {type => 'DOCTYPE', |
= {type => DOCTYPE_TOKEN, |
| 1241 |
name => chr ($self->{next_input_character}), |
name => chr ($self->{next_input_character}), |
| 1242 |
correct => 1}; |
correct => 1}; |
| 1243 |
## ISSUE: "Set the token's name name to the" in the spec |
## ISSUE: "Set the token's name name to the" in the spec |
| 1705 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
| 1706 |
} |
} |
| 1707 |
|
|
| 1708 |
return {type => 'character', data => chr $code}; |
return {type => CHARACTER_TOKEN, data => chr $code}; |
| 1709 |
} # X |
} # X |
| 1710 |
} elsif (0x0030 <= $self->{next_input_character} and |
} elsif (0x0030 <= $self->{next_input_character} and |
| 1711 |
$self->{next_input_character} <= 0x0039) { # 0..9 |
$self->{next_input_character} <= 0x0039) { # 0..9 |
| 1740 |
$code = $c1_entity_char->{$code}; |
$code = $c1_entity_char->{$code}; |
| 1741 |
} |
} |
| 1742 |
|
|
| 1743 |
return {type => 'character', data => chr $code}; |
return {type => CHARACTER_TOKEN, data => chr $code}; |
| 1744 |
} else { |
} else { |
| 1745 |
!!!parse-error (type => 'bare nero'); |
!!!parse-error (type => 'bare nero'); |
| 1746 |
!!!back-next-input-character ($self->{next_input_character}); |
!!!back-next-input-character ($self->{next_input_character}); |
| 1788 |
} |
} |
| 1789 |
|
|
| 1790 |
if ($match > 0) { |
if ($match > 0) { |
| 1791 |
return {type => 'character', data => $value}; |
return {type => CHARACTER_TOKEN, data => $value}; |
| 1792 |
} elsif ($match < 0) { |
} elsif ($match < 0) { |
| 1793 |
!!!parse-error (type => 'no refc'); |
!!!parse-error (type => 'no refc'); |
| 1794 |
if ($in_attr and $match < -1) { |
if ($in_attr and $match < -1) { |
| 1795 |
return {type => 'character', data => '&'.$entity_name}; |
return {type => CHARACTER_TOKEN, data => '&'.$entity_name}; |
| 1796 |
} else { |
} else { |
| 1797 |
return {type => 'character', data => $value}; |
return {type => CHARACTER_TOKEN, data => $value}; |
| 1798 |
} |
} |
| 1799 |
} else { |
} else { |
| 1800 |
!!!parse-error (type => 'bare ero'); |
!!!parse-error (type => 'bare ero'); |
| 1801 |
## NOTE: No characters are consumed in the spec. |
## NOTE: No characters are consumed in the spec. |
| 1802 |
return {type => 'character', data => '&'.$value}; |
return {type => CHARACTER_TOKEN, data => '&'.$value}; |
| 1803 |
} |
} |
| 1804 |
} else { |
} else { |
| 1805 |
## no characters are consumed |
## no characters are consumed |
| 1841 |
|
|
| 1842 |
!!!next-token; |
!!!next-token; |
| 1843 |
|
|
| 1844 |
$self->{insertion_mode} = 'before head'; |
$self->{insertion_mode} = BEFORE_HEAD_IM; |
| 1845 |
undef $self->{form_element}; |
undef $self->{form_element}; |
| 1846 |
undef $self->{head_element}; |
undef $self->{head_element}; |
| 1847 |
$self->{open_elements} = []; |
$self->{open_elements} = []; |
| 1855 |
sub _tree_construction_initial ($) { |
sub _tree_construction_initial ($) { |
| 1856 |
my $self = shift; |
my $self = shift; |
| 1857 |
INITIAL: { |
INITIAL: { |
| 1858 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 1859 |
## NOTE: Conformance checkers MAY, instead of reporting "not HTML5" |
## NOTE: Conformance checkers MAY, instead of reporting "not HTML5" |
| 1860 |
## error, switch to a conformance checking mode for another |
## error, switch to a conformance checking mode for another |
| 1861 |
## language. |
## language. |
| 1982 |
!!!next-token; |
!!!next-token; |
| 1983 |
return; |
return; |
| 1984 |
} elsif ({ |
} elsif ({ |
| 1985 |
'start tag' => 1, |
START_TAG_TOKEN, 1, |
| 1986 |
'end tag' => 1, |
END_TAG_TOKEN, 1, |
| 1987 |
'end-of-file' => 1, |
END_OF_FILE_TOKEN, 1, |
| 1988 |
}->{$token->{type}}) { |
}->{$token->{type}}) { |
| 1989 |
!!!parse-error (type => 'no DOCTYPE'); |
!!!parse-error (type => 'no DOCTYPE'); |
| 1990 |
$self->{document}->manakai_compat_mode ('quirks'); |
$self->{document}->manakai_compat_mode ('quirks'); |
| 1991 |
## Go to the root element phase |
## Go to the root element phase |
| 1992 |
## reprocess |
## reprocess |
| 1993 |
return; |
return; |
| 1994 |
} elsif ($token->{type} eq 'character') { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 1995 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
| 1996 |
## Ignore the token |
## Ignore the token |
| 1997 |
|
|
| 2007 |
## Go to the root element phase |
## Go to the root element phase |
| 2008 |
## reprocess |
## reprocess |
| 2009 |
return; |
return; |
| 2010 |
} elsif ($token->{type} eq 'comment') { |
} elsif ($token->{type} == COMMENT_TOKEN) { |
| 2011 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
| 2012 |
$self->{document}->append_child ($comment); |
$self->{document}->append_child ($comment); |
| 2013 |
|
|
| 2015 |
!!!next-token; |
!!!next-token; |
| 2016 |
redo INITIAL; |
redo INITIAL; |
| 2017 |
} else { |
} else { |
| 2018 |
die "$0: $token->{type}: Unknown token"; |
die "$0: $token->{type}: Unknown token type"; |
| 2019 |
} |
} |
| 2020 |
} # INITIAL |
} # INITIAL |
| 2021 |
} # _tree_construction_initial |
} # _tree_construction_initial |
| 2024 |
my $self = shift; |
my $self = shift; |
| 2025 |
|
|
| 2026 |
B: { |
B: { |
| 2027 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 2028 |
!!!parse-error (type => 'in html:#DOCTYPE'); |
!!!parse-error (type => 'in html:#DOCTYPE'); |
| 2029 |
## Ignore the token |
## Ignore the token |
| 2030 |
## Stay in the phase |
## Stay in the phase |
| 2031 |
!!!next-token; |
!!!next-token; |
| 2032 |
redo B; |
redo B; |
| 2033 |
} elsif ($token->{type} eq 'comment') { |
} elsif ($token->{type} == COMMENT_TOKEN) { |
| 2034 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
| 2035 |
$self->{document}->append_child ($comment); |
$self->{document}->append_child ($comment); |
| 2036 |
## Stay in the phase |
## Stay in the phase |
| 2037 |
!!!next-token; |
!!!next-token; |
| 2038 |
redo B; |
redo B; |
| 2039 |
} elsif ($token->{type} eq 'character') { |
} elsif ($token->{type} == CHARACTER_TOKEN) { |
| 2040 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D |
| 2041 |
## Ignore the token. |
## Ignore the token. |
| 2042 |
|
|
| 2048 |
} |
} |
| 2049 |
# |
# |
| 2050 |
} elsif ({ |
} elsif ({ |
| 2051 |
'start tag' => 1, |
START_TAG_TOKEN, 1, |
| 2052 |
'end tag' => 1, |
END_TAG_TOKEN, 1, |
| 2053 |
'end-of-file' => 1, |
END_OF_FILE_TOKEN, 1, |
| 2054 |
}->{$token->{type}}) { |
}->{$token->{type}}) { |
| 2055 |
## ISSUE: There is an issue in the spec |
## ISSUE: There is an issue in the spec |
| 2056 |
# |
# |
| 2057 |
} else { |
} else { |
| 2058 |
die "$0: $token->{type}: Unknown token"; |
die "$0: $token->{type}: Unknown token type"; |
| 2059 |
} |
} |
| 2060 |
my $root_element; !!!create-element ($root_element, 'html'); |
my $root_element; !!!create-element ($root_element, 'html'); |
| 2061 |
$self->{document}->append_child ($root_element); |
$self->{document}->append_child ($root_element); |
| 2097 |
|
|
| 2098 |
## Step 4..13 |
## Step 4..13 |
| 2099 |
my $new_mode = { |
my $new_mode = { |
| 2100 |
select => 'in select', |
select => IN_SELECT_IM, |
| 2101 |
td => 'in cell', |
td => IN_CELL_IM, |
| 2102 |
th => 'in cell', |
th => IN_CELL_IM, |
| 2103 |
tr => 'in row', |
tr => IN_ROW_IM, |
| 2104 |
tbody => 'in table body', |
tbody => IN_TABLE_BODY_IM, |
| 2105 |
thead => 'in table body', |
thead => IN_TABLE_BODY_IM, |
| 2106 |
tfoot => 'in table body', |
tfoot => IN_TABLE_BODY_IM, |
| 2107 |
caption => 'in caption', |
caption => IN_CAPTION_IM, |
| 2108 |
colgroup => 'in column group', |
colgroup => IN_COLUMN_GROUP_IM, |
| 2109 |
table => 'in table', |
table => IN_TABLE_IM, |
| 2110 |
head => 'in body', # not in head! |
head => IN_BODY_IM, # not in head! |
| 2111 |
body => 'in body', |
body => IN_BODY_IM, |
| 2112 |
frameset => 'in frameset', |
frameset => IN_FRAMESET_IM, |
| 2113 |
}->{$node->[1]}; |
}->{$node->[1]}; |
| 2114 |
$self->{insertion_mode} = $new_mode and return if defined $new_mode; |
$self->{insertion_mode} = $new_mode and return if defined $new_mode; |
| 2115 |
|
|
| 2116 |
## Step 14 |
## Step 14 |
| 2117 |
if ($node->[1] eq 'html') { |
if ($node->[1] eq 'html') { |
| 2118 |
unless (defined $self->{head_element}) { |
unless (defined $self->{head_element}) { |
| 2119 |
$self->{insertion_mode} = 'before head'; |
$self->{insertion_mode} = BEFORE_HEAD_IM; |
| 2120 |
} else { |
} else { |
| 2121 |
$self->{insertion_mode} = 'after head'; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2122 |
} |
} |
| 2123 |
return; |
return; |
| 2124 |
} |
} |
| 2125 |
|
|
| 2126 |
## Step 15 |
## Step 15 |
| 2127 |
$self->{insertion_mode} = 'in body' and return if $last; |
$self->{insertion_mode} = IN_BODY_IM and return if $last; |
| 2128 |
|
|
| 2129 |
## Step 16 |
## Step 16 |
| 2130 |
$i--; |
$i--; |
| 2238 |
## Step 4 |
## Step 4 |
| 2239 |
my $text = ''; |
my $text = ''; |
| 2240 |
!!!next-token; |
!!!next-token; |
| 2241 |
while ($token->{type} eq 'character') { # or until stop tokenizing |
while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing |
| 2242 |
$text .= $token->{data}; |
$text .= $token->{data}; |
| 2243 |
!!!next-token; |
!!!next-token; |
| 2244 |
} |
} |
| 2253 |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
| 2254 |
|
|
| 2255 |
## Step 7 |
## Step 7 |
| 2256 |
if ($token->{type} eq 'end tag' and $token->{tag_name} eq $start_tag_name) { |
if ($token->{type} == END_TAG_TOKEN and $token->{tag_name} eq $start_tag_name) { |
| 2257 |
## Ignore the token |
## Ignore the token |
| 2258 |
} elsif ($content_model_flag == CDATA_CONTENT_MODEL) { |
} elsif ($content_model_flag == CDATA_CONTENT_MODEL) { |
| 2259 |
!!!parse-error (type => 'in CDATA:#'.$token->{type}); |
!!!parse-error (type => 'in CDATA:#'.$token->{type}); |
| 2276 |
|
|
| 2277 |
my $text = ''; |
my $text = ''; |
| 2278 |
!!!next-token; |
!!!next-token; |
| 2279 |
while ($token->{type} eq 'character') { |
while ($token->{type} == CHARACTER_TOKEN) { |
| 2280 |
$text .= $token->{data}; |
$text .= $token->{data}; |
| 2281 |
!!!next-token; |
!!!next-token; |
| 2282 |
} # stop if non-character token or tokenizer stops tokenising |
} # stop if non-character token or tokenizer stops tokenising |
| 2286 |
|
|
| 2287 |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
| 2288 |
|
|
| 2289 |
if ($token->{type} eq 'end tag' and |
if ($token->{type} == END_TAG_TOKEN and |
| 2290 |
$token->{tag_name} eq 'script') { |
$token->{tag_name} eq 'script') { |
| 2291 |
## Ignore the token |
## Ignore the token |
| 2292 |
} else { |
} else { |
| 2532 |
my $insert; |
my $insert; |
| 2533 |
|
|
| 2534 |
B: { |
B: { |
| 2535 |
if ($token->{type} eq 'DOCTYPE') { |
if ($token->{type} == DOCTYPE_TOKEN) { |
| 2536 |
!!!parse-error (type => 'DOCTYPE in the middle'); |
!!!parse-error (type => 'DOCTYPE in the middle'); |
| 2537 |
## Ignore the token |
## Ignore the token |
| 2538 |
## Stay in the phase |
## Stay in the phase |
| 2539 |
!!!next-token; |
!!!next-token; |
| 2540 |
redo B; |
redo B; |
| 2541 |
} elsif ($token->{type} eq 'end-of-file') { |
} elsif ($token->{type} == END_OF_FILE_TOKEN) { |
| 2542 |
if ($self->{insertion_mode} eq 'after html body' or |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM or |
| 2543 |
$self->{insertion_mode} eq 'after html frameset') { |
$self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
| 2544 |
# |
# |
| 2545 |
} else { |
} else { |
| 2546 |
## Generate implied end tags |
## Generate implied end tags |
| 2549 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 2550 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 2551 |
!!!back-token; |
!!!back-token; |
| 2552 |
$token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]}; |
$token = {type => END_TAG_TOKEN, tag_name => $self->{open_elements}->[-1]->[1]}; |
| 2553 |
redo B; |
redo B; |
| 2554 |
} |
} |
| 2555 |
|
|
| 2567 |
|
|
| 2568 |
## Stop parsing |
## Stop parsing |
| 2569 |
last B; |
last B; |
| 2570 |
} elsif ($token->{type} eq 'start tag' and |
} elsif ($token->{type} == START_TAG_TOKEN and |
| 2571 |
$token->{tag_name} eq 'html') { |
$token->{tag_name} eq 'html') { |
| 2572 |
if ($self->{insertion_mode} eq 'after html body') { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 2573 |
## Turn into the main phase |
## Turn into the main phase |
| 2574 |
!!!parse-error (type => 'after html:html'); |
!!!parse-error (type => 'after html:html'); |
| 2575 |
$self->{insertion_mode} = 'after body'; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 2576 |
} elsif ($self->{insertion_mode} eq 'after html frameset') { |
} elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
| 2577 |
## Turn into the main phase |
## Turn into the main phase |
| 2578 |
!!!parse-error (type => 'after html:html'); |
!!!parse-error (type => 'after html:html'); |
| 2579 |
$self->{insertion_mode} = 'after frameset'; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 2580 |
} |
} |
| 2581 |
|
|
| 2582 |
## ISSUE: "aa<html>" is not a parse error. |
## ISSUE: "aa<html>" is not a parse error. |
| 2594 |
} |
} |
| 2595 |
!!!next-token; |
!!!next-token; |
| 2596 |
redo B; |
redo B; |
| 2597 |
} elsif ($token->{type} eq 'comment') { |
} elsif ($token->{type} == COMMENT_TOKEN) { |
| 2598 |
my $comment = $self->{document}->create_comment ($token->{data}); |
my $comment = $self->{document}->create_comment ($token->{data}); |
| 2599 |
if ($self->{insertion_mode} eq 'after html body' or |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM or |
| 2600 |
$self->{insertion_mode} eq 'after html frameset') { |
$self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
| 2601 |
$self->{document}->append_child ($comment); |
$self->{document}->append_child ($comment); |
| 2602 |
} elsif ($self->{insertion_mode} eq 'after body') { |
} elsif ($self->{insertion_mode} == AFTER_BODY_IM) { |
| 2603 |
$self->{open_elements}->[0]->[0]->append_child ($comment); |
$self->{open_elements}->[0]->[0]->append_child ($comment); |
| 2604 |
} else { |
} else { |
| 2605 |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
$self->{open_elements}->[-1]->[0]->append_child ($comment); |
| 2606 |
} |
} |
| 2607 |
!!!next-token; |
!!!next-token; |
| 2608 |
redo B; |
redo B; |
| 2609 |
} elsif ($self->{insertion_mode} eq 'in head' or |
} elsif ($self->{insertion_mode} == IN_HEAD_IM or |
| 2610 |
$self->{insertion_mode} eq 'in head noscript' or |
$self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM or |
| 2611 |
$self->{insertion_mode} eq 'after head' or |
$self->{insertion_mode} == AFTER_HEAD_IM or |
| 2612 |
$self->{insertion_mode} eq 'before head') { |
$self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2613 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 2614 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 2615 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 2616 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 2619 |
} |
} |
| 2620 |
} |
} |
| 2621 |
|
|
| 2622 |
if ($self->{insertion_mode} eq 'before head') { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2623 |
## As if <head> |
## As if <head> |
| 2624 |
!!!create-element ($self->{head_element}, 'head'); |
!!!create-element ($self->{head_element}, 'head'); |
| 2625 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2629 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2630 |
|
|
| 2631 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2632 |
} elsif ($self->{insertion_mode} eq 'in head noscript') { |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2633 |
## As if </noscript> |
## As if </noscript> |
| 2634 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2635 |
!!!parse-error (type => 'in noscript:#character'); |
!!!parse-error (type => 'in noscript:#character'); |
| 2639 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2640 |
|
|
| 2641 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2642 |
} elsif ($self->{insertion_mode} eq 'in head') { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2643 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2644 |
|
|
| 2645 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2648 |
## "after head" insertion mode |
## "after head" insertion mode |
| 2649 |
## As if <body> |
## As if <body> |
| 2650 |
!!!insert-element ('body'); |
!!!insert-element ('body'); |
| 2651 |
$self->{insertion_mode} = 'in body'; |
$self->{insertion_mode} = IN_BODY_IM; |
| 2652 |
## reprocess |
## reprocess |
| 2653 |
redo B; |
redo B; |
| 2654 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 2655 |
if ($token->{tag_name} eq 'head') { |
if ($token->{tag_name} eq 'head') { |
| 2656 |
if ($self->{insertion_mode} eq 'before head') { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2657 |
!!!create-element ($self->{head_element}, $token->{tag_name}, $token->{attributes}); |
!!!create-element ($self->{head_element}, $token->{tag_name}, $token->{attributes}); |
| 2658 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2659 |
push @{$self->{open_elements}}, [$self->{head_element}, $token->{tag_name}]; |
push @{$self->{open_elements}}, [$self->{head_element}, $token->{tag_name}]; |
| 2660 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2661 |
!!!next-token; |
!!!next-token; |
| 2662 |
redo B; |
redo B; |
| 2663 |
} elsif ($self->{insertion_mode} ne 'after head') { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2664 |
|
# |
| 2665 |
|
} else { |
| 2666 |
!!!parse-error (type => 'in head:head'); # or in head noscript |
!!!parse-error (type => 'in head:head'); # or in head noscript |
| 2667 |
## Ignore the token |
## Ignore the token |
| 2668 |
!!!next-token; |
!!!next-token; |
| 2669 |
redo B; |
redo B; |
|
} else { |
|
|
# |
|
| 2670 |
} |
} |
| 2671 |
} elsif ($self->{insertion_mode} eq 'before head') { |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2672 |
## As if <head> |
## As if <head> |
| 2673 |
!!!create-element ($self->{head_element}, 'head'); |
!!!create-element ($self->{head_element}, 'head'); |
| 2674 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2675 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2676 |
|
|
| 2677 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2678 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2679 |
} |
} |
| 2680 |
|
|
| 2681 |
if ($token->{tag_name} eq 'base') { |
if ($token->{tag_name} eq 'base') { |
| 2682 |
if ($self->{insertion_mode} eq 'in head noscript') { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2683 |
## As if </noscript> |
## As if </noscript> |
| 2684 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2685 |
!!!parse-error (type => 'in noscript:base'); |
!!!parse-error (type => 'in noscript:base'); |
| 2686 |
|
|
| 2687 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2688 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2689 |
} |
} |
| 2690 |
|
|
| 2691 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 2692 |
if ($self->{insertion_mode} eq 'after head') { |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2693 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 2694 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2695 |
} |
} |
| 2696 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 2697 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 2698 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 2699 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 2700 |
!!!next-token; |
!!!next-token; |
| 2701 |
redo B; |
redo B; |
| 2702 |
} elsif ($token->{tag_name} eq 'link') { |
} elsif ($token->{tag_name} eq 'link') { |
| 2703 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 2704 |
if ($self->{insertion_mode} eq 'after head') { |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2705 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 2706 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2707 |
} |
} |
| 2708 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 2709 |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec. |
| 2710 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 2711 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 2712 |
!!!next-token; |
!!!next-token; |
| 2713 |
redo B; |
redo B; |
| 2714 |
} elsif ($token->{tag_name} eq 'meta') { |
} elsif ($token->{tag_name} eq 'meta') { |
| 2715 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 2716 |
if ($self->{insertion_mode} eq 'after head') { |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2717 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 2718 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2719 |
} |
} |
| 2739 |
|
|
| 2740 |
## TODO: Extracting |charset| from |meta|. |
## TODO: Extracting |charset| from |meta|. |
| 2741 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 2742 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 2743 |
!!!next-token; |
!!!next-token; |
| 2744 |
redo B; |
redo B; |
| 2745 |
} elsif ($token->{tag_name} eq 'title') { |
} elsif ($token->{tag_name} eq 'title') { |
| 2746 |
if ($self->{insertion_mode} eq 'in head noscript') { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2747 |
## As if </noscript> |
## As if </noscript> |
| 2748 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2749 |
!!!parse-error (type => 'in noscript:title'); |
!!!parse-error (type => 'in noscript:title'); |
| 2750 |
|
|
| 2751 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2752 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2753 |
} elsif ($self->{insertion_mode} eq 'after head') { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2754 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 2755 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2756 |
} |
} |
| 2761 |
$parse_rcdata->(RCDATA_CONTENT_MODEL, |
$parse_rcdata->(RCDATA_CONTENT_MODEL, |
| 2762 |
sub { $parent->append_child ($_[0]) }); |
sub { $parent->append_child ($_[0]) }); |
| 2763 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 2764 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 2765 |
redo B; |
redo B; |
| 2766 |
} elsif ($token->{tag_name} eq 'style') { |
} elsif ($token->{tag_name} eq 'style') { |
| 2767 |
## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and |
## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and |
| 2768 |
## insertion mode 'in head') |
## insertion mode IN_HEAD_IM) |
| 2769 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 2770 |
if ($self->{insertion_mode} eq 'after head') { |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2771 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 2772 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2773 |
} |
} |
| 2774 |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current); |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current); |
| 2775 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 2776 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 2777 |
redo B; |
redo B; |
| 2778 |
} elsif ($token->{tag_name} eq 'noscript') { |
} elsif ($token->{tag_name} eq 'noscript') { |
| 2779 |
if ($self->{insertion_mode} eq 'in head') { |
if ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2780 |
## NOTE: and scripting is disabled |
## NOTE: and scripting is disabled |
| 2781 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 2782 |
$self->{insertion_mode} = 'in head noscript'; |
$self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM; |
| 2783 |
!!!next-token; |
!!!next-token; |
| 2784 |
redo B; |
redo B; |
| 2785 |
} elsif ($self->{insertion_mode} eq 'in head noscript') { |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2786 |
!!!parse-error (type => 'in noscript:noscript'); |
!!!parse-error (type => 'in noscript:noscript'); |
| 2787 |
## Ignore the token |
## Ignore the token |
| 2788 |
!!!next-token; |
!!!next-token; |
| 2791 |
# |
# |
| 2792 |
} |
} |
| 2793 |
} elsif ($token->{tag_name} eq 'script') { |
} elsif ($token->{tag_name} eq 'script') { |
| 2794 |
if ($self->{insertion_mode} eq 'in head noscript') { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2795 |
## As if </noscript> |
## As if </noscript> |
| 2796 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2797 |
!!!parse-error (type => 'in noscript:script'); |
!!!parse-error (type => 'in noscript:script'); |
| 2798 |
|
|
| 2799 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2800 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2801 |
} elsif ($self->{insertion_mode} eq 'after head') { |
} elsif ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2802 |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
!!!parse-error (type => 'after head:'.$token->{tag_name}); |
| 2803 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2804 |
} |
} |
| 2806 |
## NOTE: There is a "as if in head" code clone. |
## NOTE: There is a "as if in head" code clone. |
| 2807 |
$script_start_tag->($insert_to_current); |
$script_start_tag->($insert_to_current); |
| 2808 |
pop @{$self->{open_elements}} |
pop @{$self->{open_elements}} |
| 2809 |
if $self->{insertion_mode} eq 'after head'; |
if $self->{insertion_mode} == AFTER_HEAD_IM; |
| 2810 |
redo B; |
redo B; |
| 2811 |
} elsif ($token->{tag_name} eq 'body' or |
} elsif ($token->{tag_name} eq 'body' or |
| 2812 |
$token->{tag_name} eq 'frameset') { |
$token->{tag_name} eq 'frameset') { |
| 2813 |
if ($self->{insertion_mode} eq 'in head noscript') { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2814 |
## As if </noscript> |
## As if </noscript> |
| 2815 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2816 |
!!!parse-error (type => 'in noscript:'.$token->{tag_name}); |
!!!parse-error (type => 'in noscript:'.$token->{tag_name}); |
| 2820 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2821 |
|
|
| 2822 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2823 |
} elsif ($self->{insertion_mode} eq 'in head') { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2824 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2825 |
|
|
| 2826 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2828 |
|
|
| 2829 |
## "after head" insertion mode |
## "after head" insertion mode |
| 2830 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 2831 |
$self->{insertion_mode} = 'in '.$token->{tag_name}; |
if ($token->{tag_name} eq 'body') { |
| 2832 |
|
$self->{insertion_mode} = IN_BODY_IM; |
| 2833 |
|
} elsif ($token->{tag_name} eq 'frameset') { |
| 2834 |
|
$self->{insertion_mode} = IN_FRAMESET_IM; |
| 2835 |
|
} else { |
| 2836 |
|
die "$0: tag name: $self->{tag_name}"; |
| 2837 |
|
} |
| 2838 |
!!!next-token; |
!!!next-token; |
| 2839 |
redo B; |
redo B; |
| 2840 |
} else { |
} else { |
| 2841 |
# |
# |
| 2842 |
} |
} |
| 2843 |
|
|
| 2844 |
if ($self->{insertion_mode} eq 'in head noscript') { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2845 |
## As if </noscript> |
## As if </noscript> |
| 2846 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2847 |
!!!parse-error (type => 'in noscript:/'.$token->{tag_name}); |
!!!parse-error (type => 'in noscript:/'.$token->{tag_name}); |
| 2851 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2852 |
|
|
| 2853 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2854 |
} elsif ($self->{insertion_mode} eq 'in head') { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2855 |
## As if </head> |
## As if </head> |
| 2856 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2857 |
|
|
| 2861 |
## "after head" insertion mode |
## "after head" insertion mode |
| 2862 |
## As if <body> |
## As if <body> |
| 2863 |
!!!insert-element ('body'); |
!!!insert-element ('body'); |
| 2864 |
$self->{insertion_mode} = 'in body'; |
$self->{insertion_mode} = IN_BODY_IM; |
| 2865 |
## reprocess |
## reprocess |
| 2866 |
redo B; |
redo B; |
| 2867 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 2868 |
if ($token->{tag_name} eq 'head') { |
if ($token->{tag_name} eq 'head') { |
| 2869 |
if ($self->{insertion_mode} eq 'before head') { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2870 |
## As if <head> |
## As if <head> |
| 2871 |
!!!create-element ($self->{head_element}, 'head'); |
!!!create-element ($self->{head_element}, 'head'); |
| 2872 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2874 |
|
|
| 2875 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2876 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2877 |
$self->{insertion_mode} = 'after head'; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2878 |
!!!next-token; |
!!!next-token; |
| 2879 |
redo B; |
redo B; |
| 2880 |
} elsif ($self->{insertion_mode} eq 'in head noscript') { |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2881 |
## As if </noscript> |
## As if </noscript> |
| 2882 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2883 |
!!!parse-error (type => 'in noscript:script'); |
!!!parse-error (type => 'in noscript:script'); |
| 2884 |
|
|
| 2885 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2886 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2887 |
$self->{insertion_mode} = 'after head'; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2888 |
!!!next-token; |
!!!next-token; |
| 2889 |
redo B; |
redo B; |
| 2890 |
} elsif ($self->{insertion_mode} eq 'in head') { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2891 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2892 |
$self->{insertion_mode} = 'after head'; |
$self->{insertion_mode} = AFTER_HEAD_IM; |
| 2893 |
!!!next-token; |
!!!next-token; |
| 2894 |
redo B; |
redo B; |
| 2895 |
} else { |
} else { |
| 2896 |
# |
# |
| 2897 |
} |
} |
| 2898 |
} elsif ($token->{tag_name} eq 'noscript') { |
} elsif ($token->{tag_name} eq 'noscript') { |
| 2899 |
if ($self->{insertion_mode} eq 'in head noscript') { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2900 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2901 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2902 |
!!!next-token; |
!!!next-token; |
| 2903 |
redo B; |
redo B; |
| 2904 |
} elsif ($self->{insertion_mode} eq 'before head') { |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2905 |
!!!parse-error (type => 'unmatched end tag:noscript'); |
!!!parse-error (type => 'unmatched end tag:noscript'); |
| 2906 |
## Ignore the token ## ISSUE: An issue in the spec. |
## Ignore the token ## ISSUE: An issue in the spec. |
| 2907 |
!!!next-token; |
!!!next-token; |
| 2912 |
} elsif ({ |
} elsif ({ |
| 2913 |
body => 1, html => 1, |
body => 1, html => 1, |
| 2914 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2915 |
if ($self->{insertion_mode} eq 'before head') { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2916 |
## As if <head> |
## As if <head> |
| 2917 |
!!!create-element ($self->{head_element}, 'head'); |
!!!create-element ($self->{head_element}, 'head'); |
| 2918 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2919 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2920 |
|
|
| 2921 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2922 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2923 |
} elsif ($self->{insertion_mode} eq 'in head noscript') { |
} elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2924 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 2925 |
## Ignore the token |
## Ignore the token |
| 2926 |
!!!next-token; |
!!!next-token; |
| 2931 |
} elsif ({ |
} elsif ({ |
| 2932 |
p => 1, br => 1, |
p => 1, br => 1, |
| 2933 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 2934 |
if ($self->{insertion_mode} eq 'before head') { |
if ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2935 |
## As if <head> |
## As if <head> |
| 2936 |
!!!create-element ($self->{head_element}, 'head'); |
!!!create-element ($self->{head_element}, 'head'); |
| 2937 |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
$self->{open_elements}->[-1]->[0]->append_child ($self->{head_element}); |
| 2938 |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
push @{$self->{open_elements}}, [$self->{head_element}, 'head']; |
| 2939 |
|
|
| 2940 |
$self->{insertion_mode} = 'in head'; |
$self->{insertion_mode} = IN_HEAD_IM; |
| 2941 |
## Reprocess in the "in head" insertion mode... |
## Reprocess in the "in head" insertion mode... |
| 2942 |
} |
} |
| 2943 |
|
|
| 2944 |
# |
# |
| 2945 |
} else { |
} else { |
| 2946 |
if ($self->{insertion_mode} ne 'after head') { |
if ($self->{insertion_mode} == AFTER_HEAD_IM) { |
| 2947 |
|
# |
| 2948 |
|
} else { |
| 2949 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 2950 |
## Ignore the token |
## Ignore the token |
| 2951 |
!!!next-token; |
!!!next-token; |
| 2952 |
redo B; |
redo B; |
|
} else { |
|
|
# |
|
| 2953 |
} |
} |
| 2954 |
} |
} |
| 2955 |
|
|
| 2956 |
if ($self->{insertion_mode} eq 'in head noscript') { |
if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) { |
| 2957 |
## As if </noscript> |
## As if </noscript> |
| 2958 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2959 |
!!!parse-error (type => 'in noscript:/'.$token->{tag_name}); |
!!!parse-error (type => 'in noscript:/'.$token->{tag_name}); |
| 2963 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2964 |
|
|
| 2965 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2966 |
} elsif ($self->{insertion_mode} eq 'in head') { |
} elsif ($self->{insertion_mode} == IN_HEAD_IM) { |
| 2967 |
## As if </head> |
## As if </head> |
| 2968 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 2969 |
|
|
| 2970 |
## Reprocess in the "after head" insertion mode... |
## Reprocess in the "after head" insertion mode... |
| 2971 |
} elsif ($self->{insertion_mode} eq 'before head') { |
} elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) { |
| 2972 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 2973 |
## Ignore the token ## ISSUE: An issue in the spec. |
## Ignore the token ## ISSUE: An issue in the spec. |
| 2974 |
!!!next-token; |
!!!next-token; |
| 2978 |
## "after head" insertion mode |
## "after head" insertion mode |
| 2979 |
## As if <body> |
## As if <body> |
| 2980 |
!!!insert-element ('body'); |
!!!insert-element ('body'); |
| 2981 |
$self->{insertion_mode} = 'in body'; |
$self->{insertion_mode} = IN_BODY_IM; |
| 2982 |
## reprocess |
## reprocess |
| 2983 |
redo B; |
redo B; |
| 2984 |
} else { |
} else { |
| 2986 |
} |
} |
| 2987 |
|
|
| 2988 |
## ISSUE: An issue in the spec. |
## ISSUE: An issue in the spec. |
| 2989 |
} elsif ($self->{insertion_mode} eq 'in body' or |
} elsif ($self->{insertion_mode} == IN_BODY_IM or |
| 2990 |
$self->{insertion_mode} eq 'in cell' or |
$self->{insertion_mode} == IN_CELL_IM or |
| 2991 |
$self->{insertion_mode} eq 'in caption') { |
$self->{insertion_mode} == IN_CAPTION_IM) { |
| 2992 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 2993 |
## NOTE: There is a code clone of "character in body". |
## NOTE: There is a code clone of "character in body". |
| 2994 |
$reconstruct_active_formatting_elements->($insert_to_current); |
$reconstruct_active_formatting_elements->($insert_to_current); |
| 2995 |
|
|
| 2997 |
|
|
| 2998 |
!!!next-token; |
!!!next-token; |
| 2999 |
redo B; |
redo B; |
| 3000 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 3001 |
if ({ |
if ({ |
| 3002 |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
caption => 1, col => 1, colgroup => 1, tbody => 1, |
| 3003 |
td => 1, tfoot => 1, th => 1, thead => 1, tr => 1, |
td => 1, tfoot => 1, th => 1, thead => 1, tr => 1, |
| 3004 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3005 |
if ($self->{insertion_mode} eq 'in cell') { |
if ($self->{insertion_mode} == IN_CELL_IM) { |
| 3006 |
## have an element in table scope |
## have an element in table scope |
| 3007 |
my $tn; |
my $tn; |
| 3008 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3025 |
|
|
| 3026 |
## Close the cell |
## Close the cell |
| 3027 |
!!!back-token; # <?> |
!!!back-token; # <?> |
| 3028 |
$token = {type => 'end tag', tag_name => $tn}; |
$token = {type => END_TAG_TOKEN, tag_name => $tn}; |
| 3029 |
redo B; |
redo B; |
| 3030 |
} elsif ($self->{insertion_mode} eq 'in caption') { |
} elsif ($self->{insertion_mode} == IN_CAPTION_IM) { |
| 3031 |
!!!parse-error (type => 'not closed:caption'); |
!!!parse-error (type => 'not closed:caption'); |
| 3032 |
|
|
| 3033 |
## As if </caption> |
## As if </caption> |
| 3058 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 3059 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3060 |
!!!back-token; # <?> |
!!!back-token; # <?> |
| 3061 |
$token = {type => 'end tag', tag_name => 'caption'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'caption'}; |
| 3062 |
!!!back-token; |
!!!back-token; |
| 3063 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 3064 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 3065 |
redo B; |
redo B; |
| 3066 |
} |
} |
| 3073 |
|
|
| 3074 |
$clear_up_to_marker->(); |
$clear_up_to_marker->(); |
| 3075 |
|
|
| 3076 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3077 |
|
|
| 3078 |
## reprocess |
## reprocess |
| 3079 |
redo B; |
redo B; |
| 3083 |
} else { |
} else { |
| 3084 |
# |
# |
| 3085 |
} |
} |
| 3086 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 3087 |
if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') { |
if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') { |
| 3088 |
if ($self->{insertion_mode} eq 'in cell') { |
if ($self->{insertion_mode} == IN_CELL_IM) { |
| 3089 |
## have an element in table scope |
## have an element in table scope |
| 3090 |
my $i; |
my $i; |
| 3091 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3115 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 3116 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3117 |
!!!back-token; |
!!!back-token; |
| 3118 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 3119 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 3120 |
redo B; |
redo B; |
| 3121 |
} |
} |
| 3128 |
|
|
| 3129 |
$clear_up_to_marker->(); |
$clear_up_to_marker->(); |
| 3130 |
|
|
| 3131 |
$self->{insertion_mode} = 'in row'; |
$self->{insertion_mode} = IN_ROW_IM; |
| 3132 |
|
|
| 3133 |
!!!next-token; |
!!!next-token; |
| 3134 |
redo B; |
redo B; |
| 3135 |
} elsif ($self->{insertion_mode} eq 'in caption') { |
} elsif ($self->{insertion_mode} == IN_CAPTION_IM) { |
| 3136 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3137 |
## Ignore the token |
## Ignore the token |
| 3138 |
!!!next-token; |
!!!next-token; |
| 3141 |
# |
# |
| 3142 |
} |
} |
| 3143 |
} elsif ($token->{tag_name} eq 'caption') { |
} elsif ($token->{tag_name} eq 'caption') { |
| 3144 |
if ($self->{insertion_mode} eq 'in caption') { |
if ($self->{insertion_mode} == IN_CAPTION_IM) { |
| 3145 |
## have a table element in table scope |
## have a table element in table scope |
| 3146 |
my $i; |
my $i; |
| 3147 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3169 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 3170 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3171 |
!!!back-token; |
!!!back-token; |
| 3172 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 3173 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 3174 |
redo B; |
redo B; |
| 3175 |
} |
} |
| 3182 |
|
|
| 3183 |
$clear_up_to_marker->(); |
$clear_up_to_marker->(); |
| 3184 |
|
|
| 3185 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3186 |
|
|
| 3187 |
!!!next-token; |
!!!next-token; |
| 3188 |
redo B; |
redo B; |
| 3189 |
} elsif ($self->{insertion_mode} eq 'in cell') { |
} elsif ($self->{insertion_mode} == IN_CELL_IM) { |
| 3190 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3191 |
## Ignore the token |
## Ignore the token |
| 3192 |
!!!next-token; |
!!!next-token; |
| 3198 |
table => 1, tbody => 1, tfoot => 1, |
table => 1, tbody => 1, tfoot => 1, |
| 3199 |
thead => 1, tr => 1, |
thead => 1, tr => 1, |
| 3200 |
}->{$token->{tag_name}} and |
}->{$token->{tag_name}} and |
| 3201 |
$self->{insertion_mode} eq 'in cell') { |
$self->{insertion_mode} == IN_CELL_IM) { |
| 3202 |
## have an element in table scope |
## have an element in table scope |
| 3203 |
my $i; |
my $i; |
| 3204 |
my $tn; |
my $tn; |
| 3226 |
|
|
| 3227 |
## Close the cell |
## Close the cell |
| 3228 |
!!!back-token; # </?> |
!!!back-token; # </?> |
| 3229 |
$token = {type => 'end tag', tag_name => $tn}; |
$token = {type => END_TAG_TOKEN, tag_name => $tn}; |
| 3230 |
redo B; |
redo B; |
| 3231 |
} elsif ($token->{tag_name} eq 'table' and |
} elsif ($token->{tag_name} eq 'table' and |
| 3232 |
$self->{insertion_mode} eq 'in caption') { |
$self->{insertion_mode} == IN_CAPTION_IM) { |
| 3233 |
!!!parse-error (type => 'not closed:caption'); |
!!!parse-error (type => 'not closed:caption'); |
| 3234 |
|
|
| 3235 |
## As if </caption> |
## As if </caption> |
| 3260 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 3261 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3262 |
!!!back-token; # </table> |
!!!back-token; # </table> |
| 3263 |
$token = {type => 'end tag', tag_name => 'caption'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'caption'}; |
| 3264 |
!!!back-token; |
!!!back-token; |
| 3265 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 3266 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 3267 |
redo B; |
redo B; |
| 3268 |
} |
} |
| 3275 |
|
|
| 3276 |
$clear_up_to_marker->(); |
$clear_up_to_marker->(); |
| 3277 |
|
|
| 3278 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3279 |
|
|
| 3280 |
## reprocess |
## reprocess |
| 3281 |
redo B; |
redo B; |
| 3282 |
} elsif ({ |
} elsif ({ |
| 3283 |
body => 1, col => 1, colgroup => 1, html => 1, |
body => 1, col => 1, colgroup => 1, html => 1, |
| 3284 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3285 |
if ($self->{insertion_mode} eq 'in cell' or |
if ($self->{insertion_mode} == IN_CELL_IM or |
| 3286 |
$self->{insertion_mode} eq 'in caption') { |
$self->{insertion_mode} == IN_CAPTION_IM) { |
| 3287 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3288 |
## Ignore the token |
## Ignore the token |
| 3289 |
!!!next-token; |
!!!next-token; |
| 3295 |
tbody => 1, tfoot => 1, |
tbody => 1, tfoot => 1, |
| 3296 |
thead => 1, tr => 1, |
thead => 1, tr => 1, |
| 3297 |
}->{$token->{tag_name}} and |
}->{$token->{tag_name}} and |
| 3298 |
$self->{insertion_mode} eq 'in caption') { |
$self->{insertion_mode} == IN_CAPTION_IM) { |
| 3299 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3300 |
## Ignore the token |
## Ignore the token |
| 3301 |
!!!next-token; |
!!!next-token; |
| 3309 |
|
|
| 3310 |
$insert = $insert_to_current; |
$insert = $insert_to_current; |
| 3311 |
# |
# |
| 3312 |
} elsif ($self->{insertion_mode} eq 'in row' or |
} elsif ($self->{insertion_mode} == IN_ROW_IM or |
| 3313 |
$self->{insertion_mode} eq 'in table body' or |
$self->{insertion_mode} == IN_TABLE_BODY_IM or |
| 3314 |
$self->{insertion_mode} eq 'in table') { |
$self->{insertion_mode} == IN_TABLE_IM) { |
| 3315 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 3316 |
## NOTE: There are "character in table" code clones. |
## NOTE: There are "character in table" code clones. |
| 3317 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 3318 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 3370 |
|
|
| 3371 |
!!!next-token; |
!!!next-token; |
| 3372 |
redo B; |
redo B; |
| 3373 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 3374 |
if ({ |
if ({ |
| 3375 |
tr => ($self->{insertion_mode} ne 'in row'), |
tr => ($self->{insertion_mode} != IN_ROW_IM), |
| 3376 |
th => 1, td => 1, |
th => 1, td => 1, |
| 3377 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3378 |
if ($self->{insertion_mode} eq 'in table') { |
if ($self->{insertion_mode} == IN_TABLE_IM) { |
| 3379 |
## Clear back to table context |
## Clear back to table context |
| 3380 |
while ($self->{open_elements}->[-1]->[1] ne 'table' and |
while ($self->{open_elements}->[-1]->[1] ne 'table' and |
| 3381 |
$self->{open_elements}->[-1]->[1] ne 'html') { |
$self->{open_elements}->[-1]->[1] ne 'html') { |
| 3384 |
} |
} |
| 3385 |
|
|
| 3386 |
!!!insert-element ('tbody'); |
!!!insert-element ('tbody'); |
| 3387 |
$self->{insertion_mode} = 'in table body'; |
$self->{insertion_mode} = IN_TABLE_BODY_IM; |
| 3388 |
## reprocess in the "in table body" insertion mode... |
## reprocess in the "in table body" insertion mode... |
| 3389 |
} |
} |
| 3390 |
|
|
| 3391 |
if ($self->{insertion_mode} eq 'in table body') { |
if ($self->{insertion_mode} == IN_TABLE_BODY_IM) { |
| 3392 |
unless ($token->{tag_name} eq 'tr') { |
unless ($token->{tag_name} eq 'tr') { |
| 3393 |
!!!parse-error (type => 'missing start tag:tr'); |
!!!parse-error (type => 'missing start tag:tr'); |
| 3394 |
} |
} |
| 3401 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 3402 |
} |
} |
| 3403 |
|
|
| 3404 |
$self->{insertion_mode} = 'in row'; |
$self->{insertion_mode} = IN_ROW_IM; |
| 3405 |
if ($token->{tag_name} eq 'tr') { |
if ($token->{tag_name} eq 'tr') { |
| 3406 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3407 |
!!!next-token; |
!!!next-token; |
| 3421 |
} |
} |
| 3422 |
|
|
| 3423 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3424 |
$self->{insertion_mode} = 'in cell'; |
$self->{insertion_mode} = IN_CELL_IM; |
| 3425 |
|
|
| 3426 |
push @$active_formatting_elements, ['#marker', '']; |
push @$active_formatting_elements, ['#marker', '']; |
| 3427 |
|
|
| 3430 |
} elsif ({ |
} elsif ({ |
| 3431 |
caption => 1, col => 1, colgroup => 1, |
caption => 1, col => 1, colgroup => 1, |
| 3432 |
tbody => 1, tfoot => 1, thead => 1, |
tbody => 1, tfoot => 1, thead => 1, |
| 3433 |
tr => 1, # $self->{insertion_mode} eq 'in row' |
tr => 1, # $self->{insertion_mode} == IN_ROW_IM |
| 3434 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3435 |
if ($self->{insertion_mode} eq 'in row') { |
if ($self->{insertion_mode} == IN_ROW_IM) { |
| 3436 |
## As if </tr> |
## As if </tr> |
| 3437 |
## have an element in table scope |
## have an element in table scope |
| 3438 |
my $i; |
my $i; |
| 3463 |
} |
} |
| 3464 |
|
|
| 3465 |
pop @{$self->{open_elements}}; # tr |
pop @{$self->{open_elements}}; # tr |
| 3466 |
$self->{insertion_mode} = 'in table body'; |
$self->{insertion_mode} = IN_TABLE_BODY_IM; |
| 3467 |
if ($token->{tag_name} eq 'tr') { |
if ($token->{tag_name} eq 'tr') { |
| 3468 |
## reprocess |
## reprocess |
| 3469 |
redo B; |
redo B; |
| 3472 |
} |
} |
| 3473 |
} |
} |
| 3474 |
|
|
| 3475 |
if ($self->{insertion_mode} eq 'in table body') { |
if ($self->{insertion_mode} == IN_TABLE_BODY_IM) { |
| 3476 |
## have an element in table scope |
## have an element in table scope |
| 3477 |
my $i; |
my $i; |
| 3478 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3511 |
## nop by definition |
## nop by definition |
| 3512 |
|
|
| 3513 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 3514 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3515 |
## reprocess in "in table" insertion mode... |
## reprocess in "in table" insertion mode... |
| 3516 |
} |
} |
| 3517 |
|
|
| 3524 |
} |
} |
| 3525 |
|
|
| 3526 |
!!!insert-element ('colgroup'); |
!!!insert-element ('colgroup'); |
| 3527 |
$self->{insertion_mode} = 'in column group'; |
$self->{insertion_mode} = IN_COLUMN_GROUP_IM; |
| 3528 |
## reprocess |
## reprocess |
| 3529 |
redo B; |
redo B; |
| 3530 |
} elsif ({ |
} elsif ({ |
| 3544 |
|
|
| 3545 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3546 |
$self->{insertion_mode} = { |
$self->{insertion_mode} = { |
| 3547 |
caption => 'in caption', |
caption => IN_CAPTION_IM, |
| 3548 |
colgroup => 'in column group', |
colgroup => IN_COLUMN_GROUP_IM, |
| 3549 |
tbody => 'in table body', |
tbody => IN_TABLE_BODY_IM, |
| 3550 |
tfoot => 'in table body', |
tfoot => IN_TABLE_BODY_IM, |
| 3551 |
thead => 'in table body', |
thead => IN_TABLE_BODY_IM, |
| 3552 |
}->{$token->{tag_name}}; |
}->{$token->{tag_name}}; |
| 3553 |
!!!next-token; |
!!!next-token; |
| 3554 |
redo B; |
redo B; |
| 3587 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 3588 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3589 |
!!!back-token; # <table> |
!!!back-token; # <table> |
| 3590 |
$token = {type => 'end tag', tag_name => 'table'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'table'}; |
| 3591 |
!!!back-token; |
!!!back-token; |
| 3592 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 3593 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 3594 |
redo B; |
redo B; |
| 3595 |
} |
} |
| 3607 |
} else { |
} else { |
| 3608 |
# |
# |
| 3609 |
} |
} |
| 3610 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 3611 |
if ($token->{tag_name} eq 'tr' and |
if ($token->{tag_name} eq 'tr' and |
| 3612 |
$self->{insertion_mode} eq 'in row') { |
$self->{insertion_mode} == IN_ROW_IM) { |
| 3613 |
## have an element in table scope |
## have an element in table scope |
| 3614 |
my $i; |
my $i; |
| 3615 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3639 |
} |
} |
| 3640 |
|
|
| 3641 |
pop @{$self->{open_elements}}; # tr |
pop @{$self->{open_elements}}; # tr |
| 3642 |
$self->{insertion_mode} = 'in table body'; |
$self->{insertion_mode} = IN_TABLE_BODY_IM; |
| 3643 |
!!!next-token; |
!!!next-token; |
| 3644 |
redo B; |
redo B; |
| 3645 |
} elsif ($token->{tag_name} eq 'table') { |
} elsif ($token->{tag_name} eq 'table') { |
| 3646 |
if ($self->{insertion_mode} eq 'in row') { |
if ($self->{insertion_mode} == IN_ROW_IM) { |
| 3647 |
## As if </tr> |
## As if </tr> |
| 3648 |
## have an element in table scope |
## have an element in table scope |
| 3649 |
my $i; |
my $i; |
| 3674 |
} |
} |
| 3675 |
|
|
| 3676 |
pop @{$self->{open_elements}}; # tr |
pop @{$self->{open_elements}}; # tr |
| 3677 |
$self->{insertion_mode} = 'in table body'; |
$self->{insertion_mode} = IN_TABLE_BODY_IM; |
| 3678 |
## reprocess in the "in table body" insertion mode... |
## reprocess in the "in table body" insertion mode... |
| 3679 |
} |
} |
| 3680 |
|
|
| 3681 |
if ($self->{insertion_mode} eq 'in table body') { |
if ($self->{insertion_mode} == IN_TABLE_BODY_IM) { |
| 3682 |
## have an element in table scope |
## have an element in table scope |
| 3683 |
my $i; |
my $i; |
| 3684 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3717 |
## nop by definition |
## nop by definition |
| 3718 |
|
|
| 3719 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 3720 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3721 |
## reprocess in the "in table" insertion mode... |
## reprocess in the "in table" insertion mode... |
| 3722 |
} |
} |
| 3723 |
|
|
| 3748 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 3749 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 3750 |
!!!back-token; |
!!!back-token; |
| 3751 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 3752 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 3753 |
redo B; |
redo B; |
| 3754 |
} |
} |
| 3766 |
} elsif ({ |
} elsif ({ |
| 3767 |
tbody => 1, tfoot => 1, thead => 1, |
tbody => 1, tfoot => 1, thead => 1, |
| 3768 |
}->{$token->{tag_name}} and |
}->{$token->{tag_name}} and |
| 3769 |
($self->{insertion_mode} eq 'in row' or |
($self->{insertion_mode} == IN_ROW_IM or |
| 3770 |
$self->{insertion_mode} eq 'in table body')) { |
$self->{insertion_mode} == IN_TABLE_BODY_IM)) { |
| 3771 |
if ($self->{insertion_mode} eq 'in row') { |
if ($self->{insertion_mode} == IN_ROW_IM) { |
| 3772 |
## have an element in table scope |
## have an element in table scope |
| 3773 |
my $i; |
my $i; |
| 3774 |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { |
| 3819 |
} |
} |
| 3820 |
|
|
| 3821 |
pop @{$self->{open_elements}}; # tr |
pop @{$self->{open_elements}}; # tr |
| 3822 |
$self->{insertion_mode} = 'in table body'; |
$self->{insertion_mode} = IN_TABLE_BODY_IM; |
| 3823 |
## reprocess in the "in table body" insertion mode... |
## reprocess in the "in table body" insertion mode... |
| 3824 |
} |
} |
| 3825 |
|
|
| 3852 |
} |
} |
| 3853 |
|
|
| 3854 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 3855 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3856 |
!!!next-token; |
!!!next-token; |
| 3857 |
redo B; |
redo B; |
| 3858 |
} elsif ({ |
} elsif ({ |
| 3859 |
body => 1, caption => 1, col => 1, colgroup => 1, |
body => 1, caption => 1, col => 1, colgroup => 1, |
| 3860 |
html => 1, td => 1, th => 1, |
html => 1, td => 1, th => 1, |
| 3861 |
tr => 1, # $self->{insertion_mode} eq 'in row' |
tr => 1, # $self->{insertion_mode} == IN_ROW_IM |
| 3862 |
tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} eq 'in table' |
tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM |
| 3863 |
}->{$token->{tag_name}}) { |
}->{$token->{tag_name}}) { |
| 3864 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 3865 |
## Ignore the token |
## Ignore the token |
| 3876 |
|
|
| 3877 |
$insert = $insert_to_foster; |
$insert = $insert_to_foster; |
| 3878 |
# |
# |
| 3879 |
} elsif ($self->{insertion_mode} eq 'in column group') { |
} elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) { |
| 3880 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 3881 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 3882 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 3883 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 3887 |
} |
} |
| 3888 |
|
|
| 3889 |
# |
# |
| 3890 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 3891 |
if ($token->{tag_name} eq 'col') { |
if ($token->{tag_name} eq 'col') { |
| 3892 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 3893 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 3896 |
} else { |
} else { |
| 3897 |
# |
# |
| 3898 |
} |
} |
| 3899 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 3900 |
if ($token->{tag_name} eq 'colgroup') { |
if ($token->{tag_name} eq 'colgroup') { |
| 3901 |
if ($self->{open_elements}->[-1]->[1] eq 'html') { |
if ($self->{open_elements}->[-1]->[1] eq 'html') { |
| 3902 |
!!!parse-error (type => 'unmatched end tag:colgroup'); |
!!!parse-error (type => 'unmatched end tag:colgroup'); |
| 3905 |
redo B; |
redo B; |
| 3906 |
} else { |
} else { |
| 3907 |
pop @{$self->{open_elements}}; # colgroup |
pop @{$self->{open_elements}}; # colgroup |
| 3908 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3909 |
!!!next-token; |
!!!next-token; |
| 3910 |
redo B; |
redo B; |
| 3911 |
} |
} |
| 3929 |
redo B; |
redo B; |
| 3930 |
} else { |
} else { |
| 3931 |
pop @{$self->{open_elements}}; # colgroup |
pop @{$self->{open_elements}}; # colgroup |
| 3932 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 3933 |
## reprocess |
## reprocess |
| 3934 |
redo B; |
redo B; |
| 3935 |
} |
} |
| 3936 |
} elsif ($self->{insertion_mode} eq 'in select') { |
} elsif ($self->{insertion_mode} == IN_SELECT_IM) { |
| 3937 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 3938 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); |
| 3939 |
!!!next-token; |
!!!next-token; |
| 3940 |
redo B; |
redo B; |
| 3941 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 3942 |
if ($token->{tag_name} eq 'option') { |
if ($token->{tag_name} eq 'option') { |
| 3943 |
if ($self->{open_elements}->[-1]->[1] eq 'option') { |
if ($self->{open_elements}->[-1]->[1] eq 'option') { |
| 3944 |
## As if </option> |
## As if </option> |
| 3994 |
} else { |
} else { |
| 3995 |
# |
# |
| 3996 |
} |
} |
| 3997 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 3998 |
if ($token->{tag_name} eq 'optgroup') { |
if ($token->{tag_name} eq 'optgroup') { |
| 3999 |
if ($self->{open_elements}->[-1]->[1] eq 'option' and |
if ($self->{open_elements}->[-1]->[1] eq 'option' and |
| 4000 |
$self->{open_elements}->[-2]->[1] eq 'optgroup') { |
$self->{open_elements}->[-2]->[1] eq 'optgroup') { |
| 4107 |
## Ignore the token |
## Ignore the token |
| 4108 |
!!!next-token; |
!!!next-token; |
| 4109 |
redo B; |
redo B; |
| 4110 |
} elsif ($self->{insertion_mode} eq 'after body' or |
} elsif ($self->{insertion_mode} == AFTER_BODY_IM or |
| 4111 |
$self->{insertion_mode} eq 'after html body') { |
$self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 4112 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4113 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 4114 |
my $data = $1; |
my $data = $1; |
| 4115 |
## As if in body |
## As if in body |
| 4123 |
} |
} |
| 4124 |
} |
} |
| 4125 |
|
|
| 4126 |
if ($self->{insertion_mode} eq 'after html body') { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 4127 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
| 4128 |
|
|
| 4129 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "main" phase, "after body" insertion mode... |
| 4132 |
## "after body" insertion mode |
## "after body" insertion mode |
| 4133 |
!!!parse-error (type => 'after body:#character'); |
!!!parse-error (type => 'after body:#character'); |
| 4134 |
|
|
| 4135 |
$self->{insertion_mode} = 'in body'; |
$self->{insertion_mode} = IN_BODY_IM; |
| 4136 |
## reprocess |
## reprocess |
| 4137 |
redo B; |
redo B; |
| 4138 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 4139 |
if ($self->{insertion_mode} eq 'after html body') { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 4140 |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
| 4141 |
|
|
| 4142 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "main" phase, "after body" insertion mode... |
| 4145 |
## "after body" insertion mode |
## "after body" insertion mode |
| 4146 |
!!!parse-error (type => 'after body:'.$token->{tag_name}); |
!!!parse-error (type => 'after body:'.$token->{tag_name}); |
| 4147 |
|
|
| 4148 |
$self->{insertion_mode} = 'in body'; |
$self->{insertion_mode} = IN_BODY_IM; |
| 4149 |
## reprocess |
## reprocess |
| 4150 |
redo B; |
redo B; |
| 4151 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 4152 |
if ($self->{insertion_mode} eq 'after html body') { |
if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { |
| 4153 |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
| 4154 |
|
|
| 4155 |
$self->{insertion_mode} = 'after body'; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 4156 |
## Reprocess in the "main" phase, "after body" insertion mode... |
## Reprocess in the "main" phase, "after body" insertion mode... |
| 4157 |
} |
} |
| 4158 |
|
|
| 4164 |
!!!next-token; |
!!!next-token; |
| 4165 |
redo B; |
redo B; |
| 4166 |
} else { |
} else { |
| 4167 |
$self->{insertion_mode} = 'after html body'; |
$self->{insertion_mode} = AFTER_HTML_BODY_IM; |
| 4168 |
!!!next-token; |
!!!next-token; |
| 4169 |
redo B; |
redo B; |
| 4170 |
} |
} |
| 4171 |
} else { |
} else { |
| 4172 |
!!!parse-error (type => 'after body:/'.$token->{tag_name}); |
!!!parse-error (type => 'after body:/'.$token->{tag_name}); |
| 4173 |
|
|
| 4174 |
$self->{insertion_mode} = 'in body'; |
$self->{insertion_mode} = IN_BODY_IM; |
| 4175 |
## reprocess |
## reprocess |
| 4176 |
redo B; |
redo B; |
| 4177 |
} |
} |
| 4178 |
} else { |
} else { |
| 4179 |
die "$0: $token->{type}: Unknown token type"; |
die "$0: $token->{type}: Unknown token type"; |
| 4180 |
} |
} |
| 4181 |
} elsif ($self->{insertion_mode} eq 'in frameset' or |
} elsif ($self->{insertion_mode} == IN_FRAMESET_IM or |
| 4182 |
$self->{insertion_mode} eq 'after frameset' or |
$self->{insertion_mode} == AFTER_FRAMESET_IM or |
| 4183 |
$self->{insertion_mode} eq 'after html frameset') { |
$self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
| 4184 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4185 |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { |
| 4186 |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
$self->{open_elements}->[-1]->[0]->manakai_append_text ($1); |
| 4187 |
|
|
| 4192 |
} |
} |
| 4193 |
|
|
| 4194 |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) { |
| 4195 |
if ($self->{insertion_mode} eq 'in frameset') { |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
| 4196 |
!!!parse-error (type => 'in frameset:#character'); |
!!!parse-error (type => 'in frameset:#character'); |
| 4197 |
} elsif ($self->{insertion_mode} eq 'after frameset') { |
} elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) { |
| 4198 |
!!!parse-error (type => 'after frameset:#character'); |
!!!parse-error (type => 'after frameset:#character'); |
| 4199 |
} else { # "after html frameset" |
} else { # "after html frameset" |
| 4200 |
!!!parse-error (type => 'after html:#character'); |
!!!parse-error (type => 'after html:#character'); |
| 4201 |
|
|
| 4202 |
$self->{insertion_mode} = 'after frameset'; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 4203 |
## Reprocess in the "main" phase, "after frameset"... |
## Reprocess in the "main" phase, "after frameset"... |
| 4204 |
!!!parse-error (type => 'after frameset:#character'); |
!!!parse-error (type => 'after frameset:#character'); |
| 4205 |
} |
} |
| 4214 |
} |
} |
| 4215 |
|
|
| 4216 |
die qq[$0: Character "$token->{data}"]; |
die qq[$0: Character "$token->{data}"]; |
| 4217 |
} elsif ($token->{type} eq 'start tag') { |
} elsif ($token->{type} == START_TAG_TOKEN) { |
| 4218 |
if ($self->{insertion_mode} eq 'after html frameset') { |
if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
| 4219 |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
!!!parse-error (type => 'after html:'.$token->{tag_name}); |
| 4220 |
|
|
| 4221 |
$self->{insertion_mode} = 'after frameset'; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 4222 |
## Process in the "main" phase, "after frameset" insertion mode... |
## Process in the "main" phase, "after frameset" insertion mode... |
| 4223 |
} |
} |
| 4224 |
|
|
| 4225 |
if ($token->{tag_name} eq 'frameset' and |
if ($token->{tag_name} eq 'frameset' and |
| 4226 |
$self->{insertion_mode} eq 'in frameset') { |
$self->{insertion_mode} == IN_FRAMESET_IM) { |
| 4227 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 4228 |
!!!next-token; |
!!!next-token; |
| 4229 |
redo B; |
redo B; |
| 4230 |
} elsif ($token->{tag_name} eq 'frame' and |
} elsif ($token->{tag_name} eq 'frame' and |
| 4231 |
$self->{insertion_mode} eq 'in frameset') { |
$self->{insertion_mode} == IN_FRAMESET_IM) { |
| 4232 |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
!!!insert-element ($token->{tag_name}, $token->{attributes}); |
| 4233 |
pop @{$self->{open_elements}}; |
pop @{$self->{open_elements}}; |
| 4234 |
!!!next-token; |
!!!next-token; |
| 4238 |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current); |
$parse_rcdata->(CDATA_CONTENT_MODEL, $insert_to_current); |
| 4239 |
redo B; |
redo B; |
| 4240 |
} else { |
} else { |
| 4241 |
if ($self->{insertion_mode} eq 'in frameset') { |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
| 4242 |
!!!parse-error (type => 'in frameset:'.$token->{tag_name}); |
!!!parse-error (type => 'in frameset:'.$token->{tag_name}); |
| 4243 |
} else { |
} else { |
| 4244 |
!!!parse-error (type => 'after frameset:'.$token->{tag_name}); |
!!!parse-error (type => 'after frameset:'.$token->{tag_name}); |
| 4247 |
!!!next-token; |
!!!next-token; |
| 4248 |
redo B; |
redo B; |
| 4249 |
} |
} |
| 4250 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 4251 |
if ($self->{insertion_mode} eq 'after html frameset') { |
if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { |
| 4252 |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
!!!parse-error (type => 'after html:/'.$token->{tag_name}); |
| 4253 |
|
|
| 4254 |
$self->{insertion_mode} = 'after frameset'; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 4255 |
## Process in the "main" phase, "after frameset" insertion mode... |
## Process in the "main" phase, "after frameset" insertion mode... |
| 4256 |
} |
} |
| 4257 |
|
|
| 4258 |
if ($token->{tag_name} eq 'frameset' and |
if ($token->{tag_name} eq 'frameset' and |
| 4259 |
$self->{insertion_mode} eq 'in frameset') { |
$self->{insertion_mode} == IN_FRAMESET_IM) { |
| 4260 |
if ($self->{open_elements}->[-1]->[1] eq 'html' and |
if ($self->{open_elements}->[-1]->[1] eq 'html' and |
| 4261 |
@{$self->{open_elements}} == 1) { |
@{$self->{open_elements}} == 1) { |
| 4262 |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}); |
| 4269 |
|
|
| 4270 |
if (not defined $self->{inner_html_node} and |
if (not defined $self->{inner_html_node} and |
| 4271 |
$self->{open_elements}->[-1]->[1] ne 'frameset') { |
$self->{open_elements}->[-1]->[1] ne 'frameset') { |
| 4272 |
$self->{insertion_mode} = 'after frameset'; |
$self->{insertion_mode} = AFTER_FRAMESET_IM; |
| 4273 |
} |
} |
| 4274 |
redo B; |
redo B; |
| 4275 |
} elsif ($token->{tag_name} eq 'html' and |
} elsif ($token->{tag_name} eq 'html' and |
| 4276 |
$self->{insertion_mode} eq 'after frameset') { |
$self->{insertion_mode} == AFTER_FRAMESET_IM) { |
| 4277 |
$self->{insertion_mode} = 'after html frameset'; |
$self->{insertion_mode} = AFTER_HTML_FRAMESET_IM; |
| 4278 |
!!!next-token; |
!!!next-token; |
| 4279 |
redo B; |
redo B; |
| 4280 |
} else { |
} else { |
| 4281 |
if ($self->{insertion_mode} eq 'in frameset') { |
if ($self->{insertion_mode} == IN_FRAMESET_IM) { |
| 4282 |
!!!parse-error (type => 'in frameset:/'.$token->{tag_name}); |
!!!parse-error (type => 'in frameset:/'.$token->{tag_name}); |
| 4283 |
} else { |
} else { |
| 4284 |
!!!parse-error (type => 'after frameset:/'.$token->{tag_name}); |
!!!parse-error (type => 'after frameset:/'.$token->{tag_name}); |
| 4297 |
} |
} |
| 4298 |
|
|
| 4299 |
## "in body" insertion mode |
## "in body" insertion mode |
| 4300 |
if ($token->{type} eq 'start tag') { |
if ($token->{type} == START_TAG_TOKEN) { |
| 4301 |
if ($token->{tag_name} eq 'script') { |
if ($token->{tag_name} eq 'script') { |
| 4302 |
## NOTE: This is an "as if in head" code clone |
## NOTE: This is an "as if in head" code clone |
| 4303 |
$script_start_tag->($insert); |
$script_start_tag->($insert); |
| 4377 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4378 |
if ($_->[1] eq 'p') { |
if ($_->[1] eq 'p') { |
| 4379 |
!!!back-token; |
!!!back-token; |
| 4380 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4381 |
redo B; |
redo B; |
| 4382 |
} elsif ({ |
} elsif ({ |
| 4383 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4390 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 4391 |
if ($token->{tag_name} eq 'pre') { |
if ($token->{tag_name} eq 'pre') { |
| 4392 |
!!!next-token; |
!!!next-token; |
| 4393 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4394 |
$token->{data} =~ s/^\x0A//; |
$token->{data} =~ s/^\x0A//; |
| 4395 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 4396 |
!!!next-token; |
!!!next-token; |
| 4411 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4412 |
if ($_->[1] eq 'p') { |
if ($_->[1] eq 'p') { |
| 4413 |
!!!back-token; |
!!!back-token; |
| 4414 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4415 |
redo B; |
redo B; |
| 4416 |
} elsif ({ |
} elsif ({ |
| 4417 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4431 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4432 |
if ($_->[1] eq 'p') { |
if ($_->[1] eq 'p') { |
| 4433 |
!!!back-token; |
!!!back-token; |
| 4434 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4435 |
redo B; |
redo B; |
| 4436 |
} elsif ({ |
} elsif ({ |
| 4437 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4478 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4479 |
if ($_->[1] eq 'p') { |
if ($_->[1] eq 'p') { |
| 4480 |
!!!back-token; |
!!!back-token; |
| 4481 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4482 |
redo B; |
redo B; |
| 4483 |
} elsif ({ |
} elsif ({ |
| 4484 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4525 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4526 |
if ($_->[1] eq 'p') { |
if ($_->[1] eq 'p') { |
| 4527 |
!!!back-token; |
!!!back-token; |
| 4528 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4529 |
redo B; |
redo B; |
| 4530 |
} elsif ({ |
} elsif ({ |
| 4531 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4549 |
my $node = $self->{open_elements}->[$_]; |
my $node = $self->{open_elements}->[$_]; |
| 4550 |
if ($node->[1] eq 'p') { |
if ($node->[1] eq 'p') { |
| 4551 |
!!!back-token; |
!!!back-token; |
| 4552 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4553 |
redo B; |
redo B; |
| 4554 |
} elsif ({ |
} elsif ({ |
| 4555 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4593 |
!!!parse-error (type => 'in a:a'); |
!!!parse-error (type => 'in a:a'); |
| 4594 |
|
|
| 4595 |
!!!back-token; |
!!!back-token; |
| 4596 |
$token = {type => 'end tag', tag_name => 'a'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'a'}; |
| 4597 |
$formatting_end_tag->($token->{tag_name}); |
$formatting_end_tag->($token->{tag_name}); |
| 4598 |
|
|
| 4599 |
AFE2: for (reverse 0..$#$active_formatting_elements) { |
AFE2: for (reverse 0..$#$active_formatting_elements) { |
| 4642 |
if ($node->[1] eq 'nobr') { |
if ($node->[1] eq 'nobr') { |
| 4643 |
!!!parse-error (type => 'not closed:nobr'); |
!!!parse-error (type => 'not closed:nobr'); |
| 4644 |
!!!back-token; |
!!!back-token; |
| 4645 |
$token = {type => 'end tag', tag_name => 'nobr'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'nobr'}; |
| 4646 |
redo B; |
redo B; |
| 4647 |
} elsif ({ |
} elsif ({ |
| 4648 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4664 |
if ($node->[1] eq 'button') { |
if ($node->[1] eq 'button') { |
| 4665 |
!!!parse-error (type => 'in button:button'); |
!!!parse-error (type => 'in button:button'); |
| 4666 |
!!!back-token; |
!!!back-token; |
| 4667 |
$token = {type => 'end tag', tag_name => 'button'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'button'}; |
| 4668 |
redo B; |
redo B; |
| 4669 |
} elsif ({ |
} elsif ({ |
| 4670 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4699 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4700 |
if ($_->[1] eq 'p') { |
if ($_->[1] eq 'p') { |
| 4701 |
!!!back-token; |
!!!back-token; |
| 4702 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4703 |
redo B; |
redo B; |
| 4704 |
} elsif ({ |
} elsif ({ |
| 4705 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4711 |
|
|
| 4712 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 4713 |
|
|
| 4714 |
$self->{insertion_mode} = 'in table'; |
$self->{insertion_mode} = IN_TABLE_IM; |
| 4715 |
|
|
| 4716 |
!!!next-token; |
!!!next-token; |
| 4717 |
redo B; |
redo B; |
| 4738 |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
INSCOPE: for (reverse @{$self->{open_elements}}) { |
| 4739 |
if ($_->[1] eq 'p') { |
if ($_->[1] eq 'p') { |
| 4740 |
!!!back-token; |
!!!back-token; |
| 4741 |
$token = {type => 'end tag', tag_name => 'p'}; |
$token = {type => END_TAG_TOKEN, tag_name => 'p'}; |
| 4742 |
redo B; |
redo B; |
| 4743 |
} elsif ({ |
} elsif ({ |
| 4744 |
table => 1, caption => 1, td => 1, th => 1, |
table => 1, caption => 1, td => 1, th => 1, |
| 4778 |
delete $at->{action}; |
delete $at->{action}; |
| 4779 |
delete $at->{prompt}; |
delete $at->{prompt}; |
| 4780 |
my @tokens = ( |
my @tokens = ( |
| 4781 |
{type => 'start tag', tag_name => 'form', |
{type => START_TAG_TOKEN, tag_name => 'form', |
| 4782 |
attributes => $form_attrs}, |
attributes => $form_attrs}, |
| 4783 |
{type => 'start tag', tag_name => 'hr'}, |
{type => START_TAG_TOKEN, tag_name => 'hr'}, |
| 4784 |
{type => 'start tag', tag_name => 'p'}, |
{type => START_TAG_TOKEN, tag_name => 'p'}, |
| 4785 |
{type => 'start tag', tag_name => 'label'}, |
{type => START_TAG_TOKEN, tag_name => 'label'}, |
| 4786 |
); |
); |
| 4787 |
if ($prompt_attr) { |
if ($prompt_attr) { |
| 4788 |
push @tokens, {type => 'character', data => $prompt_attr->{value}}; |
push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value}}; |
| 4789 |
} else { |
} else { |
| 4790 |
push @tokens, {type => 'character', |
push @tokens, {type => CHARACTER_TOKEN, |
| 4791 |
data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD |
data => 'This is a searchable index. Insert your search keywords here: '}; # SHOULD |
| 4792 |
## TODO: make this configurable |
## TODO: make this configurable |
| 4793 |
} |
} |
| 4794 |
push @tokens, |
push @tokens, |
| 4795 |
{type => 'start tag', tag_name => 'input', attributes => $at}, |
{type => START_TAG_TOKEN, tag_name => 'input', attributes => $at}, |
| 4796 |
#{type => 'character', data => ''}, # SHOULD |
#{type => CHARACTER_TOKEN, data => ''}, # SHOULD |
| 4797 |
{type => 'end tag', tag_name => 'label'}, |
{type => END_TAG_TOKEN, tag_name => 'label'}, |
| 4798 |
{type => 'end tag', tag_name => 'p'}, |
{type => END_TAG_TOKEN, tag_name => 'p'}, |
| 4799 |
{type => 'start tag', tag_name => 'hr'}, |
{type => START_TAG_TOKEN, tag_name => 'hr'}, |
| 4800 |
{type => 'end tag', tag_name => 'form'}; |
{type => END_TAG_TOKEN, tag_name => 'form'}; |
| 4801 |
$token = shift @tokens; |
$token = shift @tokens; |
| 4802 |
!!!back-token (@tokens); |
!!!back-token (@tokens); |
| 4803 |
redo B; |
redo B; |
| 4815 |
|
|
| 4816 |
my $text = ''; |
my $text = ''; |
| 4817 |
!!!next-token; |
!!!next-token; |
| 4818 |
if ($token->{type} eq 'character') { |
if ($token->{type} == CHARACTER_TOKEN) { |
| 4819 |
$token->{data} =~ s/^\x0A//; |
$token->{data} =~ s/^\x0A//; |
| 4820 |
unless (length $token->{data}) { |
unless (length $token->{data}) { |
| 4821 |
!!!next-token; |
!!!next-token; |
| 4822 |
} |
} |
| 4823 |
} |
} |
| 4824 |
while ($token->{type} eq 'character') { |
while ($token->{type} == CHARACTER_TOKEN) { |
| 4825 |
$text .= $token->{data}; |
$text .= $token->{data}; |
| 4826 |
!!!next-token; |
!!!next-token; |
| 4827 |
} |
} |
| 4831 |
|
|
| 4832 |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
$self->{content_model} = PCDATA_CONTENT_MODEL; |
| 4833 |
|
|
| 4834 |
if ($token->{type} eq 'end tag' and |
if ($token->{type} == END_TAG_TOKEN and |
| 4835 |
$token->{tag_name} eq $tag_name) { |
$token->{tag_name} eq $tag_name) { |
| 4836 |
## Ignore the token |
## Ignore the token |
| 4837 |
} else { |
} else { |
| 4853 |
|
|
| 4854 |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
!!!insert-element-t ($token->{tag_name}, $token->{attributes}); |
| 4855 |
|
|
| 4856 |
$self->{insertion_mode} = 'in select'; |
$self->{insertion_mode} = IN_SELECT_IM; |
| 4857 |
!!!next-token; |
!!!next-token; |
| 4858 |
redo B; |
redo B; |
| 4859 |
} elsif ({ |
} elsif ({ |
| 4876 |
!!!next-token; |
!!!next-token; |
| 4877 |
redo B; |
redo B; |
| 4878 |
} |
} |
| 4879 |
} elsif ($token->{type} eq 'end tag') { |
} elsif ($token->{type} == END_TAG_TOKEN) { |
| 4880 |
if ($token->{tag_name} eq 'body') { |
if ($token->{tag_name} eq 'body') { |
| 4881 |
if (@{$self->{open_elements}} > 1 and |
if (@{$self->{open_elements}} > 1 and |
| 4882 |
$self->{open_elements}->[1]->[1] eq 'body') { |
$self->{open_elements}->[1]->[1] eq 'body') { |
| 4890 |
} |
} |
| 4891 |
} |
} |
| 4892 |
|
|
| 4893 |
$self->{insertion_mode} = 'after body'; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 4894 |
!!!next-token; |
!!!next-token; |
| 4895 |
redo B; |
redo B; |
| 4896 |
} else { |
} else { |
| 4905 |
if ($self->{open_elements}->[-1]->[1] ne 'body') { |
if ($self->{open_elements}->[-1]->[1] ne 'body') { |
| 4906 |
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]); |
!!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]); |
| 4907 |
} |
} |
| 4908 |
$self->{insertion_mode} = 'after body'; |
$self->{insertion_mode} = AFTER_BODY_IM; |
| 4909 |
## reprocess |
## reprocess |
| 4910 |
redo B; |
redo B; |
| 4911 |
} else { |
} else { |
| 4937 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 4938 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 4939 |
!!!back-token; |
!!!back-token; |
| 4940 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 4941 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 4942 |
redo B; |
redo B; |
| 4943 |
} |
} |
| 4985 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 4986 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 4987 |
!!!back-token; |
!!!back-token; |
| 4988 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 4989 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 4990 |
redo B; |
redo B; |
| 4991 |
} |
} |
| 5024 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot=> 1, thead => 1, |
| 5025 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 5026 |
!!!back-token; |
!!!back-token; |
| 5027 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 5028 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 5029 |
redo B; |
redo B; |
| 5030 |
} |
} |
| 5098 |
if ({ |
if ({ |
| 5099 |
dd => 1, dt => 1, li => 1, p => 1, |
dd => 1, dt => 1, li => 1, p => 1, |
| 5100 |
td => 1, th => 1, tr => 1, |
td => 1, th => 1, tr => 1, |
| 5101 |
tbody => 1, tfoot=> 1, thead => 1, |
tbody => 1, tfoot => 1, thead => 1, |
| 5102 |
}->{$self->{open_elements}->[-1]->[1]}) { |
}->{$self->{open_elements}->[-1]->[1]}) { |
| 5103 |
!!!back-token; |
!!!back-token; |
| 5104 |
$token = {type => 'end tag', |
$token = {type => END_TAG_TOKEN, |
| 5105 |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
tag_name => $self->{open_elements}->[-1]->[1]}; # MUST |
| 5106 |
redo B; |
redo B; |
| 5107 |
} |
} |