114 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
116 |
|
|
117 |
|
## XML states |
## State numbers for the XML-specific part of the tokenizer.  They
## continue the numbering of the preceding state constants
## (PCDATA_STATE is 50) and, like them, are empty-prototype subs that
## return a literal, compared against / assigned to |$self->{state}|.
##
## In the visible code PI_STATE is entered from the "<" handling when
## the next character is "?" (U+003F) and |$self->{is_xml}| is true; when
## |is_xml| is false the same input raises the 'pio' parse error and
## switches to BOGUS_COMMENT_STATE instead.
##
## NOTE(review): the remaining PI_* states are not referenced in the
## portion of the tokenizer visible here.  Their names suggest the
## target, data and closing phases of a processing instruction --
## confirm against the corresponding state handlers.
118 |

sub PI_STATE () { 51 } |
119 |

sub PI_TARGET_STATE () { 52 } |
120 |

sub PI_TARGET_AFTER_STATE () { 53 } |
121 |

sub PI_DATA_STATE () { 54 } |
122 |

sub PI_AFTER_STATE () { 55 } |
123 |

sub PI_DATA_AFTER_STATE () { 56 } |
124 |
|
|
125 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
126 |
## list and descriptions) |
## list and descriptions) |
127 |
|
|
183 |
#$self->{level} |
#$self->{level} |
184 |
#$self->{set_nc} |
#$self->{set_nc} |
185 |
#$self->{parse_error} |
#$self->{parse_error} |
186 |
|
#$self->{is_xml} (if XML) |
187 |
|
|
188 |
$self->{state} = DATA_STATE; # MUST |
$self->{state} = DATA_STATE; # MUST |
189 |
#$self->{s_kwd}; # state keyword - initialized when used |
$self->{s_kwd} = ''; # state keyword |
190 |
#$self->{entity__value}; # initialized when used |
#$self->{entity__value}; # initialized when used |
191 |
#$self->{entity__match}; # initialized when used |
#$self->{entity__match}; # initialized when used |
192 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
217 |
## ->{value} |
## ->{value} |
218 |
## ->{has_reference} == 1 or 0 |
## ->{has_reference} == 1 or 0 |
219 |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
220 |
|
## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN) |
221 |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
222 |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
223 |
## while the token is pushed back to the stack. |
## while the token is pushed back to the stack. |
321 |
} |
} |
322 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
323 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
324 |
$self->{s_kwd} .= '-'; |
if ($self->{s_kwd} eq '<!-') { |
|
|
|
|
if ($self->{s_kwd} eq '<!--') { |
|
325 |
!!!cp (3); |
!!!cp (3); |
326 |
$self->{escape} = 1; # unless $self->{escape}; |
$self->{escape} = 1; # unless $self->{escape}; |
327 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
328 |
# |
# |
329 |
} elsif ($self->{s_kwd} eq '---') { |
} elsif ($self->{s_kwd} eq '-') { |
330 |
!!!cp (4); |
!!!cp (4); |
331 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
332 |
# |
# |
333 |
|
} elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') { |
334 |
|
!!!cp (4.1); |
335 |
|
$self->{s_kwd} .= '-'; |
336 |
|
# |
337 |
} else { |
} else { |
338 |
!!!cp (5); |
!!!cp (5); |
339 |
|
$self->{s_kwd} = '-'; |
340 |
# |
# |
341 |
} |
} |
342 |
} |
} |
372 |
if ($self->{s_kwd} eq '--') { |
if ($self->{s_kwd} eq '--') { |
373 |
!!!cp (8); |
!!!cp (8); |
374 |
delete $self->{escape}; |
delete $self->{escape}; |
375 |
|
# |
376 |
} else { |
} else { |
377 |
!!!cp (9); |
!!!cp (9); |
378 |
|
# |
379 |
} |
} |
380 |
|
} elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') { |
381 |
|
!!!cp (9.1); |
382 |
|
!!!parse-error (type => 'unmatched mse', ## TODO: type |
383 |
|
line => $self->{line_prev}, |
384 |
|
column => $self->{column_prev} - 1); |
385 |
|
# |
386 |
} else { |
} else { |
387 |
!!!cp (10); |
!!!cp (10); |
388 |
|
# |
389 |
} |
} |
390 |
|
|
391 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
392 |
# |
# |
393 |
|
} elsif ($self->{nc} == 0x005D) { # ] |
394 |
|
if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') { |
395 |
|
!!!cp (10.1); |
396 |
|
$self->{s_kwd} .= ']'; |
397 |
|
} elsif ($self->{s_kwd} eq ']]') { |
398 |
|
!!!cp (10.2); |
399 |
|
# |
400 |
|
} else { |
401 |
|
!!!cp (10.3); |
402 |
|
$self->{s_kwd} = ''; |
403 |
|
} |
404 |
|
# |
405 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
406 |
!!!cp (11); |
!!!cp (11); |
407 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
419 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
420 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
421 |
}; |
}; |
422 |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
if ($self->{read_until}->($token->{data}, q{-!<>&\]}, |
423 |
length $token->{data})) { |
length $token->{data})) { |
424 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
425 |
} |
} |
426 |
|
|
427 |
## Stay in the data state. |
## Stay in the data state. |
428 |
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
if (not $self->{is_xml} and |
429 |
|
$self->{content_model} == PCDATA_CONTENT_MODEL) { |
430 |
!!!cp (13); |
!!!cp (13); |
431 |
$self->{state} = PCDATA_STATE; |
$self->{state} = PCDATA_STATE; |
432 |
} else { |
} else { |
454 |
|
|
455 |
## reconsume |
## reconsume |
456 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
457 |
|
$self->{s_kwd} = ''; |
458 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
459 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
460 |
column => $self->{column_prev}, |
column => $self->{column_prev}, |
476 |
!!!cp (19); |
!!!cp (19); |
477 |
$self->{ct} |
$self->{ct} |
478 |
= {type => START_TAG_TOKEN, |
= {type => START_TAG_TOKEN, |
479 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
480 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
481 |
column => $self->{column_prev}}; |
column => $self->{column_prev}}; |
482 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
498 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
499 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
500 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
501 |
|
$self->{s_kwd} = ''; |
502 |
!!!next-input-character; |
!!!next-input-character; |
503 |
|
|
504 |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
508 |
|
|
509 |
redo A; |
redo A; |
510 |
} elsif ($self->{nc} == 0x003F) { # ? |
} elsif ($self->{nc} == 0x003F) { # ? |
511 |
!!!cp (22); |
if ($self->{is_xml}) { |
512 |
!!!parse-error (type => 'pio', |
!!!cp (22.1); |
513 |
line => $self->{line_prev}, |
$self->{state} = PI_STATE; |
514 |
column => $self->{column_prev}); |
!!!next-input-character; |
515 |
$self->{state} = BOGUS_COMMENT_STATE; |
redo A; |
516 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
} else { |
517 |
line => $self->{line_prev}, |
!!!cp (22); |
518 |
column => $self->{column_prev}, |
!!!parse-error (type => 'pio', |
519 |
}; |
line => $self->{line_prev}, |
520 |
## $self->{nc} is intentionally left as is |
column => $self->{column_prev}); |
521 |
redo A; |
$self->{state} = BOGUS_COMMENT_STATE; |
522 |
} else { |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
523 |
|
line => $self->{line_prev}, |
524 |
|
column => $self->{column_prev}, |
525 |
|
}; |
526 |
|
## $self->{nc} is intentionally left as is |
527 |
|
redo A; |
528 |
|
} |
529 |
|
} elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) { |
530 |
!!!cp (23); |
!!!cp (23); |
531 |
!!!parse-error (type => 'bare stago', |
!!!parse-error (type => 'bare stago', |
532 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
533 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
534 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
535 |
|
$self->{s_kwd} = ''; |
536 |
## reconsume |
## reconsume |
537 |
|
|
538 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
541 |
}); |
}); |
542 |
|
|
543 |
redo A; |
redo A; |
544 |
|
} else { |
545 |
|
## XML5: "<:" is a parse error. |
546 |
|
!!!cp (23.1); |
547 |
|
$self->{ct} = {type => START_TAG_TOKEN, |
548 |
|
tag_name => chr ($self->{nc}), |
549 |
|
line => $self->{line_prev}, |
550 |
|
column => $self->{column_prev}}; |
551 |
|
$self->{state} = TAG_NAME_STATE; |
552 |
|
!!!next-input-character; |
553 |
|
redo A; |
554 |
} |
} |
555 |
} else { |
} else { |
556 |
die "$0: $self->{content_model} in tag open"; |
die "$0: $self->{content_model} in tag open"; |
571 |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
572 |
!!!cp (28); |
!!!cp (28); |
573 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
574 |
|
$self->{s_kwd} = ''; |
575 |
## Reconsume. |
## Reconsume. |
576 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
577 |
line => $l, column => $c, |
line => $l, column => $c, |
585 |
!!!cp (29); |
!!!cp (29); |
586 |
$self->{ct} |
$self->{ct} |
587 |
= {type => END_TAG_TOKEN, |
= {type => END_TAG_TOKEN, |
588 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
589 |
line => $l, column => $c}; |
line => $l, column => $c}; |
590 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
591 |
!!!next-input-character; |
!!!next-input-character; |
605 |
line => $self->{line_prev}, ## "<" in "</>" |
line => $self->{line_prev}, ## "<" in "</>" |
606 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
607 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
608 |
|
$self->{s_kwd} = ''; |
609 |
!!!next-input-character; |
!!!next-input-character; |
610 |
redo A; |
redo A; |
611 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
612 |
!!!cp (32); |
!!!cp (32); |
613 |
!!!parse-error (type => 'bare etago'); |
!!!parse-error (type => 'bare etago'); |
614 |
|
$self->{s_kwd} = ''; |
615 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
616 |
# reconsume |
# reconsume |
617 |
|
|
651 |
} else { |
} else { |
652 |
!!!cp (25); |
!!!cp (25); |
653 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
654 |
|
$self->{s_kwd} = ''; |
655 |
## Reconsume. |
## Reconsume. |
656 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
657 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
670 |
!!!cp (26); |
!!!cp (26); |
671 |
## Reconsume. |
## Reconsume. |
672 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
673 |
|
$self->{s_kwd} = ''; |
674 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
675 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
676 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
712 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
713 |
} |
} |
714 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
715 |
|
$self->{s_kwd} = ''; |
716 |
!!!next-input-character; |
!!!next-input-character; |
717 |
|
|
718 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
721 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
722 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
723 |
!!!cp (38); |
!!!cp (38); |
724 |
$self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020); |
$self->{ct}->{tag_name} |
725 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
726 |
# start tag or end tag |
# start tag or end tag |
727 |
## Stay in this state |
## Stay in this state |
728 |
!!!next-input-character; |
!!!next-input-character; |
745 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
746 |
} |
} |
747 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
748 |
|
$self->{s_kwd} = ''; |
749 |
# reconsume |
# reconsume |
750 |
|
|
751 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
786 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
787 |
} |
} |
788 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
789 |
|
$self->{s_kwd} = ''; |
790 |
!!!next-input-character; |
!!!next-input-character; |
791 |
|
|
792 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
796 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
797 |
!!!cp (49); |
!!!cp (49); |
798 |
$self->{ca} |
$self->{ca} |
799 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
800 |
value => '', |
value => '', |
801 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
802 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
824 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
825 |
} |
} |
826 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
827 |
|
$self->{s_kwd} = ''; |
828 |
# reconsume |
# reconsume |
829 |
|
|
830 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
890 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
891 |
} |
} |
892 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
893 |
|
$self->{s_kwd} = ''; |
894 |
!!!next-input-character; |
!!!next-input-character; |
895 |
|
|
896 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
899 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
900 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
901 |
!!!cp (63); |
!!!cp (63); |
902 |
$self->{ca}->{name} .= chr ($self->{nc} + 0x0020); |
$self->{ca}->{name} |
903 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
904 |
## Stay in the state |
## Stay in the state |
905 |
!!!next-input-character; |
!!!next-input-character; |
906 |
redo A; |
redo A; |
929 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
930 |
} |
} |
931 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
932 |
|
$self->{s_kwd} = ''; |
933 |
# reconsume |
# reconsume |
934 |
|
|
935 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
976 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
977 |
} |
} |
978 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
979 |
|
$self->{s_kwd} = ''; |
980 |
!!!next-input-character; |
!!!next-input-character; |
981 |
|
|
982 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
986 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
987 |
!!!cp (76); |
!!!cp (76); |
988 |
$self->{ca} |
$self->{ca} |
989 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
990 |
value => '', |
value => '', |
991 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
992 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
1014 |
} else { |
} else { |
1015 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1016 |
} |
} |
1017 |
|
$self->{s_kwd} = ''; |
1018 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1019 |
# reconsume |
# reconsume |
1020 |
|
|
1076 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1077 |
} |
} |
1078 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1079 |
|
$self->{s_kwd} = ''; |
1080 |
!!!next-input-character; |
!!!next-input-character; |
1081 |
|
|
1082 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1100 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1101 |
} |
} |
1102 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1103 |
|
$self->{s_kwd} = ''; |
1104 |
## reconsume |
## reconsume |
1105 |
|
|
1106 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1153 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1154 |
} |
} |
1155 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1156 |
|
$self->{s_kwd} = ''; |
1157 |
## reconsume |
## reconsume |
1158 |
|
|
1159 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1205 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1206 |
} |
} |
1207 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1208 |
|
$self->{s_kwd} = ''; |
1209 |
## reconsume |
## reconsume |
1210 |
|
|
1211 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1256 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1257 |
} |
} |
1258 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1259 |
|
$self->{s_kwd} = ''; |
1260 |
!!!next-input-character; |
!!!next-input-character; |
1261 |
|
|
1262 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1280 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1281 |
} |
} |
1282 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1283 |
|
$self->{s_kwd} = ''; |
1284 |
## reconsume |
## reconsume |
1285 |
|
|
1286 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1329 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1330 |
} |
} |
1331 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1332 |
|
$self->{s_kwd} = ''; |
1333 |
!!!next-input-character; |
!!!next-input-character; |
1334 |
|
|
1335 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1357 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1358 |
} |
} |
1359 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1360 |
|
$self->{s_kwd} = ''; |
1361 |
## Reconsume. |
## Reconsume. |
1362 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1363 |
redo A; |
redo A; |
1388 |
} |
} |
1389 |
|
|
1390 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1391 |
|
$self->{s_kwd} = ''; |
1392 |
!!!next-input-character; |
!!!next-input-character; |
1393 |
|
|
1394 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1411 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1412 |
} |
} |
1413 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1414 |
|
$self->{s_kwd} = ''; |
1415 |
## Reconsume. |
## Reconsume. |
1416 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1417 |
redo A; |
redo A; |
1432 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
1433 |
!!!cp (124); |
!!!cp (124); |
1434 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1435 |
|
$self->{s_kwd} = ''; |
1436 |
!!!next-input-character; |
!!!next-input-character; |
1437 |
|
|
1438 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1440 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
1441 |
!!!cp (125); |
!!!cp (125); |
1442 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1443 |
|
$self->{s_kwd} = ''; |
1444 |
## reconsume |
## reconsume |
1445 |
|
|
1446 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1472 |
$self->{s_kwd} = chr $self->{nc}; |
$self->{s_kwd} = chr $self->{nc}; |
1473 |
!!!next-input-character; |
!!!next-input-character; |
1474 |
redo A; |
redo A; |
1475 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
} elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
1476 |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL and |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL) or |
1477 |
|
$self->{is_xml}) and |
1478 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
1479 |
!!!cp (135.4); |
!!!cp (135.4); |
1480 |
$self->{state} = MD_CDATA_STATE; |
$self->{state} = MD_CDATA_STATE; |
1583 |
redo A; |
redo A; |
1584 |
} elsif ($self->{s_kwd} eq '[CDATA' and |
} elsif ($self->{s_kwd} eq '[CDATA' and |
1585 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
1586 |
!!!cp (135.2); |
if ($self->{is_xml} and |
1587 |
|
not $self->{tainted} and |
1588 |
|
@{$self->{open_elements} or []} == 0) { |
1589 |
|
!!!cp (135.2); |
1590 |
|
!!!parse-error (type => 'cdata outside of root element', |
1591 |
|
line => $self->{line_prev}, |
1592 |
|
column => $self->{column_prev} - 7); |
1593 |
|
$self->{tainted} = 1; |
1594 |
|
} else { |
1595 |
|
!!!cp (135.21); |
1596 |
|
} |
1597 |
|
|
1598 |
$self->{ct} = {type => CHARACTER_TOKEN, |
$self->{ct} = {type => CHARACTER_TOKEN, |
1599 |
data => '', |
data => '', |
1600 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
1626 |
!!!cp (138); |
!!!cp (138); |
1627 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
1628 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1629 |
|
$self->{s_kwd} = ''; |
1630 |
!!!next-input-character; |
!!!next-input-character; |
1631 |
|
|
1632 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1636 |
!!!cp (139); |
!!!cp (139); |
1637 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1638 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1639 |
|
$self->{s_kwd} = ''; |
1640 |
## reconsume |
## reconsume |
1641 |
|
|
1642 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1660 |
!!!cp (142); |
!!!cp (142); |
1661 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
1662 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1663 |
|
$self->{s_kwd} = ''; |
1664 |
!!!next-input-character; |
!!!next-input-character; |
1665 |
|
|
1666 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1670 |
!!!cp (143); |
!!!cp (143); |
1671 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1672 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1673 |
|
$self->{s_kwd} = ''; |
1674 |
## reconsume |
## reconsume |
1675 |
|
|
1676 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1694 |
!!!cp (146); |
!!!cp (146); |
1695 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1696 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1697 |
|
$self->{s_kwd} = ''; |
1698 |
## reconsume |
## reconsume |
1699 |
|
|
1700 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1720 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
1721 |
!!!cp (149); |
!!!cp (149); |
1722 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1723 |
|
$self->{s_kwd} = ''; |
1724 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1725 |
|
$self->{s_kwd} = ''; |
1726 |
## reconsume |
## reconsume |
1727 |
|
|
1728 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1739 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
1740 |
!!!cp (151); |
!!!cp (151); |
1741 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1742 |
|
$self->{s_kwd} = ''; |
1743 |
!!!next-input-character; |
!!!next-input-character; |
1744 |
|
|
1745 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1758 |
!!!cp (153); |
!!!cp (153); |
1759 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1760 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1761 |
|
$self->{s_kwd} = ''; |
1762 |
## reconsume |
## reconsume |
1763 |
|
|
1764 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1797 |
!!!cp (158); |
!!!cp (158); |
1798 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
1799 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1800 |
|
$self->{s_kwd} = ''; |
1801 |
!!!next-input-character; |
!!!next-input-character; |
1802 |
|
|
1803 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
1807 |
!!!cp (159); |
!!!cp (159); |
1808 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
1809 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1810 |
|
$self->{s_kwd} = ''; |
1811 |
## reconsume |
## reconsume |
1812 |
|
|
1813 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
1831 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
1832 |
!!!cp (162); |
!!!cp (162); |
1833 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1834 |
|
$self->{s_kwd} = ''; |
1835 |
!!!next-input-character; |
!!!next-input-character; |
1836 |
|
|
1837 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
1841 |
!!!cp (163); |
!!!cp (163); |
1842 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1843 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1844 |
|
$self->{s_kwd} = ''; |
1845 |
## reconsume |
## reconsume |
1846 |
|
|
1847 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
1865 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
1866 |
!!!cp (166); |
!!!cp (166); |
1867 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1868 |
|
$self->{s_kwd} = ''; |
1869 |
!!!next-input-character; |
!!!next-input-character; |
1870 |
|
|
1871 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
1875 |
!!!cp (167); |
!!!cp (167); |
1876 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1877 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1878 |
|
$self->{s_kwd} = ''; |
1879 |
## reconsume |
## reconsume |
1880 |
|
|
1881 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2004 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
2005 |
|
|
2006 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2007 |
|
$self->{s_kwd} = ''; |
2008 |
!!!next-input-character; |
!!!next-input-character; |
2009 |
|
|
2010 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2016 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2017 |
|
|
2018 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2019 |
|
$self->{s_kwd} = ''; |
2020 |
## reconsume |
## reconsume |
2021 |
|
|
2022 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2043 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2044 |
|
|
2045 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2046 |
|
$self->{s_kwd} = ''; |
2047 |
!!!next-input-character; |
!!!next-input-character; |
2048 |
|
|
2049 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2055 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2056 |
|
|
2057 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2058 |
|
$self->{s_kwd} = ''; |
2059 |
## reconsume |
## reconsume |
2060 |
|
|
2061 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2084 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2085 |
|
|
2086 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2087 |
|
$self->{s_kwd} = ''; |
2088 |
!!!next-input-character; |
!!!next-input-character; |
2089 |
|
|
2090 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2096 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2097 |
|
|
2098 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2099 |
|
$self->{s_kwd} = ''; |
2100 |
## reconsume |
## reconsume |
2101 |
|
|
2102 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2135 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2136 |
!!!cp (198); |
!!!cp (198); |
2137 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2138 |
|
$self->{s_kwd} = ''; |
2139 |
!!!next-input-character; |
!!!next-input-character; |
2140 |
|
|
2141 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2146 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2147 |
|
|
2148 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2149 |
|
$self->{s_kwd} = ''; |
2150 |
## reconsume |
## reconsume |
2151 |
|
|
2152 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2184 |
!!!cp (204); |
!!!cp (204); |
2185 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
2186 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2187 |
|
$self->{s_kwd} = ''; |
2188 |
!!!next-input-character; |
!!!next-input-character; |
2189 |
|
|
2190 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2196 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2197 |
|
|
2198 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2199 |
|
$self->{s_kwd} = ''; |
2200 |
## reconsume |
## reconsume |
2201 |
|
|
2202 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2223 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2224 |
|
|
2225 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2226 |
|
$self->{s_kwd} = ''; |
2227 |
!!!next-input-character; |
!!!next-input-character; |
2228 |
|
|
2229 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2235 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2236 |
|
|
2237 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2238 |
|
$self->{s_kwd} = ''; |
2239 |
## reconsume |
## reconsume |
2240 |
|
|
2241 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2264 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2265 |
|
|
2266 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2267 |
|
$self->{s_kwd} = ''; |
2268 |
!!!next-input-character; |
!!!next-input-character; |
2269 |
|
|
2270 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2276 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2277 |
|
|
2278 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2279 |
|
$self->{s_kwd} = ''; |
2280 |
## reconsume |
## reconsume |
2281 |
|
|
2282 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2303 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2304 |
!!!cp (216); |
!!!cp (216); |
2305 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2306 |
|
$self->{s_kwd} = ''; |
2307 |
!!!next-input-character; |
!!!next-input-character; |
2308 |
|
|
2309 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2313 |
!!!cp (217); |
!!!cp (217); |
2314 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2315 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2316 |
|
$self->{s_kwd} = ''; |
2317 |
## reconsume |
## reconsume |
2318 |
|
|
2319 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2333 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
2334 |
!!!cp (219); |
!!!cp (219); |
2335 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2336 |
|
$self->{s_kwd} = ''; |
2337 |
!!!next-input-character; |
!!!next-input-character; |
2338 |
|
|
2339 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2342 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2343 |
!!!cp (220); |
!!!cp (220); |
2344 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2345 |
|
$self->{s_kwd} = ''; |
2346 |
## reconsume |
## reconsume |
2347 |
|
|
2348 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2368 |
!!!next-input-character; |
!!!next-input-character; |
2369 |
redo A; |
redo A; |
2370 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2371 |
|
if ($self->{is_xml}) { |
2372 |
|
!!!cp (221.11); |
2373 |
|
!!!parse-error (type => 'no mse'); ## TODO: type |
2374 |
|
} else { |
2375 |
|
!!!cp (221.12); |
2376 |
|
} |
2377 |
|
|
2378 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2379 |
|
$self->{s_kwd} = ''; |
2380 |
!!!next-input-character; |
!!!next-input-character; |
2381 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
2382 |
!!!cp (221.2); |
!!!cp (221.2); |
2415 |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
2416 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
2417 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2418 |
|
$self->{s_kwd} = ''; |
2419 |
!!!next-input-character; |
!!!next-input-character; |
2420 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
2421 |
!!!cp (221.7); |
!!!cp (221.7); |
2483 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2484 |
!!!cp (997); |
!!!cp (997); |
2485 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2486 |
|
$self->{s_kwd} = ''; |
2487 |
## Reconsume. |
## Reconsume. |
2488 |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
2489 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2494 |
!!!cp (996); |
!!!cp (996); |
2495 |
$self->{ca}->{value} .= '&'; |
$self->{ca}->{value} .= '&'; |
2496 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2497 |
|
$self->{s_kwd} = ''; |
2498 |
## Reconsume. |
## Reconsume. |
2499 |
redo A; |
redo A; |
2500 |
} |
} |
2525 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2526 |
!!!cp (1019); |
!!!cp (1019); |
2527 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2528 |
|
$self->{s_kwd} = ''; |
2529 |
## Reconsume. |
## Reconsume. |
2530 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2531 |
data => '&#', |
data => '&#', |
2537 |
!!!cp (993); |
!!!cp (993); |
2538 |
$self->{ca}->{value} .= '&#'; |
$self->{ca}->{value} .= '&#'; |
2539 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2540 |
|
$self->{s_kwd} = ''; |
2541 |
## Reconsume. |
## Reconsume. |
2542 |
redo A; |
redo A; |
2543 |
} |
} |
2583 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2584 |
!!!cp (992); |
!!!cp (992); |
2585 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2586 |
|
$self->{s_kwd} = ''; |
2587 |
## Reconsume. |
## Reconsume. |
2588 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2589 |
|
has_reference => 1, |
2590 |
line => $l, column => $c, |
line => $l, column => $c, |
2591 |
}); |
}); |
2592 |
redo A; |
redo A; |
2595 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
2596 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
2597 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2598 |
|
$self->{s_kwd} = ''; |
2599 |
## Reconsume. |
## Reconsume. |
2600 |
redo A; |
redo A; |
2601 |
} |
} |
2621 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2622 |
!!!cp (1005); |
!!!cp (1005); |
2623 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2624 |
|
$self->{s_kwd} = ''; |
2625 |
## Reconsume. |
## Reconsume. |
2626 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2627 |
data => '&' . $self->{s_kwd}, |
data => '&' . $self->{s_kwd}, |
2633 |
!!!cp (989); |
!!!cp (989); |
2634 |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
2635 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2636 |
|
$self->{s_kwd} = ''; |
2637 |
## Reconsume. |
## Reconsume. |
2638 |
redo A; |
redo A; |
2639 |
} |
} |
2696 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2697 |
!!!cp (988); |
!!!cp (988); |
2698 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2699 |
|
$self->{s_kwd} = ''; |
2700 |
## Reconsume. |
## Reconsume. |
2701 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2702 |
|
has_reference => 1, |
2703 |
line => $l, column => $c, |
line => $l, column => $c, |
2704 |
}); |
}); |
2705 |
redo A; |
redo A; |
2708 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
2709 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
2710 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2711 |
|
$self->{s_kwd} = ''; |
2712 |
## Reconsume. |
## Reconsume. |
2713 |
redo A; |
redo A; |
2714 |
} |
} |
2791 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2792 |
!!!cp (986); |
!!!cp (986); |
2793 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2794 |
|
$self->{s_kwd} = ''; |
2795 |
## Reconsume. |
## Reconsume. |
2796 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2797 |
data => $data, |
data => $data, |
2798 |
|
has_reference => $has_ref, |
2799 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2800 |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
2801 |
}); |
}); |
2805 |
$self->{ca}->{value} .= $data; |
$self->{ca}->{value} .= $data; |
2806 |
$self->{ca}->{has_reference} = 1 if $has_ref; |
$self->{ca}->{has_reference} = 1 if $has_ref; |
2807 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2808 |
|
$self->{s_kwd} = ''; |
2809 |
|
## Reconsume. |
2810 |
|
redo A; |
2811 |
|
} |
2812 |
|
|
2813 |
|
## XML-only states |
2814 |
|
|
2815 |
|
} elsif ($self->{state} == PI_STATE) { |
2816 |
|
if ($is_space->{$self->{nc}} or |
2817 |
|
$self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else" |
2818 |
|
$self->{nc} == -1) { |
2819 |
|
!!!parse-error (type => 'bare pio', ## TODO: type |
2820 |
|
line => $self->{line_prev}, |
2821 |
|
column => $self->{column_prev} |
2822 |
|
- 1 * ($self->{nc} != -1)); |
2823 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
2824 |
## Reconsume. |
## Reconsume. |
2825 |
|
$self->{ct} = {type => COMMENT_TOKEN, |
2826 |
|
data => '?', |
2827 |
|
line => $self->{line_prev}, |
2828 |
|
column => $self->{column_prev} |
2829 |
|
- 1 * ($self->{nc} != -1), |
2830 |
|
}; |
2831 |
|
redo A; |
2832 |
|
} else { |
2833 |
|
$self->{ct} = {type => PI_TOKEN, |
2834 |
|
target => chr $self->{nc}, |
2835 |
|
data => '', |
2836 |
|
line => $self->{line_prev}, |
2837 |
|
column => $self->{column_prev} - 1, |
2838 |
|
}; |
2839 |
|
$self->{state} = PI_TARGET_STATE; |
2840 |
|
!!!next-input-character; |
2841 |
redo A; |
redo A; |
2842 |
} |
} |
2843 |
|
} elsif ($self->{state} == PI_TARGET_STATE) { |
2844 |
|
if ($is_space->{$self->{nc}}) { |
2845 |
|
$self->{state} = PI_TARGET_AFTER_STATE; |
2846 |
|
!!!next-input-character; |
2847 |
|
redo A; |
2848 |
|
} elsif ($self->{nc} == -1) { |
2849 |
|
!!!parse-error (type => 'no pic'); ## TODO: type |
2850 |
|
$self->{state} = DATA_STATE; |
2851 |
|
$self->{s_kwd} = ''; |
2852 |
|
## Reconsume. |
2853 |
|
!!!emit ($self->{ct}); # pi |
2854 |
|
redo A; |
2855 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
2856 |
|
$self->{state} = PI_AFTER_STATE; |
2857 |
|
!!!next-input-character; |
2858 |
|
redo A; |
2859 |
|
} else { |
2860 |
|
## XML5: typo ("tag name" -> "target") |
2861 |
|
$self->{ct}->{target} .= chr $self->{nc}; # pi |
2862 |
|
!!!next-input-character; |
2863 |
|
redo A; |
2864 |
|
} |
2865 |
|
} elsif ($self->{state} == PI_TARGET_AFTER_STATE) { |
2866 |
|
if ($is_space->{$self->{nc}}) { |
2867 |
|
## Stay in the state. |
2868 |
|
!!!next-input-character; |
2869 |
|
redo A; |
2870 |
|
} else { |
2871 |
|
$self->{state} = PI_DATA_STATE; |
2872 |
|
## Reprocess. |
2873 |
|
redo A; |
2874 |
|
} |
2875 |
|
} elsif ($self->{state} == PI_DATA_STATE) { |
2876 |
|
if ($self->{nc} == 0x003F) { # ? |
2877 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
2878 |
|
!!!next-input-character; |
2879 |
|
redo A; |
2880 |
|
} elsif ($self->{nc} == -1) { |
2881 |
|
!!!parse-error (type => 'no pic'); ## TODO: type |
2882 |
|
$self->{state} = DATA_STATE; |
2883 |
|
$self->{s_kwd} = ''; |
2884 |
|
## Reprocess. |
2885 |
|
!!!emit ($self->{ct}); # pi |
2886 |
|
redo A; |
2887 |
|
} else { |
2888 |
|
$self->{ct}->{data} .= chr $self->{nc}; # pi |
2889 |
|
$self->{read_until}->($self->{ct}->{data}, q[?], |
2890 |
|
length $self->{ct}->{data}); |
2891 |
|
## Stay in the state. |
2892 |
|
!!!next-input-character; |
2893 |
|
## Reprocess. |
2894 |
|
redo A; |
2895 |
|
} |
2896 |
|
} elsif ($self->{state} == PI_AFTER_STATE) { |
2897 |
|
if ($self->{nc} == 0x003E) { # > |
2898 |
|
$self->{state} = DATA_STATE; |
2899 |
|
$self->{s_kwd} = ''; |
2900 |
|
!!!next-input-character; |
2901 |
|
!!!emit ($self->{ct}); # pi |
2902 |
|
redo A; |
2903 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
2904 |
|
!!!parse-error (type => 'no s after target', ## TODO: type |
2905 |
|
line => $self->{line_prev}, |
2906 |
|
column => $self->{column_prev}); ## XML5: no error |
2907 |
|
$self->{ct}->{data} .= '?'; |
2908 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
2909 |
|
!!!next-input-character; |
2910 |
|
redo A; |
2911 |
|
} else { |
2912 |
|
!!!parse-error (type => 'no s after target', ## TODO: type |
2913 |
|
line => $self->{line_prev}, |
2914 |
|
column => $self->{column_prev} |
2915 |
|
+ 1 * ($self->{nc} == -1)); ## XML5: no error |
2916 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
2917 |
|
$self->{state} = PI_DATA_STATE; |
2918 |
|
## Reprocess. |
2919 |
|
redo A; |
2920 |
|
} |
2921 |
|
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
2922 |
|
## XML5: Same as "pi after state" in XML5 |
2923 |
|
if ($self->{nc} == 0x003E) { # > |
2924 |
|
$self->{state} = DATA_STATE; |
2925 |
|
$self->{s_kwd} = ''; |
2926 |
|
!!!next-input-character; |
2927 |
|
!!!emit ($self->{ct}); # pi |
2928 |
|
redo A; |
2929 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
2930 |
|
$self->{ct}->{data} .= '?'; |
2931 |
|
## Stay in the state. |
2932 |
|
!!!next-input-character; |
2933 |
|
redo A; |
2934 |
|
} else { |
2935 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
2936 |
|
$self->{state} = PI_DATA_STATE; |
2937 |
|
## Reprocess. |
2938 |
|
redo A; |
2939 |
|
} |
2940 |
|
|
2941 |
} else { |
} else { |
2942 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
2943 |
} |
} |