2 |
use strict; |
use strict; |
3 |
our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
4 |
|
|
5 |
|
## Compile-time export setup: makes the current package an Exporter
## subclass and declares which token type constants may be imported.
BEGIN { |
6 |
|
## Load Exporter and add it to this package's inheritance list.
require Exporter; |
7 |
|
push our @ISA, 'Exporter'; |
8 |
|
|
9 |
|
## Names that are exported only when explicitly requested.
our @EXPORT_OK = qw( |
10 |
|
DOCTYPE_TOKEN |
11 |
|
COMMENT_TOKEN |
12 |
|
START_TAG_TOKEN |
13 |
|
END_TAG_TOKEN |
14 |
|
END_OF_FILE_TOKEN |
15 |
|
CHARACTER_TOKEN |
16 |
|
PI_TOKEN |
17 |
|
ABORT_TOKEN |
18 |
|
); |
19 |
|
|
20 |
|
## The |:token| tag imports all eight token type constants at once;
## it lists the same names as |@EXPORT_OK| above.
our %EXPORT_TAGS = ( |
21 |
|
token => [qw( |
22 |
|
DOCTYPE_TOKEN |
23 |
|
COMMENT_TOKEN |
24 |
|
START_TAG_TOKEN |
25 |
|
END_TAG_TOKEN |
26 |
|
END_OF_FILE_TOKEN |
27 |
|
CHARACTER_TOKEN |
28 |
|
PI_TOKEN |
29 |
|
ABORT_TOKEN |
30 |
|
)], |
31 |
|
); |
32 |
|
} |
33 |
|
|
34 |
|
## Token types |
## Each constant below is a sub with an empty prototype that returns a
## fixed integer (1 through 8) identifying one kind of token.  These
## are the names listed in |@EXPORT_OK| and in the |:token| export tag.
35 |
|
|
36 |
|
sub DOCTYPE_TOKEN () { 1 } |
37 |
|
sub COMMENT_TOKEN () { 2 } |
38 |
|
sub START_TAG_TOKEN () { 3 } |
39 |
|
sub END_TAG_TOKEN () { 4 } |
40 |
|
sub END_OF_FILE_TOKEN () { 5 } |
41 |
|
sub CHARACTER_TOKEN () { 6 } |
42 |
|
sub PI_TOKEN () { 7 } # XML5 |
43 |
|
sub ABORT_TOKEN () { 8 } # Not a token actually |
44 |
|
|
45 |
package Whatpm::HTML; |
package Whatpm::HTML; |
46 |
|
|
47 |
|
BEGIN { Whatpm::HTML::Tokenizer->import (':token') } |
48 |
|
|
49 |
## Content model flags |
## Content model flags |
50 |
|
|
51 |
sub CM_ENTITY () { 0b001 } # & markup in data |
sub CM_ENTITY () { 0b001 } # & markup in data |
114 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
116 |
|
|
|
## Token types |
|
|
|
|
|
sub DOCTYPE_TOKEN () { 1 } |
|
|
sub COMMENT_TOKEN () { 2 } |
|
|
sub START_TAG_TOKEN () { 3 } |
|
|
sub END_TAG_TOKEN () { 4 } |
|
|
sub END_OF_FILE_TOKEN () { 5 } |
|
|
sub CHARACTER_TOKEN () { 6 } |
|
|
|
|
117 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
118 |
## list and descriptions) |
## list and descriptions) |
119 |
|
|
175 |
#$self->{level} |
#$self->{level} |
176 |
#$self->{set_nc} |
#$self->{set_nc} |
177 |
#$self->{parse_error} |
#$self->{parse_error} |
178 |
|
#$self->{is_xml} (if XML) |
179 |
|
|
180 |
$self->{state} = DATA_STATE; # MUST |
$self->{state} = DATA_STATE; # MUST |
181 |
#$self->{s_kwd}; # state keyword - initialized when used |
$self->{s_kwd} = ''; # state keyword |
182 |
#$self->{entity__value}; # initialized when used |
#$self->{entity__value}; # initialized when used |
183 |
#$self->{entity__match}; # initialized when used |
#$self->{entity__match}; # initialized when used |
184 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
209 |
## ->{value} |
## ->{value} |
210 |
## ->{has_reference} == 1 or 0 |
## ->{has_reference} == 1 or 0 |
211 |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
212 |
|
## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN) |
213 |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
214 |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
215 |
## while the token is pushed back to the stack. |
## while the token is pushed back to the stack. |
313 |
} |
} |
314 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
315 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
316 |
$self->{s_kwd} .= '-'; |
if ($self->{s_kwd} eq '<!-') { |
|
|
|
|
if ($self->{s_kwd} eq '<!--') { |
|
317 |
!!!cp (3); |
!!!cp (3); |
318 |
$self->{escape} = 1; # unless $self->{escape}; |
$self->{escape} = 1; # unless $self->{escape}; |
319 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
320 |
# |
# |
321 |
} elsif ($self->{s_kwd} eq '---') { |
} elsif ($self->{s_kwd} eq '-') { |
322 |
!!!cp (4); |
!!!cp (4); |
323 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
324 |
# |
# |
325 |
|
} elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') { |
326 |
|
!!!cp (4.1); |
327 |
|
$self->{s_kwd} .= '-'; |
328 |
|
# |
329 |
} else { |
} else { |
330 |
!!!cp (5); |
!!!cp (5); |
331 |
|
$self->{s_kwd} = '-'; |
332 |
# |
# |
333 |
} |
} |
334 |
} |
} |
364 |
if ($self->{s_kwd} eq '--') { |
if ($self->{s_kwd} eq '--') { |
365 |
!!!cp (8); |
!!!cp (8); |
366 |
delete $self->{escape}; |
delete $self->{escape}; |
367 |
|
# |
368 |
} else { |
} else { |
369 |
!!!cp (9); |
!!!cp (9); |
370 |
|
# |
371 |
} |
} |
372 |
|
} elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') { |
373 |
|
!!!cp (9.1); |
374 |
|
!!!parse-error (type => 'unmatched mse', ## TODO: type |
375 |
|
line => $self->{line_prev}, |
376 |
|
column => $self->{column_prev} - 1); |
377 |
|
# |
378 |
} else { |
} else { |
379 |
!!!cp (10); |
!!!cp (10); |
380 |
|
# |
381 |
} |
} |
382 |
|
|
383 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
384 |
# |
# |
385 |
|
} elsif ($self->{nc} == 0x005D) { # ] |
386 |
|
if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') { |
387 |
|
!!!cp (10.1); |
388 |
|
$self->{s_kwd} .= ']'; |
389 |
|
} elsif ($self->{s_kwd} eq ']]') { |
390 |
|
!!!cp (10.2); |
391 |
|
# |
392 |
|
} else { |
393 |
|
!!!cp (10.3); |
394 |
|
$self->{s_kwd} = ''; |
395 |
|
} |
396 |
|
# |
397 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
398 |
!!!cp (11); |
!!!cp (11); |
399 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
411 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
412 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
413 |
}; |
}; |
414 |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
if ($self->{read_until}->($token->{data}, q{-!<>&\]}, |
415 |
length $token->{data})) { |
length $token->{data})) { |
416 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
417 |
} |
} |
418 |
|
|
419 |
## Stay in the data state. |
## Stay in the data state. |
420 |
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
if (not $self->{is_xml} and |
421 |
|
$self->{content_model} == PCDATA_CONTENT_MODEL) { |
422 |
!!!cp (13); |
!!!cp (13); |
423 |
$self->{state} = PCDATA_STATE; |
$self->{state} = PCDATA_STATE; |
424 |
} else { |
} else { |
446 |
|
|
447 |
## reconsume |
## reconsume |
448 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
449 |
|
$self->{s_kwd} = ''; |
450 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
451 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
452 |
column => $self->{column_prev}, |
column => $self->{column_prev}, |
468 |
!!!cp (19); |
!!!cp (19); |
469 |
$self->{ct} |
$self->{ct} |
470 |
= {type => START_TAG_TOKEN, |
= {type => START_TAG_TOKEN, |
471 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
472 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
473 |
column => $self->{column_prev}}; |
column => $self->{column_prev}}; |
474 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
490 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
491 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
492 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
493 |
|
$self->{s_kwd} = ''; |
494 |
!!!next-input-character; |
!!!next-input-character; |
495 |
|
|
496 |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
517 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
518 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
519 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
520 |
|
$self->{s_kwd} = ''; |
521 |
## reconsume |
## reconsume |
522 |
|
|
523 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
546 |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
547 |
!!!cp (28); |
!!!cp (28); |
548 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
549 |
|
$self->{s_kwd} = ''; |
550 |
## Reconsume. |
## Reconsume. |
551 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
552 |
line => $l, column => $c, |
line => $l, column => $c, |
560 |
!!!cp (29); |
!!!cp (29); |
561 |
$self->{ct} |
$self->{ct} |
562 |
= {type => END_TAG_TOKEN, |
= {type => END_TAG_TOKEN, |
563 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
564 |
line => $l, column => $c}; |
line => $l, column => $c}; |
565 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
566 |
!!!next-input-character; |
!!!next-input-character; |
580 |
line => $self->{line_prev}, ## "<" in "</>" |
line => $self->{line_prev}, ## "<" in "</>" |
581 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
582 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
583 |
|
$self->{s_kwd} = ''; |
584 |
!!!next-input-character; |
!!!next-input-character; |
585 |
redo A; |
redo A; |
586 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
587 |
!!!cp (32); |
!!!cp (32); |
588 |
!!!parse-error (type => 'bare etago'); |
!!!parse-error (type => 'bare etago'); |
589 |
|
$self->{s_kwd} = ''; |
590 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
591 |
# reconsume |
# reconsume |
592 |
|
|
626 |
} else { |
} else { |
627 |
!!!cp (25); |
!!!cp (25); |
628 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
629 |
|
$self->{s_kwd} = ''; |
630 |
## Reconsume. |
## Reconsume. |
631 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
632 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
645 |
!!!cp (26); |
!!!cp (26); |
646 |
## Reconsume. |
## Reconsume. |
647 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
648 |
|
$self->{s_kwd} = ''; |
649 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
650 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
651 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
687 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
688 |
} |
} |
689 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
690 |
|
$self->{s_kwd} = ''; |
691 |
!!!next-input-character; |
!!!next-input-character; |
692 |
|
|
693 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
696 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
697 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
698 |
!!!cp (38); |
!!!cp (38); |
699 |
$self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020); |
$self->{ct}->{tag_name} |
700 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
701 |
# start tag or end tag |
# start tag or end tag |
702 |
## Stay in this state |
## Stay in this state |
703 |
!!!next-input-character; |
!!!next-input-character; |
720 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
721 |
} |
} |
722 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
723 |
|
$self->{s_kwd} = ''; |
724 |
# reconsume |
# reconsume |
725 |
|
|
726 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
761 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
762 |
} |
} |
763 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
764 |
|
$self->{s_kwd} = ''; |
765 |
!!!next-input-character; |
!!!next-input-character; |
766 |
|
|
767 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
771 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
772 |
!!!cp (49); |
!!!cp (49); |
773 |
$self->{ca} |
$self->{ca} |
774 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
775 |
value => '', |
value => '', |
776 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
777 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
799 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
800 |
} |
} |
801 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
802 |
|
$self->{s_kwd} = ''; |
803 |
# reconsume |
# reconsume |
804 |
|
|
805 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
865 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
866 |
} |
} |
867 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
868 |
|
$self->{s_kwd} = ''; |
869 |
!!!next-input-character; |
!!!next-input-character; |
870 |
|
|
871 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
874 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
875 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
876 |
!!!cp (63); |
!!!cp (63); |
877 |
$self->{ca}->{name} .= chr ($self->{nc} + 0x0020); |
$self->{ca}->{name} |
878 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
879 |
## Stay in the state |
## Stay in the state |
880 |
!!!next-input-character; |
!!!next-input-character; |
881 |
redo A; |
redo A; |
904 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
905 |
} |
} |
906 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
907 |
|
$self->{s_kwd} = ''; |
908 |
# reconsume |
# reconsume |
909 |
|
|
910 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
951 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
952 |
} |
} |
953 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
954 |
|
$self->{s_kwd} = ''; |
955 |
!!!next-input-character; |
!!!next-input-character; |
956 |
|
|
957 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
961 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
962 |
!!!cp (76); |
!!!cp (76); |
963 |
$self->{ca} |
$self->{ca} |
964 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
965 |
value => '', |
value => '', |
966 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
967 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
989 |
} else { |
} else { |
990 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
991 |
} |
} |
992 |
|
$self->{s_kwd} = ''; |
993 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
994 |
# reconsume |
# reconsume |
995 |
|
|
1051 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1052 |
} |
} |
1053 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1054 |
|
$self->{s_kwd} = ''; |
1055 |
!!!next-input-character; |
!!!next-input-character; |
1056 |
|
|
1057 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1075 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1076 |
} |
} |
1077 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1078 |
|
$self->{s_kwd} = ''; |
1079 |
## reconsume |
## reconsume |
1080 |
|
|
1081 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1128 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1129 |
} |
} |
1130 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1131 |
|
$self->{s_kwd} = ''; |
1132 |
## reconsume |
## reconsume |
1133 |
|
|
1134 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1180 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1181 |
} |
} |
1182 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1183 |
|
$self->{s_kwd} = ''; |
1184 |
## reconsume |
## reconsume |
1185 |
|
|
1186 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1231 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1232 |
} |
} |
1233 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1234 |
|
$self->{s_kwd} = ''; |
1235 |
!!!next-input-character; |
!!!next-input-character; |
1236 |
|
|
1237 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1255 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1256 |
} |
} |
1257 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1258 |
|
$self->{s_kwd} = ''; |
1259 |
## reconsume |
## reconsume |
1260 |
|
|
1261 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1304 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1305 |
} |
} |
1306 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1307 |
|
$self->{s_kwd} = ''; |
1308 |
!!!next-input-character; |
!!!next-input-character; |
1309 |
|
|
1310 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1332 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1333 |
} |
} |
1334 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1335 |
|
$self->{s_kwd} = ''; |
1336 |
## Reconsume. |
## Reconsume. |
1337 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1338 |
redo A; |
redo A; |
1363 |
} |
} |
1364 |
|
|
1365 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1366 |
|
$self->{s_kwd} = ''; |
1367 |
!!!next-input-character; |
!!!next-input-character; |
1368 |
|
|
1369 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1386 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1387 |
} |
} |
1388 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1389 |
|
$self->{s_kwd} = ''; |
1390 |
## Reconsume. |
## Reconsume. |
1391 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1392 |
redo A; |
redo A; |
1407 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
1408 |
!!!cp (124); |
!!!cp (124); |
1409 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1410 |
|
$self->{s_kwd} = ''; |
1411 |
!!!next-input-character; |
!!!next-input-character; |
1412 |
|
|
1413 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1415 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
1416 |
!!!cp (125); |
!!!cp (125); |
1417 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1418 |
|
$self->{s_kwd} = ''; |
1419 |
## reconsume |
## reconsume |
1420 |
|
|
1421 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1447 |
$self->{s_kwd} = chr $self->{nc}; |
$self->{s_kwd} = chr $self->{nc}; |
1448 |
!!!next-input-character; |
!!!next-input-character; |
1449 |
redo A; |
redo A; |
1450 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
} elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
1451 |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL and |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL) or |
1452 |
|
$self->{is_xml}) and |
1453 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
1454 |
!!!cp (135.4); |
!!!cp (135.4); |
1455 |
$self->{state} = MD_CDATA_STATE; |
$self->{state} = MD_CDATA_STATE; |
1559 |
} elsif ($self->{s_kwd} eq '[CDATA' and |
} elsif ($self->{s_kwd} eq '[CDATA' and |
1560 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
1561 |
!!!cp (135.2); |
!!!cp (135.2); |
1562 |
|
|
1563 |
|
if ($self->{is_xml} and |
1564 |
|
not $self->{tainted} and |
1565 |
|
@{$self->{open_elements} or []} == 0) { |
1566 |
|
!!!parse-error (type => 'cdata outside of root element', |
1567 |
|
line => $self->{line_prev}, |
1568 |
|
column => $self->{column_prev} - 7); |
1569 |
|
$self->{tainted} = 1; |
1570 |
|
} |
1571 |
|
|
1572 |
$self->{ct} = {type => CHARACTER_TOKEN, |
$self->{ct} = {type => CHARACTER_TOKEN, |
1573 |
data => '', |
data => '', |
1574 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
1600 |
!!!cp (138); |
!!!cp (138); |
1601 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
1602 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1603 |
|
$self->{s_kwd} = ''; |
1604 |
!!!next-input-character; |
!!!next-input-character; |
1605 |
|
|
1606 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1610 |
!!!cp (139); |
!!!cp (139); |
1611 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1612 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1613 |
|
$self->{s_kwd} = ''; |
1614 |
## reconsume |
## reconsume |
1615 |
|
|
1616 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1634 |
!!!cp (142); |
!!!cp (142); |
1635 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
1636 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1637 |
|
$self->{s_kwd} = ''; |
1638 |
!!!next-input-character; |
!!!next-input-character; |
1639 |
|
|
1640 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1644 |
!!!cp (143); |
!!!cp (143); |
1645 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1646 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1647 |
|
$self->{s_kwd} = ''; |
1648 |
## reconsume |
## reconsume |
1649 |
|
|
1650 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1668 |
!!!cp (146); |
!!!cp (146); |
1669 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1670 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1671 |
|
$self->{s_kwd} = ''; |
1672 |
## reconsume |
## reconsume |
1673 |
|
|
1674 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1694 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
1695 |
!!!cp (149); |
!!!cp (149); |
1696 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1697 |
|
$self->{s_kwd} = ''; |
1698 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1699 |
|
$self->{s_kwd} = ''; |
1700 |
## reconsume |
## reconsume |
1701 |
|
|
1702 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1713 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
1714 |
!!!cp (151); |
!!!cp (151); |
1715 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1716 |
|
$self->{s_kwd} = ''; |
1717 |
!!!next-input-character; |
!!!next-input-character; |
1718 |
|
|
1719 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1732 |
!!!cp (153); |
!!!cp (153); |
1733 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1734 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1735 |
|
$self->{s_kwd} = ''; |
1736 |
## reconsume |
## reconsume |
1737 |
|
|
1738 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1771 |
!!!cp (158); |
!!!cp (158); |
1772 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
1773 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1774 |
|
$self->{s_kwd} = ''; |
1775 |
!!!next-input-character; |
!!!next-input-character; |
1776 |
|
|
1777 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
1781 |
!!!cp (159); |
!!!cp (159); |
1782 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
1783 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1784 |
|
$self->{s_kwd} = ''; |
1785 |
## reconsume |
## reconsume |
1786 |
|
|
1787 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
1805 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
1806 |
!!!cp (162); |
!!!cp (162); |
1807 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1808 |
|
$self->{s_kwd} = ''; |
1809 |
!!!next-input-character; |
!!!next-input-character; |
1810 |
|
|
1811 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
1815 |
!!!cp (163); |
!!!cp (163); |
1816 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1817 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1818 |
|
$self->{s_kwd} = ''; |
1819 |
## reconsume |
## reconsume |
1820 |
|
|
1821 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
1839 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
1840 |
!!!cp (166); |
!!!cp (166); |
1841 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1842 |
|
$self->{s_kwd} = ''; |
1843 |
!!!next-input-character; |
!!!next-input-character; |
1844 |
|
|
1845 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
1849 |
!!!cp (167); |
!!!cp (167); |
1850 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1851 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1852 |
|
$self->{s_kwd} = ''; |
1853 |
## reconsume |
## reconsume |
1854 |
|
|
1855 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
1978 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
1979 |
|
|
1980 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1981 |
|
$self->{s_kwd} = ''; |
1982 |
!!!next-input-character; |
!!!next-input-character; |
1983 |
|
|
1984 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
1990 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1991 |
|
|
1992 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1993 |
|
$self->{s_kwd} = ''; |
1994 |
## reconsume |
## reconsume |
1995 |
|
|
1996 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2017 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2018 |
|
|
2019 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2020 |
|
$self->{s_kwd} = ''; |
2021 |
!!!next-input-character; |
!!!next-input-character; |
2022 |
|
|
2023 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2029 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2030 |
|
|
2031 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2032 |
|
$self->{s_kwd} = ''; |
2033 |
## reconsume |
## reconsume |
2034 |
|
|
2035 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2058 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2059 |
|
|
2060 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2061 |
|
$self->{s_kwd} = ''; |
2062 |
!!!next-input-character; |
!!!next-input-character; |
2063 |
|
|
2064 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2070 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2071 |
|
|
2072 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2073 |
|
$self->{s_kwd} = ''; |
2074 |
## reconsume |
## reconsume |
2075 |
|
|
2076 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2109 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2110 |
!!!cp (198); |
!!!cp (198); |
2111 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2112 |
|
$self->{s_kwd} = ''; |
2113 |
!!!next-input-character; |
!!!next-input-character; |
2114 |
|
|
2115 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2120 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2121 |
|
|
2122 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2123 |
|
$self->{s_kwd} = ''; |
2124 |
## reconsume |
## reconsume |
2125 |
|
|
2126 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2158 |
!!!cp (204); |
!!!cp (204); |
2159 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
2160 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2161 |
|
$self->{s_kwd} = ''; |
2162 |
!!!next-input-character; |
!!!next-input-character; |
2163 |
|
|
2164 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2170 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2171 |
|
|
2172 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2173 |
|
$self->{s_kwd} = ''; |
2174 |
## reconsume |
## reconsume |
2175 |
|
|
2176 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2197 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2198 |
|
|
2199 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2200 |
|
$self->{s_kwd} = ''; |
2201 |
!!!next-input-character; |
!!!next-input-character; |
2202 |
|
|
2203 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2209 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2210 |
|
|
2211 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2212 |
|
$self->{s_kwd} = ''; |
2213 |
## reconsume |
## reconsume |
2214 |
|
|
2215 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2238 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2239 |
|
|
2240 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2241 |
|
$self->{s_kwd} = ''; |
2242 |
!!!next-input-character; |
!!!next-input-character; |
2243 |
|
|
2244 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2250 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2251 |
|
|
2252 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2253 |
|
$self->{s_kwd} = ''; |
2254 |
## reconsume |
## reconsume |
2255 |
|
|
2256 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2277 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2278 |
!!!cp (216); |
!!!cp (216); |
2279 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2280 |
|
$self->{s_kwd} = ''; |
2281 |
!!!next-input-character; |
!!!next-input-character; |
2282 |
|
|
2283 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2287 |
!!!cp (217); |
!!!cp (217); |
2288 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2289 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2290 |
|
$self->{s_kwd} = ''; |
2291 |
## reconsume |
## reconsume |
2292 |
|
|
2293 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2307 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
2308 |
!!!cp (219); |
!!!cp (219); |
2309 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2310 |
|
$self->{s_kwd} = ''; |
2311 |
!!!next-input-character; |
!!!next-input-character; |
2312 |
|
|
2313 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2316 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2317 |
!!!cp (220); |
!!!cp (220); |
2318 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2319 |
|
$self->{s_kwd} = ''; |
2320 |
## reconsume |
## reconsume |
2321 |
|
|
2322 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2342 |
!!!next-input-character; |
!!!next-input-character; |
2343 |
redo A; |
redo A; |
2344 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2345 |
|
if ($self->{is_xml}) { |
2346 |
|
!!!parse-error (type => 'no mse'); ## TODO: type |
2347 |
|
} |
2348 |
|
|
2349 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2350 |
|
$self->{s_kwd} = ''; |
2351 |
!!!next-input-character; |
!!!next-input-character; |
2352 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
2353 |
!!!cp (221.2); |
!!!cp (221.2); |
2386 |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
2387 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
2388 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2389 |
|
$self->{s_kwd} = ''; |
2390 |
!!!next-input-character; |
!!!next-input-character; |
2391 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
2392 |
!!!cp (221.7); |
!!!cp (221.7); |
2454 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2455 |
!!!cp (997); |
!!!cp (997); |
2456 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2457 |
|
$self->{s_kwd} = ''; |
2458 |
## Reconsume. |
## Reconsume. |
2459 |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
2460 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2465 |
!!!cp (996); |
!!!cp (996); |
2466 |
$self->{ca}->{value} .= '&'; |
$self->{ca}->{value} .= '&'; |
2467 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2468 |
|
$self->{s_kwd} = ''; |
2469 |
## Reconsume. |
## Reconsume. |
2470 |
redo A; |
redo A; |
2471 |
} |
} |
2496 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2497 |
!!!cp (1019); |
!!!cp (1019); |
2498 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2499 |
|
$self->{s_kwd} = ''; |
2500 |
## Reconsume. |
## Reconsume. |
2501 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2502 |
data => '&#', |
data => '&#', |
2508 |
!!!cp (993); |
!!!cp (993); |
2509 |
$self->{ca}->{value} .= '&#'; |
$self->{ca}->{value} .= '&#'; |
2510 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2511 |
|
$self->{s_kwd} = ''; |
2512 |
## Reconsume. |
## Reconsume. |
2513 |
redo A; |
redo A; |
2514 |
} |
} |
2554 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2555 |
!!!cp (992); |
!!!cp (992); |
2556 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2557 |
|
$self->{s_kwd} = ''; |
2558 |
## Reconsume. |
## Reconsume. |
2559 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2560 |
|
has_reference => 1, |
2561 |
line => $l, column => $c, |
line => $l, column => $c, |
2562 |
}); |
}); |
2563 |
redo A; |
redo A; |
2566 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
2567 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
2568 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2569 |
|
$self->{s_kwd} = ''; |
2570 |
## Reconsume. |
## Reconsume. |
2571 |
redo A; |
redo A; |
2572 |
} |
} |
2592 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2593 |
!!!cp (1005); |
!!!cp (1005); |
2594 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2595 |
|
$self->{s_kwd} = ''; |
2596 |
## Reconsume. |
## Reconsume. |
2597 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2598 |
data => '&' . $self->{s_kwd}, |
data => '&' . $self->{s_kwd}, |
2604 |
!!!cp (989); |
!!!cp (989); |
2605 |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
2606 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2607 |
|
$self->{s_kwd} = ''; |
2608 |
## Reconsume. |
## Reconsume. |
2609 |
redo A; |
redo A; |
2610 |
} |
} |
2667 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2668 |
!!!cp (988); |
!!!cp (988); |
2669 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2670 |
|
$self->{s_kwd} = ''; |
2671 |
## Reconsume. |
## Reconsume. |
2672 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2673 |
|
has_reference => 1, |
2674 |
line => $l, column => $c, |
line => $l, column => $c, |
2675 |
}); |
}); |
2676 |
redo A; |
redo A; |
2679 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
2680 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
2681 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2682 |
|
$self->{s_kwd} = ''; |
2683 |
## Reconsume. |
## Reconsume. |
2684 |
redo A; |
redo A; |
2685 |
} |
} |
2762 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2763 |
!!!cp (986); |
!!!cp (986); |
2764 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2765 |
|
$self->{s_kwd} = ''; |
2766 |
## Reconsume. |
## Reconsume. |
2767 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2768 |
data => $data, |
data => $data, |
2769 |
|
has_reference => $has_ref, |
2770 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2771 |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
2772 |
}); |
}); |
2776 |
$self->{ca}->{value} .= $data; |
$self->{ca}->{value} .= $data; |
2777 |
$self->{ca}->{has_reference} = 1 if $has_ref; |
$self->{ca}->{has_reference} = 1 if $has_ref; |
2778 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2779 |
|
$self->{s_kwd} = ''; |
2780 |
## Reconsume. |
## Reconsume. |
2781 |
redo A; |
redo A; |
2782 |
} |
} |