2 |
use strict; |
use strict; |
3 |
our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
4 |
|
|
5 |
|
## Exporter setup.  This runs at compile time, so the export lists are
## already populated when a later |BEGIN| block in this file calls
## |Whatpm::HTML::Tokenizer->import (':token')|.
BEGIN {
  require Exporter;
  push our @ISA, 'Exporter';

  ## Names of the token type constants.  The same list is used both
  ## for on-request export and for the |:token| tag, so it is written
  ## once here and copied into each.
  my @token_names = qw(
    DOCTYPE_TOKEN
    COMMENT_TOKEN
    START_TAG_TOKEN
    END_TAG_TOKEN
    END_OF_FILE_TOKEN
    CHARACTER_TOKEN
    PI_TOKEN
    ABORT_TOKEN
  );

  ## Nothing is exported by default; every name must be requested.
  our @EXPORT_OK = @token_names;

  ## |use ... ':token'| imports all token type constants at once.
  our %EXPORT_TAGS = (
    token => [@token_names],
  );
}
33 |
|
|
34 |
|
## Token types |
35 |
|
|
36 |
|
## Numeric codes stored in the |type| member of a token hash (for
## example |{type => START_TAG_TOKEN, ...}|).  Each constant is a sub
## with an empty prototype, the form perl can inline at compile time.
sub DOCTYPE_TOKEN     () { 1 }
sub COMMENT_TOKEN     () { 2 }
sub START_TAG_TOKEN   () { 3 }
sub END_TAG_TOKEN     () { 4 }
sub END_OF_FILE_TOKEN () { 5 }
sub CHARACTER_TOKEN   () { 6 }
sub PI_TOKEN          () { 7 } # XML5
sub ABORT_TOKEN       () { 8 } # Not a token actually
44 |
|
|
45 |
package Whatpm::HTML; |
package Whatpm::HTML; |
46 |
|
|
47 |
|
BEGIN { Whatpm::HTML::Tokenizer->import (':token') } |
48 |
|
|
49 |
## Content model flags |
## Content model flags |
50 |
|
|
51 |
sub CM_ENTITY () { 0b001 } # & markup in data |
sub CM_ENTITY () { 0b001 } # & markup in data |
114 |
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
116 |
|
|
|
## Token types |
|
|
|
|
|
sub DOCTYPE_TOKEN () { 1 } |
|
|
sub COMMENT_TOKEN () { 2 } |
|
|
sub START_TAG_TOKEN () { 3 } |
|
|
sub END_TAG_TOKEN () { 4 } |
|
|
sub END_OF_FILE_TOKEN () { 5 } |
|
|
sub CHARACTER_TOKEN () { 6 } |
|
|
|
|
117 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
118 |
## list and descriptions) |
## list and descriptions) |
119 |
|
|
175 |
#$self->{level} |
#$self->{level} |
176 |
#$self->{set_nc} |
#$self->{set_nc} |
177 |
#$self->{parse_error} |
#$self->{parse_error} |
178 |
|
#$self->{is_xml} (if XML) |
179 |
|
|
180 |
$self->{state} = DATA_STATE; # MUST |
$self->{state} = DATA_STATE; # MUST |
181 |
#$self->{s_kwd}; # state keyword - initialized when used |
$self->{s_kwd} = ''; # state keyword |
182 |
#$self->{entity__value}; # initialized when used |
#$self->{entity__value}; # initialized when used |
183 |
#$self->{entity__match}; # initialized when used |
#$self->{entity__match}; # initialized when used |
184 |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
$self->{content_model} = PCDATA_CONTENT_MODEL; # be |
312 |
} |
} |
313 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
314 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
315 |
$self->{s_kwd} .= '-'; |
if ($self->{s_kwd} eq '<!-') { |
|
|
|
|
if ($self->{s_kwd} eq '<!--') { |
|
316 |
!!!cp (3); |
!!!cp (3); |
317 |
$self->{escape} = 1; # unless $self->{escape}; |
$self->{escape} = 1; # unless $self->{escape}; |
318 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
319 |
# |
# |
320 |
} elsif ($self->{s_kwd} eq '---') { |
} elsif ($self->{s_kwd} eq '-') { |
321 |
!!!cp (4); |
!!!cp (4); |
322 |
$self->{s_kwd} = '--'; |
$self->{s_kwd} = '--'; |
323 |
# |
# |
324 |
|
} elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') { |
325 |
|
!!!cp (4.1); |
326 |
|
$self->{s_kwd} .= '-'; |
327 |
|
# |
328 |
} else { |
} else { |
329 |
!!!cp (5); |
!!!cp (5); |
330 |
|
$self->{s_kwd} = '-'; |
331 |
# |
# |
332 |
} |
} |
333 |
} |
} |
363 |
if ($self->{s_kwd} eq '--') { |
if ($self->{s_kwd} eq '--') { |
364 |
!!!cp (8); |
!!!cp (8); |
365 |
delete $self->{escape}; |
delete $self->{escape}; |
366 |
|
# |
367 |
} else { |
} else { |
368 |
!!!cp (9); |
!!!cp (9); |
369 |
|
# |
370 |
} |
} |
371 |
|
} elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') { |
372 |
|
!!!cp (9.1); |
373 |
|
!!!parse-error (type => 'unmatched mse', ## TODO: type |
374 |
|
line => $self->{line_prev}, |
375 |
|
column => $self->{column_prev} - 1); |
376 |
|
# |
377 |
} else { |
} else { |
378 |
!!!cp (10); |
!!!cp (10); |
379 |
|
# |
380 |
} |
} |
381 |
|
|
382 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
383 |
# |
# |
384 |
|
} elsif ($self->{nc} == 0x005D) { # ] |
385 |
|
if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') { |
386 |
|
!!!cp (10.1); |
387 |
|
$self->{s_kwd} .= ']'; |
388 |
|
} elsif ($self->{s_kwd} eq ']]') { |
389 |
|
!!!cp (10.2); |
390 |
|
# |
391 |
|
} else { |
392 |
|
!!!cp (10.3); |
393 |
|
$self->{s_kwd} = ''; |
394 |
|
} |
395 |
|
# |
396 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
397 |
!!!cp (11); |
!!!cp (11); |
398 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
410 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
411 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
412 |
}; |
}; |
413 |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
if ($self->{read_until}->($token->{data}, q{-!<>&\]}, |
414 |
length $token->{data})) { |
length $token->{data})) { |
415 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
416 |
} |
} |
417 |
|
|
418 |
## Stay in the data state. |
## Stay in the data state. |
419 |
if ($self->{content_model} == PCDATA_CONTENT_MODEL) { |
if (not $self->{is_xml} and |
420 |
|
$self->{content_model} == PCDATA_CONTENT_MODEL) { |
421 |
!!!cp (13); |
!!!cp (13); |
422 |
$self->{state} = PCDATA_STATE; |
$self->{state} = PCDATA_STATE; |
423 |
} else { |
} else { |
445 |
|
|
446 |
## reconsume |
## reconsume |
447 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
448 |
|
$self->{s_kwd} = ''; |
449 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
450 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
451 |
column => $self->{column_prev}, |
column => $self->{column_prev}, |
467 |
!!!cp (19); |
!!!cp (19); |
468 |
$self->{ct} |
$self->{ct} |
469 |
= {type => START_TAG_TOKEN, |
= {type => START_TAG_TOKEN, |
470 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
471 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
472 |
column => $self->{column_prev}}; |
column => $self->{column_prev}}; |
473 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
489 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
490 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
491 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
492 |
|
$self->{s_kwd} = ''; |
493 |
!!!next-input-character; |
!!!next-input-character; |
494 |
|
|
495 |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
!!!emit ({type => CHARACTER_TOKEN, data => '<>', |
516 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
517 |
column => $self->{column_prev}); |
column => $self->{column_prev}); |
518 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
519 |
|
$self->{s_kwd} = ''; |
520 |
## reconsume |
## reconsume |
521 |
|
|
522 |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
545 |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>. |
546 |
!!!cp (28); |
!!!cp (28); |
547 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
548 |
|
$self->{s_kwd} = ''; |
549 |
## Reconsume. |
## Reconsume. |
550 |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
!!!emit ({type => CHARACTER_TOKEN, data => '</', |
551 |
line => $l, column => $c, |
line => $l, column => $c, |
559 |
!!!cp (29); |
!!!cp (29); |
560 |
$self->{ct} |
$self->{ct} |
561 |
= {type => END_TAG_TOKEN, |
= {type => END_TAG_TOKEN, |
562 |
tag_name => chr ($self->{nc} + 0x0020), |
tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
563 |
line => $l, column => $c}; |
line => $l, column => $c}; |
564 |
$self->{state} = TAG_NAME_STATE; |
$self->{state} = TAG_NAME_STATE; |
565 |
!!!next-input-character; |
!!!next-input-character; |
579 |
line => $self->{line_prev}, ## "<" in "</>" |
line => $self->{line_prev}, ## "<" in "</>" |
580 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
581 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
582 |
|
$self->{s_kwd} = ''; |
583 |
!!!next-input-character; |
!!!next-input-character; |
584 |
redo A; |
redo A; |
585 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
586 |
!!!cp (32); |
!!!cp (32); |
587 |
!!!parse-error (type => 'bare etago'); |
!!!parse-error (type => 'bare etago'); |
588 |
|
$self->{s_kwd} = ''; |
589 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
590 |
# reconsume |
# reconsume |
591 |
|
|
625 |
} else { |
} else { |
626 |
!!!cp (25); |
!!!cp (25); |
627 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
628 |
|
$self->{s_kwd} = ''; |
629 |
## Reconsume. |
## Reconsume. |
630 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
631 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
644 |
!!!cp (26); |
!!!cp (26); |
645 |
## Reconsume. |
## Reconsume. |
646 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
647 |
|
$self->{s_kwd} = ''; |
648 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
649 |
data => '</' . $self->{s_kwd}, |
data => '</' . $self->{s_kwd}, |
650 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
686 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
687 |
} |
} |
688 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
689 |
|
$self->{s_kwd} = ''; |
690 |
!!!next-input-character; |
!!!next-input-character; |
691 |
|
|
692 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
695 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
696 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
697 |
!!!cp (38); |
!!!cp (38); |
698 |
$self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020); |
$self->{ct}->{tag_name} |
699 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
700 |
# start tag or end tag |
# start tag or end tag |
701 |
## Stay in this state |
## Stay in this state |
702 |
!!!next-input-character; |
!!!next-input-character; |
719 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
720 |
} |
} |
721 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
722 |
|
$self->{s_kwd} = ''; |
723 |
# reconsume |
# reconsume |
724 |
|
|
725 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
760 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
761 |
} |
} |
762 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
763 |
|
$self->{s_kwd} = ''; |
764 |
!!!next-input-character; |
!!!next-input-character; |
765 |
|
|
766 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
770 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
771 |
!!!cp (49); |
!!!cp (49); |
772 |
$self->{ca} |
$self->{ca} |
773 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
774 |
value => '', |
value => '', |
775 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
776 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
798 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
799 |
} |
} |
800 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
801 |
|
$self->{s_kwd} = ''; |
802 |
# reconsume |
# reconsume |
803 |
|
|
804 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
864 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
865 |
} |
} |
866 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
867 |
|
$self->{s_kwd} = ''; |
868 |
!!!next-input-character; |
!!!next-input-character; |
869 |
|
|
870 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
873 |
} elsif (0x0041 <= $self->{nc} and |
} elsif (0x0041 <= $self->{nc} and |
874 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
875 |
!!!cp (63); |
!!!cp (63); |
876 |
$self->{ca}->{name} .= chr ($self->{nc} + 0x0020); |
$self->{ca}->{name} |
877 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
878 |
## Stay in the state |
## Stay in the state |
879 |
!!!next-input-character; |
!!!next-input-character; |
880 |
redo A; |
redo A; |
903 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
904 |
} |
} |
905 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
906 |
|
$self->{s_kwd} = ''; |
907 |
# reconsume |
# reconsume |
908 |
|
|
909 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
950 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
951 |
} |
} |
952 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
953 |
|
$self->{s_kwd} = ''; |
954 |
!!!next-input-character; |
!!!next-input-character; |
955 |
|
|
956 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
960 |
$self->{nc} <= 0x005A) { # A..Z |
$self->{nc} <= 0x005A) { # A..Z |
961 |
!!!cp (76); |
!!!cp (76); |
962 |
$self->{ca} |
$self->{ca} |
963 |
= {name => chr ($self->{nc} + 0x0020), |
= {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)), |
964 |
value => '', |
value => '', |
965 |
line => $self->{line}, column => $self->{column}}; |
line => $self->{line}, column => $self->{column}}; |
966 |
$self->{state} = ATTRIBUTE_NAME_STATE; |
$self->{state} = ATTRIBUTE_NAME_STATE; |
988 |
} else { |
} else { |
989 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
990 |
} |
} |
991 |
|
$self->{s_kwd} = ''; |
992 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
993 |
# reconsume |
# reconsume |
994 |
|
|
1050 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1051 |
} |
} |
1052 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1053 |
|
$self->{s_kwd} = ''; |
1054 |
!!!next-input-character; |
!!!next-input-character; |
1055 |
|
|
1056 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1074 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1075 |
} |
} |
1076 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1077 |
|
$self->{s_kwd} = ''; |
1078 |
## reconsume |
## reconsume |
1079 |
|
|
1080 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1127 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1128 |
} |
} |
1129 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1130 |
|
$self->{s_kwd} = ''; |
1131 |
## reconsume |
## reconsume |
1132 |
|
|
1133 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1179 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1180 |
} |
} |
1181 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1182 |
|
$self->{s_kwd} = ''; |
1183 |
## reconsume |
## reconsume |
1184 |
|
|
1185 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1230 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1231 |
} |
} |
1232 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1233 |
|
$self->{s_kwd} = ''; |
1234 |
!!!next-input-character; |
!!!next-input-character; |
1235 |
|
|
1236 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1254 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1255 |
} |
} |
1256 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1257 |
|
$self->{s_kwd} = ''; |
1258 |
## reconsume |
## reconsume |
1259 |
|
|
1260 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1303 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1304 |
} |
} |
1305 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1306 |
|
$self->{s_kwd} = ''; |
1307 |
!!!next-input-character; |
!!!next-input-character; |
1308 |
|
|
1309 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1331 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1332 |
} |
} |
1333 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1334 |
|
$self->{s_kwd} = ''; |
1335 |
## Reconsume. |
## Reconsume. |
1336 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1337 |
redo A; |
redo A; |
1362 |
} |
} |
1363 |
|
|
1364 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1365 |
|
$self->{s_kwd} = ''; |
1366 |
!!!next-input-character; |
!!!next-input-character; |
1367 |
|
|
1368 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1385 |
die "$0: $self->{ct}->{type}: Unknown token type"; |
die "$0: $self->{ct}->{type}: Unknown token type"; |
1386 |
} |
} |
1387 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1388 |
|
$self->{s_kwd} = ''; |
1389 |
## Reconsume. |
## Reconsume. |
1390 |
!!!emit ($self->{ct}); # start tag or end tag |
!!!emit ($self->{ct}); # start tag or end tag |
1391 |
redo A; |
redo A; |
1406 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
1407 |
!!!cp (124); |
!!!cp (124); |
1408 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1409 |
|
$self->{s_kwd} = ''; |
1410 |
!!!next-input-character; |
!!!next-input-character; |
1411 |
|
|
1412 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1414 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
1415 |
!!!cp (125); |
!!!cp (125); |
1416 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1417 |
|
$self->{s_kwd} = ''; |
1418 |
## reconsume |
## reconsume |
1419 |
|
|
1420 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1446 |
$self->{s_kwd} = chr $self->{nc}; |
$self->{s_kwd} = chr $self->{nc}; |
1447 |
!!!next-input-character; |
!!!next-input-character; |
1448 |
redo A; |
redo A; |
1449 |
} elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
} elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and |
1450 |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL and |
$self->{open_elements}->[-1]->[1] & FOREIGN_EL) or |
1451 |
|
$self->{is_xml}) and |
1452 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
1453 |
!!!cp (135.4); |
!!!cp (135.4); |
1454 |
$self->{state} = MD_CDATA_STATE; |
$self->{state} = MD_CDATA_STATE; |
1589 |
!!!cp (138); |
!!!cp (138); |
1590 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
1591 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1592 |
|
$self->{s_kwd} = ''; |
1593 |
!!!next-input-character; |
!!!next-input-character; |
1594 |
|
|
1595 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1599 |
!!!cp (139); |
!!!cp (139); |
1600 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1601 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1602 |
|
$self->{s_kwd} = ''; |
1603 |
## reconsume |
## reconsume |
1604 |
|
|
1605 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1623 |
!!!cp (142); |
!!!cp (142); |
1624 |
!!!parse-error (type => 'bogus comment'); |
!!!parse-error (type => 'bogus comment'); |
1625 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1626 |
|
$self->{s_kwd} = ''; |
1627 |
!!!next-input-character; |
!!!next-input-character; |
1628 |
|
|
1629 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1633 |
!!!cp (143); |
!!!cp (143); |
1634 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1635 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1636 |
|
$self->{s_kwd} = ''; |
1637 |
## reconsume |
## reconsume |
1638 |
|
|
1639 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1657 |
!!!cp (146); |
!!!cp (146); |
1658 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1659 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1660 |
|
$self->{s_kwd} = ''; |
1661 |
## reconsume |
## reconsume |
1662 |
|
|
1663 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1683 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
1684 |
!!!cp (149); |
!!!cp (149); |
1685 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1686 |
|
$self->{s_kwd} = ''; |
1687 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1688 |
|
$self->{s_kwd} = ''; |
1689 |
## reconsume |
## reconsume |
1690 |
|
|
1691 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1702 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
1703 |
!!!cp (151); |
!!!cp (151); |
1704 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1705 |
|
$self->{s_kwd} = ''; |
1706 |
!!!next-input-character; |
!!!next-input-character; |
1707 |
|
|
1708 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1721 |
!!!cp (153); |
!!!cp (153); |
1722 |
!!!parse-error (type => 'unclosed comment'); |
!!!parse-error (type => 'unclosed comment'); |
1723 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1724 |
|
$self->{s_kwd} = ''; |
1725 |
## reconsume |
## reconsume |
1726 |
|
|
1727 |
!!!emit ($self->{ct}); # comment |
!!!emit ($self->{ct}); # comment |
1760 |
!!!cp (158); |
!!!cp (158); |
1761 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
1762 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1763 |
|
$self->{s_kwd} = ''; |
1764 |
!!!next-input-character; |
!!!next-input-character; |
1765 |
|
|
1766 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
1770 |
!!!cp (159); |
!!!cp (159); |
1771 |
!!!parse-error (type => 'no DOCTYPE name'); |
!!!parse-error (type => 'no DOCTYPE name'); |
1772 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1773 |
|
$self->{s_kwd} = ''; |
1774 |
## reconsume |
## reconsume |
1775 |
|
|
1776 |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
!!!emit ($self->{ct}); # DOCTYPE (quirks) |
1794 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
1795 |
!!!cp (162); |
!!!cp (162); |
1796 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1797 |
|
$self->{s_kwd} = ''; |
1798 |
!!!next-input-character; |
!!!next-input-character; |
1799 |
|
|
1800 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
1804 |
!!!cp (163); |
!!!cp (163); |
1805 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1806 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1807 |
|
$self->{s_kwd} = ''; |
1808 |
## reconsume |
## reconsume |
1809 |
|
|
1810 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
1828 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
1829 |
!!!cp (166); |
!!!cp (166); |
1830 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1831 |
|
$self->{s_kwd} = ''; |
1832 |
!!!next-input-character; |
!!!next-input-character; |
1833 |
|
|
1834 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
1838 |
!!!cp (167); |
!!!cp (167); |
1839 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1840 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1841 |
|
$self->{s_kwd} = ''; |
1842 |
## reconsume |
## reconsume |
1843 |
|
|
1844 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
1967 |
!!!parse-error (type => 'no PUBLIC literal'); |
!!!parse-error (type => 'no PUBLIC literal'); |
1968 |
|
|
1969 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1970 |
|
$self->{s_kwd} = ''; |
1971 |
!!!next-input-character; |
!!!next-input-character; |
1972 |
|
|
1973 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
1979 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
1980 |
|
|
1981 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
1982 |
|
$self->{s_kwd} = ''; |
1983 |
## reconsume |
## reconsume |
1984 |
|
|
1985 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2006 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2007 |
|
|
2008 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2009 |
|
$self->{s_kwd} = ''; |
2010 |
!!!next-input-character; |
!!!next-input-character; |
2011 |
|
|
2012 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2018 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2019 |
|
|
2020 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2021 |
|
$self->{s_kwd} = ''; |
2022 |
## reconsume |
## reconsume |
2023 |
|
|
2024 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2047 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2048 |
|
|
2049 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2050 |
|
$self->{s_kwd} = ''; |
2051 |
!!!next-input-character; |
!!!next-input-character; |
2052 |
|
|
2053 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2059 |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
!!!parse-error (type => 'unclosed PUBLIC literal'); |
2060 |
|
|
2061 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2062 |
|
$self->{s_kwd} = ''; |
2063 |
## reconsume |
## reconsume |
2064 |
|
|
2065 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2098 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2099 |
!!!cp (198); |
!!!cp (198); |
2100 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2101 |
|
$self->{s_kwd} = ''; |
2102 |
!!!next-input-character; |
!!!next-input-character; |
2103 |
|
|
2104 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2109 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2110 |
|
|
2111 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2112 |
|
$self->{s_kwd} = ''; |
2113 |
## reconsume |
## reconsume |
2114 |
|
|
2115 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2147 |
!!!cp (204); |
!!!cp (204); |
2148 |
!!!parse-error (type => 'no SYSTEM literal'); |
!!!parse-error (type => 'no SYSTEM literal'); |
2149 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2150 |
|
$self->{s_kwd} = ''; |
2151 |
!!!next-input-character; |
!!!next-input-character; |
2152 |
|
|
2153 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2159 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2160 |
|
|
2161 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2162 |
|
$self->{s_kwd} = ''; |
2163 |
## reconsume |
## reconsume |
2164 |
|
|
2165 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2186 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2187 |
|
|
2188 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2189 |
|
$self->{s_kwd} = ''; |
2190 |
!!!next-input-character; |
!!!next-input-character; |
2191 |
|
|
2192 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2198 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2199 |
|
|
2200 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2201 |
|
$self->{s_kwd} = ''; |
2202 |
## reconsume |
## reconsume |
2203 |
|
|
2204 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2227 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2228 |
|
|
2229 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2230 |
|
$self->{s_kwd} = ''; |
2231 |
!!!next-input-character; |
!!!next-input-character; |
2232 |
|
|
2233 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2239 |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
!!!parse-error (type => 'unclosed SYSTEM literal'); |
2240 |
|
|
2241 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2242 |
|
$self->{s_kwd} = ''; |
2243 |
## reconsume |
## reconsume |
2244 |
|
|
2245 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2266 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
2267 |
!!!cp (216); |
!!!cp (216); |
2268 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2269 |
|
$self->{s_kwd} = ''; |
2270 |
!!!next-input-character; |
!!!next-input-character; |
2271 |
|
|
2272 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2276 |
!!!cp (217); |
!!!cp (217); |
2277 |
!!!parse-error (type => 'unclosed DOCTYPE'); |
!!!parse-error (type => 'unclosed DOCTYPE'); |
2278 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2279 |
|
$self->{s_kwd} = ''; |
2280 |
## reconsume |
## reconsume |
2281 |
|
|
2282 |
$self->{ct}->{quirks} = 1; |
$self->{ct}->{quirks} = 1; |
2296 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
2297 |
!!!cp (219); |
!!!cp (219); |
2298 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2299 |
|
$self->{s_kwd} = ''; |
2300 |
!!!next-input-character; |
!!!next-input-character; |
2301 |
|
|
2302 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2305 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2306 |
!!!cp (220); |
!!!cp (220); |
2307 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2308 |
|
$self->{s_kwd} = ''; |
2309 |
## reconsume |
## reconsume |
2310 |
|
|
2311 |
!!!emit ($self->{ct}); # DOCTYPE |
!!!emit ($self->{ct}); # DOCTYPE |
2332 |
redo A; |
redo A; |
2333 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2334 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2335 |
|
$self->{s_kwd} = ''; |
2336 |
!!!next-input-character; |
!!!next-input-character; |
2337 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
2338 |
!!!cp (221.2); |
!!!cp (221.2); |
2371 |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
} elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) { |
2372 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
2373 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2374 |
|
$self->{s_kwd} = ''; |
2375 |
!!!next-input-character; |
!!!next-input-character; |
2376 |
if (length $self->{ct}->{data}) { # character |
if (length $self->{ct}->{data}) { # character |
2377 |
!!!cp (221.7); |
!!!cp (221.7); |
2439 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2440 |
!!!cp (997); |
!!!cp (997); |
2441 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2442 |
|
$self->{s_kwd} = ''; |
2443 |
## Reconsume. |
## Reconsume. |
2444 |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
2445 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2450 |
!!!cp (996); |
!!!cp (996); |
2451 |
$self->{ca}->{value} .= '&'; |
$self->{ca}->{value} .= '&'; |
2452 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2453 |
|
$self->{s_kwd} = ''; |
2454 |
## Reconsume. |
## Reconsume. |
2455 |
redo A; |
redo A; |
2456 |
} |
} |
2481 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2482 |
!!!cp (1019); |
!!!cp (1019); |
2483 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2484 |
|
$self->{s_kwd} = ''; |
2485 |
## Reconsume. |
## Reconsume. |
2486 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2487 |
data => '&#', |
data => '&#', |
2493 |
!!!cp (993); |
!!!cp (993); |
2494 |
$self->{ca}->{value} .= '&#'; |
$self->{ca}->{value} .= '&#'; |
2495 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2496 |
|
$self->{s_kwd} = ''; |
2497 |
## Reconsume. |
## Reconsume. |
2498 |
redo A; |
redo A; |
2499 |
} |
} |
2539 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2540 |
!!!cp (992); |
!!!cp (992); |
2541 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2542 |
|
$self->{s_kwd} = ''; |
2543 |
## Reconsume. |
## Reconsume. |
2544 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2545 |
line => $l, column => $c, |
line => $l, column => $c, |
2550 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
2551 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
2552 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2553 |
|
$self->{s_kwd} = ''; |
2554 |
## Reconsume. |
## Reconsume. |
2555 |
redo A; |
redo A; |
2556 |
} |
} |
2576 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2577 |
!!!cp (1005); |
!!!cp (1005); |
2578 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2579 |
|
$self->{s_kwd} = ''; |
2580 |
## Reconsume. |
## Reconsume. |
2581 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2582 |
data => '&' . $self->{s_kwd}, |
data => '&' . $self->{s_kwd}, |
2588 |
!!!cp (989); |
!!!cp (989); |
2589 |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
$self->{ca}->{value} .= '&' . $self->{s_kwd}; |
2590 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2591 |
|
$self->{s_kwd} = ''; |
2592 |
## Reconsume. |
## Reconsume. |
2593 |
redo A; |
redo A; |
2594 |
} |
} |
2651 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2652 |
!!!cp (988); |
!!!cp (988); |
2653 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2654 |
|
$self->{s_kwd} = ''; |
2655 |
## Reconsume. |
## Reconsume. |
2656 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2657 |
line => $l, column => $c, |
line => $l, column => $c, |
2662 |
$self->{ca}->{value} .= chr $code; |
$self->{ca}->{value} .= chr $code; |
2663 |
$self->{ca}->{has_reference} = 1; |
$self->{ca}->{has_reference} = 1; |
2664 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2665 |
|
$self->{s_kwd} = ''; |
2666 |
## Reconsume. |
## Reconsume. |
2667 |
redo A; |
redo A; |
2668 |
} |
} |
2745 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2746 |
!!!cp (986); |
!!!cp (986); |
2747 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2748 |
|
$self->{s_kwd} = ''; |
2749 |
## Reconsume. |
## Reconsume. |
2750 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2751 |
data => $data, |
data => $data, |
2758 |
$self->{ca}->{value} .= $data; |
$self->{ca}->{value} .= $data; |
2759 |
$self->{ca}->{has_reference} = 1 if $has_ref; |
$self->{ca}->{has_reference} = 1 if $has_ref; |
2760 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2761 |
|
$self->{s_kwd} = ''; |
2762 |
## Reconsume. |
## Reconsume. |
2763 |
redo A; |
redo A; |
2764 |
} |
} |