/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.1 by wakaba, Tue Oct 14 02:27:58 2008 UTC (left column) | revision 1.5 by wakaba, Tue Oct 14 14:38:59 2008 UTC (right column)
# Line 2  package Whatpm::HTML::Tokenizer; Line 2  package Whatpm::HTML::Tokenizer;
2  use strict;  use strict;
3  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4    
5    BEGIN {
6      require Exporter;
7      push our @ISA, 'Exporter';
8    
9      our @EXPORT_OK = qw(
10        DOCTYPE_TOKEN
11        COMMENT_TOKEN
12        START_TAG_TOKEN
13        END_TAG_TOKEN
14        END_OF_FILE_TOKEN
15        CHARACTER_TOKEN
16        PI_TOKEN
17        ABORT_TOKEN
18      );
19      
20      our %EXPORT_TAGS = (
21        token => [qw(
22          DOCTYPE_TOKEN
23          COMMENT_TOKEN
24          START_TAG_TOKEN
25          END_TAG_TOKEN
26          END_OF_FILE_TOKEN
27          CHARACTER_TOKEN
28          PI_TOKEN
29          ABORT_TOKEN
30        )],
31      );
32    }
33    
34    ## Token types
35    
36    sub DOCTYPE_TOKEN () { 1 }
37    sub COMMENT_TOKEN () { 2 }
38    sub START_TAG_TOKEN () { 3 }
39    sub END_TAG_TOKEN () { 4 }
40    sub END_OF_FILE_TOKEN () { 5 }
41    sub CHARACTER_TOKEN () { 6 }
42    sub PI_TOKEN () { 7 } # XML5
43    sub ABORT_TOKEN () { 8 } # Not a token actually
44    
45  package Whatpm::HTML;  package Whatpm::HTML;
46    
47    BEGIN { Whatpm::HTML::Tokenizer->import (':token') }
48    
49  ## Content model flags  ## Content model flags
50    
51  sub CM_ENTITY () { 0b001 } # & markup in data  sub CM_ENTITY () { 0b001 } # & markup in data
# Line 72  sub HEXREF_HEX_STATE () { 48 } Line 114  sub HEXREF_HEX_STATE () { 48 }
114  sub ENTITY_NAME_STATE () { 49 }  sub ENTITY_NAME_STATE () { 49 }
115  sub PCDATA_STATE () { 50 } # "data state" in the spec  sub PCDATA_STATE () { 50 } # "data state" in the spec
116    
 ## Token types  
   
 sub DOCTYPE_TOKEN () { 1 }  
 sub COMMENT_TOKEN () { 2 }  
 sub START_TAG_TOKEN () { 3 }  
 sub END_TAG_TOKEN () { 4 }  
 sub END_OF_FILE_TOKEN () { 5 }  
 sub CHARACTER_TOKEN () { 6 }  
   
117  ## Tree constructor state constants (see Whatpm::HTML for the full  ## Tree constructor state constants (see Whatpm::HTML for the full
118  ## list and descriptions)  ## list and descriptions)
119    
# Line 142  sub _initialize_tokenizer ($) { Line 175  sub _initialize_tokenizer ($) {
175    #$self->{level}    #$self->{level}
176    #$self->{set_nc}    #$self->{set_nc}
177    #$self->{parse_error}    #$self->{parse_error}
178      #$self->{is_xml} (if XML)
179    
180    $self->{state} = DATA_STATE; # MUST    $self->{state} = DATA_STATE; # MUST
181    #$self->{s_kwd}; # state keyword - initialized when used    $self->{s_kwd} = ''; # state keyword
182    #$self->{entity__value}; # initialized when used    #$self->{entity__value}; # initialized when used
183    #$self->{entity__match}; # initialized when used    #$self->{entity__match}; # initialized when used
184    $self->{content_model} = PCDATA_CONTENT_MODEL; # be    $self->{content_model} = PCDATA_CONTENT_MODEL; # be
# Line 278  sub _get_next_token ($) { Line 312  sub _get_next_token ($) {
312          }          }
313        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
314          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA          if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
315            $self->{s_kwd} .= '-';            if ($self->{s_kwd} eq '<!-') {
             
           if ($self->{s_kwd} eq '<!--') {  
316              !!!cp (3);              !!!cp (3);
317              $self->{escape} = 1; # unless $self->{escape};              $self->{escape} = 1; # unless $self->{escape};
318              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
319              #              #
320            } elsif ($self->{s_kwd} eq '---') {            } elsif ($self->{s_kwd} eq '-') {
321              !!!cp (4);              !!!cp (4);
322              $self->{s_kwd} = '--';              $self->{s_kwd} = '--';
323              #              #
324              } elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') {
325                !!!cp (4.1);
326                $self->{s_kwd} .= '-';
327                #
328            } else {            } else {
329              !!!cp (5);              !!!cp (5);
330                $self->{s_kwd} = '-';
331              #              #
332            }            }
333          }          }
# Line 326  sub _get_next_token ($) { Line 363  sub _get_next_token ($) {
363            if ($self->{s_kwd} eq '--') {            if ($self->{s_kwd} eq '--') {
364              !!!cp (8);              !!!cp (8);
365              delete $self->{escape};              delete $self->{escape};
366                #
367            } else {            } else {
368              !!!cp (9);              !!!cp (9);
369                #
370            }            }
371            } elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') {
372              !!!cp (9.1);
373              !!!parse-error (type => 'unmatched mse', ## TODO: type
374                              line => $self->{line_prev},
375                              column => $self->{column_prev} - 1);
376              #
377          } else {          } else {
378            !!!cp (10);            !!!cp (10);
379              #
380          }          }
381                    
382          $self->{s_kwd} = '';          $self->{s_kwd} = '';
383          #          #
384          } elsif ($self->{nc} == 0x005D) { # ]
385            if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') {
386              !!!cp (10.1);
387              $self->{s_kwd} .= ']';
388            } elsif ($self->{s_kwd} eq ']]') {
389              !!!cp (10.2);
390              #
391            } else {
392              !!!cp (10.3);
393              $self->{s_kwd} = '';
394            }
395            #
396        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
397          !!!cp (11);          !!!cp (11);
398          $self->{s_kwd} = '';          $self->{s_kwd} = '';
# Line 352  sub _get_next_token ($) { Line 410  sub _get_next_token ($) {
410                     data => chr $self->{nc},                     data => chr $self->{nc},
411                     line => $self->{line}, column => $self->{column},                     line => $self->{line}, column => $self->{column},
412                    };                    };
413        if ($self->{read_until}->($token->{data}, q[-!<>&],        if ($self->{read_until}->($token->{data}, q{-!<>&\]},
414                                  length $token->{data})) {                                  length $token->{data})) {
415          $self->{s_kwd} = '';          $self->{s_kwd} = '';
416        }        }
417    
418        ## Stay in the data state.        ## Stay in the data state.
419        if ($self->{content_model} == PCDATA_CONTENT_MODEL) {        if (not $self->{is_xml} and
420              $self->{content_model} == PCDATA_CONTENT_MODEL) {
421          !!!cp (13);          !!!cp (13);
422          $self->{state} = PCDATA_STATE;          $self->{state} = PCDATA_STATE;
423        } else {        } else {
# Line 386  sub _get_next_token ($) { Line 445  sub _get_next_token ($) {
445    
446          ## reconsume          ## reconsume
447          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
448            $self->{s_kwd} = '';
449          !!!emit ({type => CHARACTER_TOKEN, data => '<',          !!!emit ({type => CHARACTER_TOKEN, data => '<',
450                    line => $self->{line_prev},                    line => $self->{line_prev},
451                    column => $self->{column_prev},                    column => $self->{column_prev},
# Line 407  sub _get_next_token ($) { Line 467  sub _get_next_token ($) {
467            !!!cp (19);            !!!cp (19);
468            $self->{ct}            $self->{ct}
469              = {type => START_TAG_TOKEN,              = {type => START_TAG_TOKEN,
470                 tag_name => chr ($self->{nc} + 0x0020),                 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
471                 line => $self->{line_prev},                 line => $self->{line_prev},
472                 column => $self->{column_prev}};                 column => $self->{column_prev}};
473            $self->{state} = TAG_NAME_STATE;            $self->{state} = TAG_NAME_STATE;
# Line 429  sub _get_next_token ($) { Line 489  sub _get_next_token ($) {
489                            line => $self->{line_prev},                            line => $self->{line_prev},
490                            column => $self->{column_prev});                            column => $self->{column_prev});
491            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
492              $self->{s_kwd} = '';
493            !!!next-input-character;            !!!next-input-character;
494    
495            !!!emit ({type => CHARACTER_TOKEN, data => '<>',            !!!emit ({type => CHARACTER_TOKEN, data => '<>',
# Line 455  sub _get_next_token ($) { Line 516  sub _get_next_token ($) {
516                            line => $self->{line_prev},                            line => $self->{line_prev},
517                            column => $self->{column_prev});                            column => $self->{column_prev});
518            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
519              $self->{s_kwd} = '';
520            ## reconsume            ## reconsume
521    
522            !!!emit ({type => CHARACTER_TOKEN, data => '<',            !!!emit ({type => CHARACTER_TOKEN, data => '<',
# Line 483  sub _get_next_token ($) { Line 545  sub _get_next_token ($) {
545            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.            ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
546            !!!cp (28);            !!!cp (28);
547            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
548              $self->{s_kwd} = '';
549            ## Reconsume.            ## Reconsume.
550            !!!emit ({type => CHARACTER_TOKEN, data => '</',            !!!emit ({type => CHARACTER_TOKEN, data => '</',
551                      line => $l, column => $c,                      line => $l, column => $c,
# Line 496  sub _get_next_token ($) { Line 559  sub _get_next_token ($) {
559          !!!cp (29);          !!!cp (29);
560          $self->{ct}          $self->{ct}
561              = {type => END_TAG_TOKEN,              = {type => END_TAG_TOKEN,
562                 tag_name => chr ($self->{nc} + 0x0020),                 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
563                 line => $l, column => $c};                 line => $l, column => $c};
564          $self->{state} = TAG_NAME_STATE;          $self->{state} = TAG_NAME_STATE;
565          !!!next-input-character;          !!!next-input-character;
# Line 516  sub _get_next_token ($) { Line 579  sub _get_next_token ($) {
579                          line => $self->{line_prev}, ## "<" in "</>"                          line => $self->{line_prev}, ## "<" in "</>"
580                          column => $self->{column_prev} - 1);                          column => $self->{column_prev} - 1);
581          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
582            $self->{s_kwd} = '';
583          !!!next-input-character;          !!!next-input-character;
584          redo A;          redo A;
585        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
586          !!!cp (32);          !!!cp (32);
587          !!!parse-error (type => 'bare etago');          !!!parse-error (type => 'bare etago');
588            $self->{s_kwd} = '';
589          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
590          # reconsume          # reconsume
591    
# Line 560  sub _get_next_token ($) { Line 625  sub _get_next_token ($) {
625          } else {          } else {
626            !!!cp (25);            !!!cp (25);
627            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
628              $self->{s_kwd} = '';
629            ## Reconsume.            ## Reconsume.
630            !!!emit ({type => CHARACTER_TOKEN,            !!!emit ({type => CHARACTER_TOKEN,
631                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{s_kwd},
# Line 578  sub _get_next_token ($) { Line 644  sub _get_next_token ($) {
644            !!!cp (26);            !!!cp (26);
645            ## Reconsume.            ## Reconsume.
646            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
647              $self->{s_kwd} = '';
648            !!!emit ({type => CHARACTER_TOKEN,            !!!emit ({type => CHARACTER_TOKEN,
649                      data => '</' . $self->{s_kwd},                      data => '</' . $self->{s_kwd},
650                      line => $self->{line_prev},                      line => $self->{line_prev},
# Line 619  sub _get_next_token ($) { Line 686  sub _get_next_token ($) {
686            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
687          }          }
688          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
689            $self->{s_kwd} = '';
690          !!!next-input-character;          !!!next-input-character;
691    
692          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 627  sub _get_next_token ($) { Line 695  sub _get_next_token ($) {
695        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
696                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
697          !!!cp (38);          !!!cp (38);
698          $self->{ct}->{tag_name} .= chr ($self->{nc} + 0x0020);          $self->{ct}->{tag_name}
699                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
700            # start tag or end tag            # start tag or end tag
701          ## Stay in this state          ## Stay in this state
702          !!!next-input-character;          !!!next-input-character;
# Line 650  sub _get_next_token ($) { Line 719  sub _get_next_token ($) {
719            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
720          }          }
721          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
722            $self->{s_kwd} = '';
723          # reconsume          # reconsume
724    
725          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 690  sub _get_next_token ($) { Line 760  sub _get_next_token ($) {
760            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
761          }          }
762          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
763            $self->{s_kwd} = '';
764          !!!next-input-character;          !!!next-input-character;
765    
766          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 699  sub _get_next_token ($) { Line 770  sub _get_next_token ($) {
770                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
771          !!!cp (49);          !!!cp (49);
772          $self->{ca}          $self->{ca}
773              = {name => chr ($self->{nc} + 0x0020),              = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
774                 value => '',                 value => '',
775                 line => $self->{line}, column => $self->{column}};                 line => $self->{line}, column => $self->{column}};
776          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
# Line 727  sub _get_next_token ($) { Line 798  sub _get_next_token ($) {
798            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
799          }          }
800          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
801            $self->{s_kwd} = '';
802          # reconsume          # reconsume
803    
804          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 792  sub _get_next_token ($) { Line 864  sub _get_next_token ($) {
864            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
865          }          }
866          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
867            $self->{s_kwd} = '';
868          !!!next-input-character;          !!!next-input-character;
869    
870          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 800  sub _get_next_token ($) { Line 873  sub _get_next_token ($) {
873        } elsif (0x0041 <= $self->{nc} and        } elsif (0x0041 <= $self->{nc} and
874                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
875          !!!cp (63);          !!!cp (63);
876          $self->{ca}->{name} .= chr ($self->{nc} + 0x0020);          $self->{ca}->{name}
877                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
878          ## Stay in the state          ## Stay in the state
879          !!!next-input-character;          !!!next-input-character;
880          redo A;          redo A;
# Line 829  sub _get_next_token ($) { Line 903  sub _get_next_token ($) {
903            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
904          }          }
905          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
906            $self->{s_kwd} = '';
907          # reconsume          # reconsume
908    
909          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 875  sub _get_next_token ($) { Line 950  sub _get_next_token ($) {
950            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
951          }          }
952          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
953            $self->{s_kwd} = '';
954          !!!next-input-character;          !!!next-input-character;
955    
956          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 884  sub _get_next_token ($) { Line 960  sub _get_next_token ($) {
960                 $self->{nc} <= 0x005A) { # A..Z                 $self->{nc} <= 0x005A) { # A..Z
961          !!!cp (76);          !!!cp (76);
962          $self->{ca}          $self->{ca}
963              = {name => chr ($self->{nc} + 0x0020),              = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
964                 value => '',                 value => '',
965                 line => $self->{line}, column => $self->{column}};                 line => $self->{line}, column => $self->{column}};
966          $self->{state} = ATTRIBUTE_NAME_STATE;          $self->{state} = ATTRIBUTE_NAME_STATE;
# Line 912  sub _get_next_token ($) { Line 988  sub _get_next_token ($) {
988          } else {          } else {
989            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
990          }          }
991            $self->{s_kwd} = '';
992          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
993          # reconsume          # reconsume
994    
# Line 973  sub _get_next_token ($) { Line 1050  sub _get_next_token ($) {
1050            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1051          }          }
1052          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1053            $self->{s_kwd} = '';
1054          !!!next-input-character;          !!!next-input-character;
1055    
1056          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 996  sub _get_next_token ($) { Line 1074  sub _get_next_token ($) {
1074            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1075          }          }
1076          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1077            $self->{s_kwd} = '';
1078          ## reconsume          ## reconsume
1079    
1080          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 1048  sub _get_next_token ($) { Line 1127  sub _get_next_token ($) {
1127            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1128          }          }
1129          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1130            $self->{s_kwd} = '';
1131          ## reconsume          ## reconsume
1132    
1133          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 1099  sub _get_next_token ($) { Line 1179  sub _get_next_token ($) {
1179            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1180          }          }
1181          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1182            $self->{s_kwd} = '';
1183          ## reconsume          ## reconsume
1184    
1185          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 1149  sub _get_next_token ($) { Line 1230  sub _get_next_token ($) {
1230            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1231          }          }
1232          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1233            $self->{s_kwd} = '';
1234          !!!next-input-character;          !!!next-input-character;
1235    
1236          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 1172  sub _get_next_token ($) { Line 1254  sub _get_next_token ($) {
1254            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1255          }          }
1256          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1257            $self->{s_kwd} = '';
1258          ## reconsume          ## reconsume
1259    
1260          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 1220  sub _get_next_token ($) { Line 1303  sub _get_next_token ($) {
1303            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1304          }          }
1305          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1306            $self->{s_kwd} = '';
1307          !!!next-input-character;          !!!next-input-character;
1308    
1309          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 1247  sub _get_next_token ($) { Line 1331  sub _get_next_token ($) {
1331            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1332          }          }
1333          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1334            $self->{s_kwd} = '';
1335          ## Reconsume.          ## Reconsume.
1336          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
1337          redo A;          redo A;
# Line 1277  sub _get_next_token ($) { Line 1362  sub _get_next_token ($) {
1362          }          }
1363    
1364          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1365            $self->{s_kwd} = '';
1366          !!!next-input-character;          !!!next-input-character;
1367    
1368          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
# Line 1299  sub _get_next_token ($) { Line 1385  sub _get_next_token ($) {
1385            die "$0: $self->{ct}->{type}: Unknown token type";            die "$0: $self->{ct}->{type}: Unknown token type";
1386          }          }
1387          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1388            $self->{s_kwd} = '';
1389          ## Reconsume.          ## Reconsume.
1390          !!!emit ($self->{ct}); # start tag or end tag          !!!emit ($self->{ct}); # start tag or end tag
1391          redo A;          redo A;
# Line 1319  sub _get_next_token ($) { Line 1406  sub _get_next_token ($) {
1406        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
1407          !!!cp (124);          !!!cp (124);
1408          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1409            $self->{s_kwd} = '';
1410          !!!next-input-character;          !!!next-input-character;
1411    
1412          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1326  sub _get_next_token ($) { Line 1414  sub _get_next_token ($) {
1414        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1415          !!!cp (125);          !!!cp (125);
1416          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1417            $self->{s_kwd} = '';
1418          ## reconsume          ## reconsume
1419    
1420          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1357  sub _get_next_token ($) { Line 1446  sub _get_next_token ($) {
1446          $self->{s_kwd} = chr $self->{nc};          $self->{s_kwd} = chr $self->{nc};
1447          !!!next-input-character;          !!!next-input-character;
1448          redo A;          redo A;
1449        } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and        } elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1450                 $self->{open_elements}->[-1]->[1] & FOREIGN_EL and                   $self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1451                    $self->{is_xml}) and
1452                 $self->{nc} == 0x005B) { # [                 $self->{nc} == 0x005B) { # [
1453          !!!cp (135.4);                          !!!cp (135.4);                
1454          $self->{state} = MD_CDATA_STATE;          $self->{state} = MD_CDATA_STATE;
# Line 1499  sub _get_next_token ($) { Line 1589  sub _get_next_token ($) {
1589          !!!cp (138);          !!!cp (138);
1590          !!!parse-error (type => 'bogus comment');          !!!parse-error (type => 'bogus comment');
1591          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1592            $self->{s_kwd} = '';
1593          !!!next-input-character;          !!!next-input-character;
1594    
1595          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1508  sub _get_next_token ($) { Line 1599  sub _get_next_token ($) {
1599          !!!cp (139);          !!!cp (139);
1600          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1601          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1602            $self->{s_kwd} = '';
1603          ## reconsume          ## reconsume
1604    
1605          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1531  sub _get_next_token ($) { Line 1623  sub _get_next_token ($) {
1623          !!!cp (142);          !!!cp (142);
1624          !!!parse-error (type => 'bogus comment');          !!!parse-error (type => 'bogus comment');
1625          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1626            $self->{s_kwd} = '';
1627          !!!next-input-character;          !!!next-input-character;
1628    
1629          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1540  sub _get_next_token ($) { Line 1633  sub _get_next_token ($) {
1633          !!!cp (143);          !!!cp (143);
1634          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1635          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1636            $self->{s_kwd} = '';
1637          ## reconsume          ## reconsume
1638    
1639          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1563  sub _get_next_token ($) { Line 1657  sub _get_next_token ($) {
1657          !!!cp (146);          !!!cp (146);
1658          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1659          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1660            $self->{s_kwd} = '';
1661          ## reconsume          ## reconsume
1662    
1663          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1588  sub _get_next_token ($) { Line 1683  sub _get_next_token ($) {
1683        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1684          !!!cp (149);          !!!cp (149);
1685          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1686            $self->{s_kwd} = '';
1687          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1688            $self->{s_kwd} = '';
1689          ## reconsume          ## reconsume
1690    
1691          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1605  sub _get_next_token ($) { Line 1702  sub _get_next_token ($) {
1702        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
1703          !!!cp (151);          !!!cp (151);
1704          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1705            $self->{s_kwd} = '';
1706          !!!next-input-character;          !!!next-input-character;
1707    
1708          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1623  sub _get_next_token ($) { Line 1721  sub _get_next_token ($) {
1721          !!!cp (153);          !!!cp (153);
1722          !!!parse-error (type => 'unclosed comment');          !!!parse-error (type => 'unclosed comment');
1723          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1724            $self->{s_kwd} = '';
1725          ## reconsume          ## reconsume
1726    
1727          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
# Line 1661  sub _get_next_token ($) { Line 1760  sub _get_next_token ($) {
1760          !!!cp (158);          !!!cp (158);
1761          !!!parse-error (type => 'no DOCTYPE name');          !!!parse-error (type => 'no DOCTYPE name');
1762          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1763            $self->{s_kwd} = '';
1764          !!!next-input-character;          !!!next-input-character;
1765    
1766          !!!emit ($self->{ct}); # DOCTYPE (quirks)          !!!emit ($self->{ct}); # DOCTYPE (quirks)
# Line 1670  sub _get_next_token ($) { Line 1770  sub _get_next_token ($) {
1770          !!!cp (159);          !!!cp (159);
1771          !!!parse-error (type => 'no DOCTYPE name');          !!!parse-error (type => 'no DOCTYPE name');
1772          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1773            $self->{s_kwd} = '';
1774          ## reconsume          ## reconsume
1775    
1776          !!!emit ($self->{ct}); # DOCTYPE (quirks)          !!!emit ($self->{ct}); # DOCTYPE (quirks)
# Line 1693  sub _get_next_token ($) { Line 1794  sub _get_next_token ($) {
1794        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1795          !!!cp (162);          !!!cp (162);
1796          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1797            $self->{s_kwd} = '';
1798          !!!next-input-character;          !!!next-input-character;
1799    
1800          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
# Line 1702  sub _get_next_token ($) { Line 1804  sub _get_next_token ($) {
1804          !!!cp (163);          !!!cp (163);
1805          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1806          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1807            $self->{s_kwd} = '';
1808          ## reconsume          ## reconsume
1809    
1810          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1725  sub _get_next_token ($) { Line 1828  sub _get_next_token ($) {
1828        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
1829          !!!cp (166);          !!!cp (166);
1830          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1831            $self->{s_kwd} = '';
1832          !!!next-input-character;          !!!next-input-character;
1833    
1834          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
# Line 1734  sub _get_next_token ($) { Line 1838  sub _get_next_token ($) {
1838          !!!cp (167);          !!!cp (167);
1839          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1840          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1841            $self->{s_kwd} = '';
1842          ## reconsume          ## reconsume
1843    
1844          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1862  sub _get_next_token ($) { Line 1967  sub _get_next_token ($) {
1967          !!!parse-error (type => 'no PUBLIC literal');          !!!parse-error (type => 'no PUBLIC literal');
1968    
1969          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1970            $self->{s_kwd} = '';
1971          !!!next-input-character;          !!!next-input-character;
1972    
1973          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1873  sub _get_next_token ($) { Line 1979  sub _get_next_token ($) {
1979          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
1980    
1981          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
1982            $self->{s_kwd} = '';
1983          ## reconsume          ## reconsume
1984    
1985          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1899  sub _get_next_token ($) { Line 2006  sub _get_next_token ($) {
2006          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2007    
2008          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2009            $self->{s_kwd} = '';
2010          !!!next-input-character;          !!!next-input-character;
2011    
2012          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1910  sub _get_next_token ($) { Line 2018  sub _get_next_token ($) {
2018          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2019    
2020          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2021            $self->{s_kwd} = '';
2022          ## reconsume          ## reconsume
2023    
2024          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1938  sub _get_next_token ($) { Line 2047  sub _get_next_token ($) {
2047          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2048    
2049          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2050            $self->{s_kwd} = '';
2051          !!!next-input-character;          !!!next-input-character;
2052    
2053          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1949  sub _get_next_token ($) { Line 2059  sub _get_next_token ($) {
2059          !!!parse-error (type => 'unclosed PUBLIC literal');          !!!parse-error (type => 'unclosed PUBLIC literal');
2060    
2061          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2062            $self->{s_kwd} = '';
2063          ## reconsume          ## reconsume
2064    
2065          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 1987  sub _get_next_token ($) { Line 2098  sub _get_next_token ($) {
2098        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2099          !!!cp (198);          !!!cp (198);
2100          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2101            $self->{s_kwd} = '';
2102          !!!next-input-character;          !!!next-input-character;
2103    
2104          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
# Line 1997  sub _get_next_token ($) { Line 2109  sub _get_next_token ($) {
2109          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
2110    
2111          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2112            $self->{s_kwd} = '';
2113          ## reconsume          ## reconsume
2114    
2115          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2034  sub _get_next_token ($) { Line 2147  sub _get_next_token ($) {
2147          !!!cp (204);          !!!cp (204);
2148          !!!parse-error (type => 'no SYSTEM literal');          !!!parse-error (type => 'no SYSTEM literal');
2149          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2150            $self->{s_kwd} = '';
2151          !!!next-input-character;          !!!next-input-character;
2152    
2153          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2045  sub _get_next_token ($) { Line 2159  sub _get_next_token ($) {
2159          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
2160    
2161          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2162            $self->{s_kwd} = '';
2163          ## reconsume          ## reconsume
2164    
2165          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2071  sub _get_next_token ($) { Line 2186  sub _get_next_token ($) {
2186          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
2187    
2188          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2189            $self->{s_kwd} = '';
2190          !!!next-input-character;          !!!next-input-character;
2191    
2192          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2082  sub _get_next_token ($) { Line 2198  sub _get_next_token ($) {
2198          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
2199    
2200          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2201            $self->{s_kwd} = '';
2202          ## reconsume          ## reconsume
2203    
2204          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2110  sub _get_next_token ($) { Line 2227  sub _get_next_token ($) {
2227          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
2228    
2229          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2230            $self->{s_kwd} = '';
2231          !!!next-input-character;          !!!next-input-character;
2232    
2233          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2121  sub _get_next_token ($) { Line 2239  sub _get_next_token ($) {
2239          !!!parse-error (type => 'unclosed SYSTEM literal');          !!!parse-error (type => 'unclosed SYSTEM literal');
2240    
2241          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2242            $self->{s_kwd} = '';
2243          ## reconsume          ## reconsume
2244    
2245          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2147  sub _get_next_token ($) { Line 2266  sub _get_next_token ($) {
2266        } elsif ($self->{nc} == 0x003E) { # >        } elsif ($self->{nc} == 0x003E) { # >
2267          !!!cp (216);          !!!cp (216);
2268          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2269            $self->{s_kwd} = '';
2270          !!!next-input-character;          !!!next-input-character;
2271    
2272          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
# Line 2156  sub _get_next_token ($) { Line 2276  sub _get_next_token ($) {
2276          !!!cp (217);          !!!cp (217);
2277          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
2278          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2279            $self->{s_kwd} = '';
2280          ## reconsume          ## reconsume
2281    
2282          $self->{ct}->{quirks} = 1;          $self->{ct}->{quirks} = 1;
# Line 2175  sub _get_next_token ($) { Line 2296  sub _get_next_token ($) {
2296        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2297          !!!cp (219);          !!!cp (219);
2298          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2299            $self->{s_kwd} = '';
2300          !!!next-input-character;          !!!next-input-character;
2301    
2302          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
# Line 2183  sub _get_next_token ($) { Line 2305  sub _get_next_token ($) {
2305        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2306          !!!cp (220);          !!!cp (220);
2307          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2308            $self->{s_kwd} = '';
2309          ## reconsume          ## reconsume
2310    
2311          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
# Line 2209  sub _get_next_token ($) { Line 2332  sub _get_next_token ($) {
2332          redo A;          redo A;
2333        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2334          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2335            $self->{s_kwd} = '';
2336          !!!next-input-character;          !!!next-input-character;
2337          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
2338            !!!cp (221.2);            !!!cp (221.2);
# Line 2247  sub _get_next_token ($) { Line 2371  sub _get_next_token ($) {
2371      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {      } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
2372        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2373          $self->{state} = DATA_STATE;          $self->{state} = DATA_STATE;
2374            $self->{s_kwd} = '';
2375          !!!next-input-character;          !!!next-input-character;
2376          if (length $self->{ct}->{data}) { # character          if (length $self->{ct}->{data}) { # character
2377            !!!cp (221.7);            !!!cp (221.7);
# Line 2314  sub _get_next_token ($) { Line 2439  sub _get_next_token ($) {
2439        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
2440          !!!cp (997);          !!!cp (997);
2441          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2442            $self->{s_kwd} = '';
2443          ## Reconsume.          ## Reconsume.
2444          !!!emit ({type => CHARACTER_TOKEN, data => '&',          !!!emit ({type => CHARACTER_TOKEN, data => '&',
2445                    line => $self->{line_prev},                    line => $self->{line_prev},
# Line 2324  sub _get_next_token ($) { Line 2450  sub _get_next_token ($) {
2450          !!!cp (996);          !!!cp (996);
2451          $self->{ca}->{value} .= '&';          $self->{ca}->{value} .= '&';
2452          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2453            $self->{s_kwd} = '';
2454          ## Reconsume.          ## Reconsume.
2455          redo A;          redo A;
2456        }        }
# Line 2354  sub _get_next_token ($) { Line 2481  sub _get_next_token ($) {
2481          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
2482            !!!cp (1019);            !!!cp (1019);
2483            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
2484              $self->{s_kwd} = '';
2485            ## Reconsume.            ## Reconsume.
2486            !!!emit ({type => CHARACTER_TOKEN,            !!!emit ({type => CHARACTER_TOKEN,
2487                      data => '&#',                      data => '&#',
# Line 2365  sub _get_next_token ($) { Line 2493  sub _get_next_token ($) {
2493            !!!cp (993);            !!!cp (993);
2494            $self->{ca}->{value} .= '&#';            $self->{ca}->{value} .= '&#';
2495            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
2496              $self->{s_kwd} = '';
2497            ## Reconsume.            ## Reconsume.
2498            redo A;            redo A;
2499          }          }
# Line 2410  sub _get_next_token ($) { Line 2539  sub _get_next_token ($) {
2539        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
2540          !!!cp (992);          !!!cp (992);
2541          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2542            $self->{s_kwd} = '';
2543          ## Reconsume.          ## Reconsume.
2544          !!!emit ({type => CHARACTER_TOKEN, data => chr $code,          !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
2545                    line => $l, column => $c,                    line => $l, column => $c,
# Line 2420  sub _get_next_token ($) { Line 2550  sub _get_next_token ($) {
2550          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
2551          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
2552          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2553            $self->{s_kwd} = '';
2554          ## Reconsume.          ## Reconsume.
2555          redo A;          redo A;
2556        }        }
# Line 2445  sub _get_next_token ($) { Line 2576  sub _get_next_token ($) {
2576          if ($self->{prev_state} == DATA_STATE) {          if ($self->{prev_state} == DATA_STATE) {
2577            !!!cp (1005);            !!!cp (1005);
2578            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
2579              $self->{s_kwd} = '';
2580            ## Reconsume.            ## Reconsume.
2581            !!!emit ({type => CHARACTER_TOKEN,            !!!emit ({type => CHARACTER_TOKEN,
2582                      data => '&' . $self->{s_kwd},                      data => '&' . $self->{s_kwd},
# Line 2456  sub _get_next_token ($) { Line 2588  sub _get_next_token ($) {
2588            !!!cp (989);            !!!cp (989);
2589            $self->{ca}->{value} .= '&' . $self->{s_kwd};            $self->{ca}->{value} .= '&' . $self->{s_kwd};
2590            $self->{state} = $self->{prev_state};            $self->{state} = $self->{prev_state};
2591              $self->{s_kwd} = '';
2592            ## Reconsume.            ## Reconsume.
2593            redo A;            redo A;
2594          }          }
# Line 2518  sub _get_next_token ($) { Line 2651  sub _get_next_token ($) {
2651        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
2652          !!!cp (988);          !!!cp (988);
2653          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2654            $self->{s_kwd} = '';
2655          ## Reconsume.          ## Reconsume.
2656          !!!emit ({type => CHARACTER_TOKEN, data => chr $code,          !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
2657                    line => $l, column => $c,                    line => $l, column => $c,
# Line 2528  sub _get_next_token ($) { Line 2662  sub _get_next_token ($) {
2662          $self->{ca}->{value} .= chr $code;          $self->{ca}->{value} .= chr $code;
2663          $self->{ca}->{has_reference} = 1;          $self->{ca}->{has_reference} = 1;
2664          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2665            $self->{s_kwd} = '';
2666          ## Reconsume.          ## Reconsume.
2667          redo A;          redo A;
2668        }        }
# Line 2610  sub _get_next_token ($) { Line 2745  sub _get_next_token ($) {
2745        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
2746          !!!cp (986);          !!!cp (986);
2747          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2748            $self->{s_kwd} = '';
2749          ## Reconsume.          ## Reconsume.
2750          !!!emit ({type => CHARACTER_TOKEN,          !!!emit ({type => CHARACTER_TOKEN,
2751                    data => $data,                    data => $data,
# Line 2622  sub _get_next_token ($) { Line 2758  sub _get_next_token ($) {
2758          $self->{ca}->{value} .= $data;          $self->{ca}->{value} .= $data;
2759          $self->{ca}->{has_reference} = 1 if $has_ref;          $self->{ca}->{has_reference} = 1 if $has_ref;
2760          $self->{state} = $self->{prev_state};          $self->{state} = $self->{prev_state};
2761            $self->{s_kwd} = '';
2762          ## Reconsume.          ## Reconsume.
2763          redo A;          redo A;
2764        }        }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.5

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24