/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.26 by wakaba, Thu Jul 2 21:42:43 2009 UTC | revision 1.31 by wakaba, Sat Sep 5 09:26:55 2009 UTC
# Line 105  sub COMMENT_START_STATE () { 14 } Line 105  sub COMMENT_START_STATE () { 14 }
105  sub COMMENT_START_DASH_STATE () { 15 }  sub COMMENT_START_DASH_STATE () { 15 }
106  sub COMMENT_STATE () { 16 }  sub COMMENT_STATE () { 16 }
107  sub COMMENT_END_STATE () { 17 }  sub COMMENT_END_STATE () { 17 }
108    sub COMMENT_END_BANG_STATE () { 102 } ## LAST
109  sub COMMENT_END_DASH_STATE () { 18 }  sub COMMENT_END_DASH_STATE () { 18 }
110  sub BOGUS_COMMENT_STATE () { 19 }  sub BOGUS_COMMENT_STATE () { 19 }
111  sub DOCTYPE_STATE () { 20 }  sub DOCTYPE_STATE () { 20 }
# Line 948  sub _get_next_token ($) { Line 949  sub _get_next_token ($) {
949          if ({          if ({
950               0x0022 => 1, # "               0x0022 => 1, # "
951               0x0027 => 1, # '               0x0027 => 1, # '
952                 0x003C => 1, # <
953               0x003D => 1, # =               0x003D => 1, # =
954              }->{$self->{nc}}) {              }->{$self->{nc}}) {
955            !!!cp (55);            !!!cp (55);
# Line 1070  sub _get_next_token ($) { Line 1072  sub _get_next_token ($) {
1072    
1073          redo A;          redo A;
1074        } else {        } else {
1075          if ($self->{nc} == 0x0022 or # "          if ({
1076              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1077                 0x0027 => 1, # '
1078                 0x003C => 1, # <
1079                }->{$self->{nc}}) {
1080            !!!cp (69);            !!!cp (69);
1081            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1082            !!!parse-error (type => 'bad attribute name');            !!!parse-error (type => 'bad attribute name');
# Line 1182  sub _get_next_token ($) { Line 1187  sub _get_next_token ($) {
1187            !!!cp (78.2);            !!!cp (78.2);
1188          }          }
1189    
1190          if ($self->{nc} == 0x0022 or # "          if ({
1191              $self->{nc} == 0x0027) { # '               0x0022 => 1, # "
1192                 0x0027 => 1, # '
1193                 0x003C => 1, # <
1194                }->{$self->{nc}}) {
1195            !!!cp (78);            !!!cp (78);
1196            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1197            !!!parse-error (type => 'bad attribute name');            !!!parse-error (type => 'bad attribute name');
# Line 2075  sub _get_next_token ($) { Line 2083  sub _get_next_token ($) {
2083          !!!next-input-character;          !!!next-input-character;
2084          redo A;          redo A;
2085        }        }
2086      } elsif ($self->{state} == COMMENT_END_STATE) {      } elsif ($self->{state} == COMMENT_END_STATE or
2087                 $self->{state} == COMMENT_END_BANG_STATE) {
2088        ## XML5: "Comment end state" and "DOCTYPE comment end state".        ## XML5: "Comment end state" and "DOCTYPE comment end state".
2089          ## (No comment end bang state.)
2090    
2091        if ($self->{nc} == 0x003E) { # >        if ($self->{nc} == 0x003E) { # >
2092          if ($self->{in_subset}) {          if ($self->{in_subset}) {
# Line 2093  sub _get_next_token ($) { Line 2103  sub _get_next_token ($) {
2103    
2104          redo A;          redo A;
2105        } elsif ($self->{nc} == 0x002D) { # -        } elsif ($self->{nc} == 0x002D) { # -
2106          !!!cp (152);          if ($self->{state} == COMMENT_END_BANG_STATE) {
2107          ## XML5: Not a parse error.            !!!cp (154.3);
2108          !!!parse-error (type => 'dash in comment',            $self->{ct}->{data} .= '--!'; # comment
2109                          line => $self->{line_prev},            $self->{state} = COMMENT_END_DASH_STATE;
2110                          column => $self->{column_prev});          } else {
2111          $self->{ct}->{data} .= '-'; # comment            !!!cp (152);
2112          ## Stay in the state            ## XML5: Not a parse error.
2113              !!!parse-error (type => 'dash in comment',
2114                              line => $self->{line_prev},
2115                              column => $self->{column_prev});
2116              $self->{ct}->{data} .= '-'; # comment
2117              ## Stay in the state
2118            }
2119            !!!next-input-character;
2120            redo A;
2121          } elsif ($self->{nc} == 0x0021 and # !
2122                   $self->{state} != COMMENT_END_BANG_STATE) {
2123            !!!parse-error (type => 'comment end bang'); # XXX error type
2124            $self->{state} = COMMENT_END_BANG_STATE;
2125          !!!next-input-character;          !!!next-input-character;
2126          redo A;          redo A;
2127        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
# Line 2112  sub _get_next_token ($) { Line 2134  sub _get_next_token ($) {
2134            $self->{state} = DATA_STATE;            $self->{state} = DATA_STATE;
2135            $self->{s_kwd} = '';            $self->{s_kwd} = '';
2136          }          }
2137          ## reconsume          ## Reconsume.
2138    
2139          !!!emit ($self->{ct}); # comment          !!!emit ($self->{ct}); # comment
2140    
2141          redo A;          redo A;
2142        } else {        } else {
2143          !!!cp (154);          !!!cp (154);
2144          ## XML5: Not a parse error.          if ($self->{state} == COMMENT_END_BANG_STATE) {
2145          !!!parse-error (type => 'dash in comment',            $self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment
2146                          line => $self->{line_prev},          } else {
2147                          column => $self->{column_prev});            $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
2148          $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment          }
2149          $self->{state} = COMMENT_STATE;          $self->{state} = COMMENT_STATE;
2150          !!!next-input-character;          !!!next-input-character;
2151          redo A;          redo A;
# Line 2134  sub _get_next_token ($) { Line 2156  sub _get_next_token ($) {
2156          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2157          !!!next-input-character;          !!!next-input-character;
2158          redo A;          redo A;
2159          } elsif ($self->{nc} == -1) {
2160            !!!cp (155.1);
2161            !!!parse-error (type => 'unclosed DOCTYPE');
2162            $self->{ct}->{quirks} = 1;
2163    
2164            $self->{state} = DATA_STATE;
2165            ## Reconsume.
2166            !!!emit ($self->{ct}); # DOCTYPE (quirks)
2167    
2168            redo A;
2169        } else {        } else {
2170          !!!cp (156);          !!!cp (156);
2171          ## XML5: Unless EOF, swith to the bogus comment state.          ## XML5: Swith to the bogus comment state.
2172          !!!parse-error (type => 'no space before DOCTYPE name');          !!!parse-error (type => 'no space before DOCTYPE name');
2173          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2174          ## reconsume          ## reconsume
# Line 2161  sub _get_next_token ($) { Line 2193  sub _get_next_token ($) {
2193          !!!emit ($self->{ct}); # DOCTYPE (quirks)          !!!emit ($self->{ct}); # DOCTYPE (quirks)
2194    
2195          redo A;          redo A;
2196          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
2197            !!!cp (158.1);
2198            $self->{ct}->{name} # DOCTYPE
2199                = chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
2200            delete $self->{ct}->{quirks};
2201            $self->{state} = DOCTYPE_NAME_STATE;
2202            !!!next-input-character;
2203            redo A;
2204        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2205          !!!cp (159);          !!!cp (159);
2206          !!!parse-error (type => 'no DOCTYPE name');          !!!parse-error (type => 'no DOCTYPE name');
# Line 2207  sub _get_next_token ($) { Line 2247  sub _get_next_token ($) {
2247          !!!emit ($self->{ct}); # DOCTYPE          !!!emit ($self->{ct}); # DOCTYPE
2248    
2249          redo A;          redo A;
2250          } elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z
2251            !!!cp (162.1);
2252            $self->{ct}->{name} # DOCTYPE
2253                .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
2254            delete $self->{ct}->{quirks};
2255            ## Stay in the state.
2256            !!!next-input-character;
2257            redo A;
2258        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
2259          !!!cp (163);          !!!cp (163);
2260          !!!parse-error (type => 'unclosed DOCTYPE');          !!!parse-error (type => 'unclosed DOCTYPE');
# Line 2228  sub _get_next_token ($) { Line 2276  sub _get_next_token ($) {
2276          redo A;          redo A;
2277        } else {        } else {
2278          !!!cp (164);          !!!cp (164);
2279          $self->{ct}->{name}          $self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE
2280            .= chr ($self->{nc}); # DOCTYPE          ## Stay in the state.
         ## Stay in the state  
2281          !!!next-input-character;          !!!next-input-character;
2282          redo A;          redo A;
2283        }        }

Legend:
Removed from v.1.26
Changed lines
Added in v.1.31

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24