/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.5 by wakaba, Sat Sep 8 03:25:05 2007 UTC | revision 1.14 by wakaba, Sat Sep 22 12:16:33 2007 UTC
# Line 1  Line 1 
1  package Whatpm::CSS::Tokenizer;  package Whatpm::CSS::Tokenizer;
2  use strict;  use strict;
3    
4    require Exporter;
5    push our @ISA, 'Exporter';
6    
7  sub BEFORE_TOKEN_STATE () { 0 }  sub BEFORE_TOKEN_STATE () { 0 }
8  sub BEFORE_NMSTART_STATE () { 1 }  sub BEFORE_NMSTART_STATE () { 1 }
9  sub NAME_STATE () { 2 }  sub NAME_STATE () { 2 }
# Line 36  sub NUMBER_TOKEN () { 11 } Line 39  sub NUMBER_TOKEN () { 11 }
39  sub DIMENSION_TOKEN () { 12 }  sub DIMENSION_TOKEN () { 12 }
40  sub PERCENTAGE_TOKEN () { 13 }  sub PERCENTAGE_TOKEN () { 13 }
41  sub UNICODE_RANGE_TOKEN () { 14 }  sub UNICODE_RANGE_TOKEN () { 14 }
 sub UNICODE_RANGE_INVALID_TOKEN () { 15 }  
42  sub DELIM_TOKEN () { 16 }  sub DELIM_TOKEN () { 16 }
43  sub PLUS_TOKEN () { 17 }  sub PLUS_TOKEN () { 17 }
44  sub GREATER_TOKEN () { 18 }  sub GREATER_TOKEN () { 18 }
# Line 60  sub CDC_TOKEN () { 35 } Line 62  sub CDC_TOKEN () { 35 }
62  sub COMMENT_TOKEN () { 36 }  sub COMMENT_TOKEN () { 36 }
63  sub COMMENT_INVALID_TOKEN () { 37 }  sub COMMENT_INVALID_TOKEN () { 37 }
64  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
65    sub MINUS_TOKEN () { 39 }
66    sub STAR_TOKEN () { 40 }
67    sub VBAR_TOKEN () { 41 }
68    sub DOT_TOKEN () { 42 }
69    sub COLON_TOKEN () { 43 }
70    sub MATCH_TOKEN () { 44 }
71    sub EXCLAMATION_TOKEN () { 45 }
72    
73  our @TokenName = qw(  our @TokenName = qw(
74    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
75    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE
76    UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
77    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
78    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
79    COMMENT_INVALID EOF    COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
80    );
81    
82    our @EXPORT_OK = qw(
83      IDENT_TOKEN ATKEYWORD_TOKEN HASH_TOKEN FUNCTION_TOKEN URI_TOKEN
84      URI_INVALID_TOKEN URI_PREFIX_TOKEN URI_PREFIX_INVALID_TOKEN
85      STRING_TOKEN INVALID_TOKEN NUMBER_TOKEN DIMENSION_TOKEN PERCENTAGE_TOKEN
86      UNICODE_RANGE_TOKEN DELIM_TOKEN PLUS_TOKEN GREATER_TOKEN COMMA_TOKEN
87      TILDE_TOKEN DASHMATCH_TOKEN PREFIXMATCH_TOKEN SUFFIXMATCH_TOKEN
88      SUBSTRINGMATCH_TOKEN INCLUDES_TOKEN SEMICOLON_TOKEN LBRACE_TOKEN
89      RBRACE_TOKEN LPAREN_TOKEN RPAREN_TOKEN LBRACKET_TOKEN RBRACKET_TOKEN
90      S_TOKEN CDO_TOKEN CDC_TOKEN COMMENT_TOKEN COMMENT_INVALID_TOKEN EOF_TOKEN
91      MINUS_TOKEN STAR_TOKEN VBAR_TOKEN DOT_TOKEN COLON_TOKEN MATCH_TOKEN
92      EXCLAMATION_TOKEN
93  );  );
94    
95    our %EXPORT_TAGS = ('token' => [@EXPORT_OK]);
96    
97  sub new ($) {  sub new ($) {
98    my $self = bless {token => [], get_char => sub { -1 },    my $self = bless {token => [], get_char => sub { -1 },
99                      onerror => sub { }}, shift;                      onerror => sub { }}, shift;
# Line 103  sub get_next_token ($) { Line 127  sub get_next_token ($) {
127      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
128        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
129          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
130          $self->{t} = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
131          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
132          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
133          redo A;          redo A;
# Line 116  sub get_next_token ($) { Line 140  sub get_next_token ($) {
140                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
141                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
142                $self->{c} == 0x003F) { # ?                $self->{c} == 0x003F) { # ?
143              $self->{t}->{value} .= '+' . chr $self->{c};              $self->{t}->{value} = chr $self->{c};
144              $self->{t}->{type} = UNICODE_RANGE_TOKEN;              $self->{t}->{type} = UNICODE_RANGE_TOKEN;
145              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
146              C: for (2..6) {              C: for (2..6) {
# Line 252  sub get_next_token ($) { Line 276  sub get_next_token ($) {
276          } else {          } else {
277            # stay in the state.            # stay in the state.
278            # reprocess            # reprocess
279            return {type => DELIM_STATE, value => '/'};            return {type => DELIM_TOKEN, value => '/'};
280            #redo A;            #redo A;
281          }                  }        
282        } elsif ($self->{c} == 0x003C) { # <        } elsif ($self->{c} == 0x003C) { # <
# Line 260  sub get_next_token ($) { Line 284  sub get_next_token ($) {
284          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
285          if ($self->{c} == 0x0021) { # !          if ($self->{c} == 0x0021) { # !
286            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
287            if ($self->{c} == 0x002C) { # -            if ($self->{c} == 0x002D) { # -
288              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
289              if ($self->{c} == 0x002C) { # -              if ($self->{c} == 0x002D) { # -
290                $self->{state} = BEFORE_TOKEN_STATE;                $self->{state} = BEFORE_TOKEN_STATE;
291                $self->{c} = $self->{get_char}->();                $self->{c} = $self->{get_char}->();
292                return {type => CDO_TOKEN};                return {type => CDO_TOKEN};
293                #redo A;                #redo A;
294              } else {              } else {
295                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
296                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
297                $self->{t} = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
298                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
# Line 277  sub get_next_token ($) { Line 301  sub get_next_token ($) {
301                #redo A;                #redo A;
302              }              }
303            } else {            } else {
304              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};              unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
305              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
306              #reprocess              #reprocess
307              return {type => DELIM_TOKEN, value => '<'};              return {type => DELIM_TOKEN, value => '<'};
# Line 290  sub get_next_token ($) { Line 314  sub get_next_token ($) {
314            #redo A;            #redo A;
315          }          }
316        } elsif (my $t = {        } elsif (my $t = {
317                  0x003B => SEMICOLON_TOKEN, # ;                          0x0021 => EXCLAMATION_TOKEN, # !
318                  0x007B => LBRACE_TOKEN, # {                          0x002D => MINUS_TOKEN, # -
319                  0x007D => RBRACE_TOKEN, # }                          0x002E => DOT_TOKEN, # .
320                  0x0028 => LPAREN_TOKEN, # (                          0x003A => COLON_TOKEN, # :
321                  0x0029 => RPAREN_TOKEN, # )                          0x003B => SEMICOLON_TOKEN, # ;
322                  0x005B => LBRACKET_TOKEN, # [                          0x003D => MATCH_TOKEN, # =
323                  0x005D => RBRACKET_TOKEN, # ]                          0x007B => LBRACE_TOKEN, # {
324                            0x007D => RBRACE_TOKEN, # }
325                            0x0028 => LPAREN_TOKEN, # (
326                            0x0029 => RPAREN_TOKEN, # )
327                            0x005B => LBRACKET_TOKEN, # [
328                            0x005D => RBRACKET_TOKEN, # ]
329                 }->{$self->{c}}) {                 }->{$self->{c}}) {
330          # stay in the state          # stay in the state
331          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 349  sub get_next_token ($) { Line 378  sub get_next_token ($) {
378            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
379            return {type => $v};            return {type => $v};
380            #redo A;            #redo A;
381            } elsif ($v = {
382                           0x002A => STAR_TOKEN, # *
383                           0x007C => VBAR_TOKEN, # |
384                          }->{$c}) {
385              # stay in the state.
386              # reprocess
387              return {type => $v};
388              #redo A;
389          } else {          } else {
390            # stay in the state            # stay in the state
391            # reprocess            # reprocess
# Line 409  sub get_next_token ($) { Line 446  sub get_next_token ($) {
446          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
447          redo A;          redo A;
448        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
 ## TODO: 12-\X, 12-\{nl}  
449          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
450          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
451          redo A;          redo A;
452        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D) { # -
453                 $self->{t}->{type} == IDENT_TOKEN) {          if ($self->{t}->{type} == IDENT_TOKEN) {
         $self->{c} = $self->{get_char}->();  
         if ($self->{c} == 0x003E) { # >  
           $self->{state} = BEFORE_TOKEN_STATE;  
454            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
455            return {type => CDC_TOKEN};            if ($self->{c} == 0x003E) { # >
456            #redo A;              $self->{state} = BEFORE_TOKEN_STATE;
457                $self->{c} = $self->{get_char}->();
458                return {type => CDC_TOKEN};
459                #redo A;
460              } else {
461                ## NOTE: |-|, |-|, $self->{c}
462                #$self->{t} = {type => IDENT_TOKEN, value => '-'};
463                # stay in the state
464                # reconsume
465                return {type => MINUS_TOKEN};
466                #redo A;
467              }
468            } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
469              $self->{c} = $self->{get_char}->();
470              if ($self->{c} == 0x003E) { # >
471                unshift @{$self->{token}}, {type => CDC_TOKEN};
472                $self->{t}->{type} = NUMBER_TOKEN;
473                $self->{t}->{value} = '';
474                $self->{state} = BEFORE_TOKEN_STATE;
475                $self->{c} = $self->{get_char}->();
476                return $self->{t};
477                #redo A;
478              } else {
479                ## NOTE: |-|, |-|, $self->{c}
480                my $t = $self->{t};
481                $t->{type} = NUMBER_TOKEN;
482                $t->{value} = '';
483                $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
484                unshift @{$self->{token}}, {type => MINUS_TOKEN};
485                # stay in the state
486                # reconsume
487                return $t;
488                #redo A;
489              }
490          } else {          } else {
491            ## NOTE: |-|, |-|, $self->{c}            #
           #$self->{t} = {type => IDENT_TOKEN, value => '-'};  
           # stay in the state  
           # reconsume  
           return {type => DELIM_TOKEN, value => '-'};  
           #redo A;  
492          }          }
493        } else {        } else {
494          if ($self->{t}->{type} == NUMBER_TOKEN) {          #
495            ## NOTE: |-| after |NUMBER|.        }
496            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};        
497            $self->{state} = BEFORE_TOKEN_STATE;        if ($self->{t}->{type} == DIMENSION_TOKEN) {
498            # reconsume          ## NOTE: |-| after |NUMBER|.
499            $self->{t}->{value} = $self->{t}->{number};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
500            delete $self->{t}->{number};          $self->{state} = BEFORE_TOKEN_STATE;
501            return $self->{t};          # reprocess
502          } else {          $self->{t}->{type} = NUMBER_TOKEN;
503            ## NOTE: |-| not followed by |nmstart|.          $self->{t}->{value} = '';
504            $self->{state} = BEFORE_TOKEN_STATE;          return $self->{t};
505            $self->{c} = $self->{get_char}->();        } else {
506            return {type => DELIM_TOKEN, value => '-'};          ## NOTE: |-| not followed by |nmstart|.
507          }          $self->{state} = BEFORE_TOKEN_STATE;
508            # reprocess
509            return {type => MINUS_TOKEN};
510        }        }
511      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
512        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
# Line 486  sub get_next_token ($) { Line 549  sub get_next_token ($) {
549            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
550            #redo A;            #redo A;
551          } else {          } else {
552            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => MINUS_TOKEN};
553            $self->{t} = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
554            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
555            # reprocess            # reprocess
# Line 499  sub get_next_token ($) { Line 562  sub get_next_token ($) {
562          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
563          redo A;          redo A;
564        } else {        } else {
565          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
566          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
567          # reprocess          # reprocess
568          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@'};
# Line 507  sub get_next_token ($) { Line 570  sub get_next_token ($) {
570      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
571        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
572          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
573            $self->{t}->{hyphen} = 1;
574          $self->{t}->{value} = '-';          $self->{t}->{value} = '-';
575            $self->{t}->{type} = DIMENSION_TOKEN;
576          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
577          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
578          redo A;          redo A;
# Line 524  sub get_next_token ($) { Line 589  sub get_next_token ($) {
589        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
590          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
591          $self->{t}->{value} = '';          $self->{t}->{value} = '';
592            $self->{t}->{type} = DIMENSION_TOKEN;
593          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
594          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
595          redo A;          redo A;
# Line 557  sub get_next_token ($) { Line 623  sub get_next_token ($) {
623          redo A;          redo A;
624        } else {        } else {
625          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
626          $self->{c} = $self->{get_char}->();          # reprocess
627          return {type => DELIM_TOKEN, value => '#'};          return {type => DELIM_TOKEN, value => '#'};
628          #redo A;          #redo A;
629        }        }
# Line 766  sub get_next_token ($) { Line 832  sub get_next_token ($) {
832          redo A;          redo A;
833        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
834          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
835          $char = $self->{c} - 0x0061 - 0xA;          $char = $self->{c} - 0x0061 + 0xA;
836          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
837          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
838          redo A;          redo A;
839        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
840                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
841          if ($q == 0) {          if ($q == 0) {
842            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $self->{t};  
           # reconsume  
           #redo A;  
843          } elsif ($q == 1) {          } elsif ($q == 1) {
844            ## NOTE: In |escape| in |URI|.            ## NOTE: In |escape| in |URI|.
845            $self->{t}->{type} = {            $self->{t}->{type} = {
# Line 800  sub get_next_token ($) { Line 861  sub get_next_token ($) {
861          }          }
862        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
863          if ($q == 0) {          if ($q == 0) {
864            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $self->{t};  
           # reconsume  
           #redo A;  
865          } elsif ($q == 1) {          } elsif ($q == 1) {
866              ## NOTE: In |escape| in |URI|.
867            $self->{t}->{type} = {            $self->{t}->{type} = {
868                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
869                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
870                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
871                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
872            }->{$self->{t}->{type}};            }->{$self->{t}->{type}};
873            $self->{t}->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
874            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
875            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
876            redo A;            redo A;
877          } else {          } else {
878            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
879            $self->{t}->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
880            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
881            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
882            redo A;            redo A;
883          }          }
884          } elsif ($self->{c} == -1) {
885            #
886        } else {        } else {
887          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
888          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
# Line 832  sub get_next_token ($) { Line 891  sub get_next_token ($) {
891          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
892          redo A;          redo A;
893        }        }
894    
895          if ($q == 0) {
896            if ($self->{t}->{type} == DIMENSION_TOKEN) {
897              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
898                $self->{state} = BEFORE_TOKEN_STATE;
899                # reprocess
900                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
901                unshift @{$self->{token}}, {type => MINUS_TOKEN};
902                $self->{t}->{type} = NUMBER_TOKEN;
903                $self->{t}->{value} = '';
904                return $self->{t};
905                #redo A;
906              } elsif (length $self->{t}->{value}) {
907                $self->{state} = BEFORE_TOKEN_STATE;
908                # reprocess
909                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
910                return $self->{t};
911                #redo A;
912              } else {
913                $self->{state} = BEFORE_TOKEN_STATE;
914                # reprocess
915                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
916                $self->{t}->{type} = NUMBER_TOKEN;
917                $self->{t}->{value} = '';
918                return $self->{t};
919                #redo A;
920              }
921            } else {
922              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
923                $self->{state} = BEFORE_TOKEN_STATE;
924                # reprocess
925                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
926                return {type => MINUS_TOKEN};
927                #redo A;
928              } elsif (length $self->{t}->{value}) {
929                $self->{state} = BEFORE_TOKEN_STATE;
930                # reprocess
931                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
932                return $self->{t};
933                #redo A;
934              } else {
935                $self->{state} = BEFORE_TOKEN_STATE;
936                # reprocess
937                return {type => DELIM_TOKEN, value => '\\'};
938                #redo A;
939              }
940            }
941          } elsif ($q == 1) {
942            $self->{state} = URI_UNQUOTED_STATE;
943            $self->{c} = $self->{get_char}->();
944            redo A;
945          } else {
946            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
947            $self->{t}->{type} = {
948              STRING_TOKEN, INVALID_TOKEN,
949              URI_TOKEN, URI_INVALID_TOKEN,
950              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
951            }->{$self->{t}->{type}} || $self->{t}->{type};
952            $self->{state} = BEFORE_TOKEN_STATE;
953            # reprocess
954            return $self->{t};
955            #redo A;
956          }
957      } elsif ($self->{state} == ESCAPE_STATE) {      } elsif ($self->{state} == ESCAPE_STATE) {
958        ## NOTE: third..seventh character of |unicode| in |escape|.        ## NOTE: third..seventh character of |unicode| in |escape|.
959        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
# Line 845  sub get_next_token ($) { Line 967  sub get_next_token ($) {
967          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
968          redo A;          redo A;
969        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
970          $char = $char * 0x10 + $self->{c} - 0x0061 - 0xA;          $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
971          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
972          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
973          redo A;          redo A;
# Line 894  sub get_next_token ($) { Line 1016  sub get_next_token ($) {
1016      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
1017        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
1018        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
1019          $self->{t}->{value} .= chr $char;          $self->{t}->{value} .= chr $self->{c};
1020          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1021              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1022          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1023          redo A;          redo A;
1024        } else {        } else {
         $self->{t}->{value} .= chr $char;  
1025          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1026              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1027          # reconsume          # reprocess
1028          redo A;          redo A;
1029        }        }
1030      } elsif ($self->{state} == STRING_STATE) {      } elsif ($self->{state} == STRING_STATE) {
# Line 930  sub get_next_token ($) { Line 1051  sub get_next_token ($) {
1051                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
1052                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
1053                 $self->{c} == -1) {                 $self->{c} == -1) {
1054          $self->{t}->{type} = INVALID_TOKEN;          $self->{t}->{type} = {
1055              STRING_TOKEN, INVALID_TOKEN,
1056              INVALID_TOKEN, INVALID_TOKEN,
1057              URI_TOKEN, URI_INVALID_TOKEN,
1058              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
1059              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1060              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
1061            }->{$self->{t}->{type}};
1062          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1063          # reconsume          # reconsume
1064          return $self->{t};          return $self->{t};
# Line 967  sub get_next_token ($) { Line 1095  sub get_next_token ($) {
1095          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1096          redo A;          redo A;
1097        } else {        } else {
1098          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DOT_TOKEN};
1099          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = $self->{t}->{value};
1100          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1101          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
# Line 984  sub get_next_token ($) { Line 1112  sub get_next_token ($) {
1112          redo A;          redo A;
1113        } else {        } else {
1114          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1115          $self->{c} = $self->{get_char}->();          # reprocess
1116          return {type => DELIM_TOKEN, value => '.'};          return {type => DOT_TOKEN};
1117          #redo A;          #redo A;
1118        }        }
1119      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {

Legend (left column is v.1.5, right column is v.1.14; line numbers are those of v.1.14):
Removed from v.1.5
Changed lines
Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24