/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.5 by wakaba, Sat Sep 8 03:25:05 2007 UTC | revision 1.13 by wakaba, Sat Sep 8 17:43:41 2007 UTC
# Line 36  sub NUMBER_TOKEN () { 11 } Line 36  sub NUMBER_TOKEN () { 11 }
36  sub DIMENSION_TOKEN () { 12 }  sub DIMENSION_TOKEN () { 12 }
37  sub PERCENTAGE_TOKEN () { 13 }  sub PERCENTAGE_TOKEN () { 13 }
38  sub UNICODE_RANGE_TOKEN () { 14 }  sub UNICODE_RANGE_TOKEN () { 14 }
 sub UNICODE_RANGE_INVALID_TOKEN () { 15 }  
39  sub DELIM_TOKEN () { 16 }  sub DELIM_TOKEN () { 16 }
40  sub PLUS_TOKEN () { 17 }  sub PLUS_TOKEN () { 17 }
41  sub GREATER_TOKEN () { 18 }  sub GREATER_TOKEN () { 18 }
# Line 60  sub CDC_TOKEN () { 35 } Line 59  sub CDC_TOKEN () { 35 }
59  sub COMMENT_TOKEN () { 36 }  sub COMMENT_TOKEN () { 36 }
60  sub COMMENT_INVALID_TOKEN () { 37 }  sub COMMENT_INVALID_TOKEN () { 37 }
61  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
62    sub MINUS_TOKEN () { 39 }
63    sub STAR_TOKEN () { 40 }
64    sub VBAR_TOKEN () { 41 }
65    sub DOT_TOKEN () { 42 }
66    sub COLON_TOKEN () { 43 }
67    sub MATCH_TOKEN () { 44 }
68    sub EXCLAMATION_TOKEN () { 45 }
69    
70  our @TokenName = qw(  our @TokenName = qw(
71    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
72    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE
73    UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
74    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
75    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
76    COMMENT_INVALID EOF    COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
77  );  );
78    
79  sub new ($) {  sub new ($) {
# Line 103  sub get_next_token ($) { Line 109  sub get_next_token ($) {
109      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
110        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
111          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
112          $self->{t} = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
113          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
114          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
115          redo A;          redo A;
# Line 116  sub get_next_token ($) { Line 122  sub get_next_token ($) {
122                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
123                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
124                $self->{c} == 0x003F) { # ?                $self->{c} == 0x003F) { # ?
125              $self->{t}->{value} .= '+' . chr $self->{c};              $self->{t}->{value} = chr $self->{c};
126              $self->{t}->{type} = UNICODE_RANGE_TOKEN;              $self->{t}->{type} = UNICODE_RANGE_TOKEN;
127              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
128              C: for (2..6) {              C: for (2..6) {
# Line 252  sub get_next_token ($) { Line 258  sub get_next_token ($) {
258          } else {          } else {
259            # stay in the state.            # stay in the state.
260            # reprocess            # reprocess
261            return {type => DELIM_STATE, value => '/'};            return {type => DELIM_TOKEN, value => '/'};
262            #redo A;            #redo A;
263          }                  }        
264        } elsif ($self->{c} == 0x003C) { # <        } elsif ($self->{c} == 0x003C) { # <
# Line 260  sub get_next_token ($) { Line 266  sub get_next_token ($) {
266          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
267          if ($self->{c} == 0x0021) { # !          if ($self->{c} == 0x0021) { # !
268            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
269            if ($self->{c} == 0x002C) { # -            if ($self->{c} == 0x002D) { # -
270              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
271              if ($self->{c} == 0x002C) { # -              if ($self->{c} == 0x002D) { # -
272                $self->{state} = BEFORE_TOKEN_STATE;                $self->{state} = BEFORE_TOKEN_STATE;
273                $self->{c} = $self->{get_char}->();                $self->{c} = $self->{get_char}->();
274                return {type => CDO_TOKEN};                return {type => CDO_TOKEN};
275                #redo A;                #redo A;
276              } else {              } else {
277                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
278                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
279                $self->{t} = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
280                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
# Line 277  sub get_next_token ($) { Line 283  sub get_next_token ($) {
283                #redo A;                #redo A;
284              }              }
285            } else {            } else {
286              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};              unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
287              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
288              #reprocess              #reprocess
289              return {type => DELIM_TOKEN, value => '<'};              return {type => DELIM_TOKEN, value => '<'};
# Line 290  sub get_next_token ($) { Line 296  sub get_next_token ($) {
296            #redo A;            #redo A;
297          }          }
298        } elsif (my $t = {        } elsif (my $t = {
299                  0x003B => SEMICOLON_TOKEN, # ;                          0x0021 => EXCLAMATION_TOKEN, # !
300                  0x007B => LBRACE_TOKEN, # {                          0x002D => MINUS_TOKEN, # -
301                  0x007D => RBRACE_TOKEN, # }                          0x002E => DOT_TOKEN, # .
302                  0x0028 => LPAREN_TOKEN, # (                          0x003A => COLON_TOKEN, # :
303                  0x0029 => RPAREN_TOKEN, # )                          0x003B => SEMICOLON_TOKEN, # ;
304                  0x005B => LBRACKET_TOKEN, # [                          0x003D => MATCH_TOKEN, # =
305                  0x005D => RBRACKET_TOKEN, # ]                          0x007B => LBRACE_TOKEN, # {
306                            0x007D => RBRACE_TOKEN, # }
307                            0x0028 => LPAREN_TOKEN, # (
308                            0x0029 => RPAREN_TOKEN, # )
309                            0x005B => LBRACKET_TOKEN, # [
310                            0x005D => RBRACKET_TOKEN, # ]
311                 }->{$self->{c}}) {                 }->{$self->{c}}) {
312          # stay in the state          # stay in the state
313          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 349  sub get_next_token ($) { Line 360  sub get_next_token ($) {
360            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
361            return {type => $v};            return {type => $v};
362            #redo A;            #redo A;
363            } elsif ($v = {
364                           0x002A => STAR_TOKEN, # *
365                           0x007C => VBAR_TOKEN, # |
366                          }->{$c}) {
367              # stay in the state.
368              # reprocess
369              return {type => $v};
370              #redo A;
371          } else {          } else {
372            # stay in the state            # stay in the state
373            # reprocess            # reprocess
# Line 409  sub get_next_token ($) { Line 428  sub get_next_token ($) {
428          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
429          redo A;          redo A;
430        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
 ## TODO: 12-\X, 12-\{nl}  
431          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
432          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
433          redo A;          redo A;
434        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D) { # -
435                 $self->{t}->{type} == IDENT_TOKEN) {          if ($self->{t}->{type} == IDENT_TOKEN) {
         $self->{c} = $self->{get_char}->();  
         if ($self->{c} == 0x003E) { # >  
           $self->{state} = BEFORE_TOKEN_STATE;  
436            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
437            return {type => CDC_TOKEN};            if ($self->{c} == 0x003E) { # >
438            #redo A;              $self->{state} = BEFORE_TOKEN_STATE;
439                $self->{c} = $self->{get_char}->();
440                return {type => CDC_TOKEN};
441                #redo A;
442              } else {
443                ## NOTE: |-|, |-|, $self->{c}
444                #$self->{t} = {type => IDENT_TOKEN, value => '-'};
445                # stay in the state
446                # reconsume
447                return {type => MINUS_TOKEN};
448                #redo A;
449              }
450            } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
451              $self->{c} = $self->{get_char}->();
452              if ($self->{c} == 0x003E) { # >
453                unshift @{$self->{token}}, {type => CDC_TOKEN};
454                $self->{t}->{type} = NUMBER_TOKEN;
455                $self->{t}->{value} = '';
456                $self->{state} = BEFORE_TOKEN_STATE;
457                $self->{c} = $self->{get_char}->();
458                return $self->{t};
459                #redo A;
460              } else {
461                ## NOTE: |-|, |-|, $self->{c}
462                my $t = $self->{t};
463                $t->{type} = NUMBER_TOKEN;
464                $t->{value} = '';
465                $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
466                unshift @{$self->{token}}, {type => MINUS_TOKEN};
467                # stay in the state
468                # reconsume
469                return $t;
470                #redo A;
471              }
472          } else {          } else {
473            ## NOTE: |-|, |-|, $self->{c}            #
           #$self->{t} = {type => IDENT_TOKEN, value => '-'};  
           # stay in the state  
           # reconsume  
           return {type => DELIM_TOKEN, value => '-'};  
           #redo A;  
474          }          }
475        } else {        } else {
476          if ($self->{t}->{type} == NUMBER_TOKEN) {          #
477            ## NOTE: |-| after |NUMBER|.        }
478            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};        
479            $self->{state} = BEFORE_TOKEN_STATE;        if ($self->{t}->{type} == DIMENSION_TOKEN) {
480            # reconsume          ## NOTE: |-| after |NUMBER|.
481            $self->{t}->{value} = $self->{t}->{number};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
482            delete $self->{t}->{number};          $self->{state} = BEFORE_TOKEN_STATE;
483            return $self->{t};          # reprocess
484          } else {          $self->{t}->{type} = NUMBER_TOKEN;
485            ## NOTE: |-| not followed by |nmstart|.          $self->{t}->{value} = '';
486            $self->{state} = BEFORE_TOKEN_STATE;          return $self->{t};
487            $self->{c} = $self->{get_char}->();        } else {
488            return {type => DELIM_TOKEN, value => '-'};          ## NOTE: |-| not followed by |nmstart|.
489          }          $self->{state} = BEFORE_TOKEN_STATE;
490            # reprocess
491            return {type => MINUS_TOKEN};
492        }        }
493      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
494        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
# Line 486  sub get_next_token ($) { Line 531  sub get_next_token ($) {
531            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
532            #redo A;            #redo A;
533          } else {          } else {
534            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => MINUS_TOKEN};
535            $self->{t} = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
536            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
537            # reprocess            # reprocess
# Line 499  sub get_next_token ($) { Line 544  sub get_next_token ($) {
544          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
545          redo A;          redo A;
546        } else {        } else {
547          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
548          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
549          # reprocess          # reprocess
550          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@'};
# Line 507  sub get_next_token ($) { Line 552  sub get_next_token ($) {
552      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
553        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
554          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
555            $self->{t}->{hyphen} = 1;
556          $self->{t}->{value} = '-';          $self->{t}->{value} = '-';
557            $self->{t}->{type} = DIMENSION_TOKEN;
558          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
559          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
560          redo A;          redo A;
# Line 524  sub get_next_token ($) { Line 571  sub get_next_token ($) {
571        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
572          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
573          $self->{t}->{value} = '';          $self->{t}->{value} = '';
574            $self->{t}->{type} = DIMENSION_TOKEN;
575          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
576          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
577          redo A;          redo A;
# Line 557  sub get_next_token ($) { Line 605  sub get_next_token ($) {
605          redo A;          redo A;
606        } else {        } else {
607          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
608          $self->{c} = $self->{get_char}->();          # reprocess
609          return {type => DELIM_TOKEN, value => '#'};          return {type => DELIM_TOKEN, value => '#'};
610          #redo A;          #redo A;
611        }        }
# Line 766  sub get_next_token ($) { Line 814  sub get_next_token ($) {
814          redo A;          redo A;
815        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
816          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
817          $char = $self->{c} - 0x0061 - 0xA;          $char = $self->{c} - 0x0061 + 0xA;
818          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
819          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
820          redo A;          redo A;
821        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
822                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
823          if ($q == 0) {          if ($q == 0) {
824            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $self->{t};  
           # reconsume  
           #redo A;  
825          } elsif ($q == 1) {          } elsif ($q == 1) {
826            ## NOTE: In |escape| in |URI|.            ## NOTE: In |escape| in |URI|.
827            $self->{t}->{type} = {            $self->{t}->{type} = {
# Line 800  sub get_next_token ($) { Line 843  sub get_next_token ($) {
843          }          }
844        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
845          if ($q == 0) {          if ($q == 0) {
846            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $self->{t};  
           # reconsume  
           #redo A;  
847          } elsif ($q == 1) {          } elsif ($q == 1) {
848              ## NOTE: In |escape| in |URI|.
849            $self->{t}->{type} = {            $self->{t}->{type} = {
850                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
851                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
852                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
853                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
854            }->{$self->{t}->{type}};            }->{$self->{t}->{type}};
855            $self->{t}->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
856            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
857            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
858            redo A;            redo A;
859          } else {          } else {
860            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
861            $self->{t}->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
862            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
863            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
864            redo A;            redo A;
865          }          }
866          } elsif ($self->{c} == -1) {
867            #
868        } else {        } else {
869          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
870          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
# Line 832  sub get_next_token ($) { Line 873  sub get_next_token ($) {
873          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
874          redo A;          redo A;
875        }        }
876    
877          if ($q == 0) {
878            if ($self->{t}->{type} == DIMENSION_TOKEN) {
879              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
880                $self->{state} = BEFORE_TOKEN_STATE;
881                # reprocess
882                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
883                unshift @{$self->{token}}, {type => MINUS_TOKEN};
884                $self->{t}->{type} = NUMBER_TOKEN;
885                $self->{t}->{value} = '';
886                return $self->{t};
887                #redo A;
888              } elsif (length $self->{t}->{value}) {
889                $self->{state} = BEFORE_TOKEN_STATE;
890                # reprocess
891                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
892                return $self->{t};
893                #redo A;
894              } else {
895                $self->{state} = BEFORE_TOKEN_STATE;
896                # reprocess
897                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
898                $self->{t}->{type} = NUMBER_TOKEN;
899                $self->{t}->{value} = '';
900                return $self->{t};
901                #redo A;
902              }
903            } else {
904              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
905                $self->{state} = BEFORE_TOKEN_STATE;
906                # reprocess
907                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
908                return {type => MINUS_TOKEN};
909                #redo A;
910              } elsif (length $self->{t}->{value}) {
911                $self->{state} = BEFORE_TOKEN_STATE;
912                # reprocess
913                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
914                return $self->{t};
915                #redo A;
916              } else {
917                $self->{state} = BEFORE_TOKEN_STATE;
918                # reprocess
919                return {type => DELIM_TOKEN, value => '\\'};
920                #redo A;
921              }
922            }
923          } elsif ($q == 1) {
924            $self->{state} = URI_UNQUOTED_STATE;
925            $self->{c} = $self->{get_char}->();
926            redo A;
927          } else {
928            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
929            $self->{t}->{type} = {
930              STRING_TOKEN, INVALID_TOKEN,
931              URI_TOKEN, URI_INVALID_TOKEN,
932              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
933            }->{$self->{t}->{type}} || $self->{t}->{type};
934            $self->{state} = BEFORE_TOKEN_STATE;
935            # reprocess
936            return $self->{t};
937            #redo A;
938          }
939      } elsif ($self->{state} == ESCAPE_STATE) {      } elsif ($self->{state} == ESCAPE_STATE) {
940        ## NOTE: third..seventh character of |unicode| in |escape|.        ## NOTE: third..seventh character of |unicode| in |escape|.
941        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
# Line 845  sub get_next_token ($) { Line 949  sub get_next_token ($) {
949          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
950          redo A;          redo A;
951        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
952          $char = $char * 0x10 + $self->{c} - 0x0061 - 0xA;          $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
953          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
954          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
955          redo A;          redo A;
# Line 894  sub get_next_token ($) { Line 998  sub get_next_token ($) {
998      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
999        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
1000        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
1001          $self->{t}->{value} .= chr $char;          $self->{t}->{value} .= chr $self->{c};
1002          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1003              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1004          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1005          redo A;          redo A;
1006        } else {        } else {
         $self->{t}->{value} .= chr $char;  
1007          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1008              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1009          # reconsume          # reprocess
1010          redo A;          redo A;
1011        }        }
1012      } elsif ($self->{state} == STRING_STATE) {      } elsif ($self->{state} == STRING_STATE) {
# Line 930  sub get_next_token ($) { Line 1033  sub get_next_token ($) {
1033                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
1034                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
1035                 $self->{c} == -1) {                 $self->{c} == -1) {
1036          $self->{t}->{type} = INVALID_TOKEN;          $self->{t}->{type} = {
1037              STRING_TOKEN, INVALID_TOKEN,
1038              INVALID_TOKEN, INVALID_TOKEN,
1039              URI_TOKEN, URI_INVALID_TOKEN,
1040              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
1041              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1042              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
1043            }->{$self->{t}->{type}};
1044          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1045          # reconsume          # reconsume
1046          return $self->{t};          return $self->{t};
# Line 967  sub get_next_token ($) { Line 1077  sub get_next_token ($) {
1077          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1078          redo A;          redo A;
1079        } else {        } else {
1080          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DOT_TOKEN};
1081          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = $self->{t}->{value};
1082          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1083          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
# Line 984  sub get_next_token ($) { Line 1094  sub get_next_token ($) {
1094          redo A;          redo A;
1095        } else {        } else {
1096          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1097          $self->{c} = $self->{get_char}->();          # reprocess
1098          return {type => DELIM_TOKEN, value => '.'};          return {type => DOT_TOKEN};
1099          #redo A;          #redo A;
1100        }        }
1101      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.13

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24