/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.4 by wakaba, Sat Sep 8 02:58:24 2007 UTC | revision 1.13 by wakaba, Sat Sep 8 17:43:41 2007 UTC
# Line 36  sub NUMBER_TOKEN () { 11 } Line 36  sub NUMBER_TOKEN () { 11 }
36  sub DIMENSION_TOKEN () { 12 }  sub DIMENSION_TOKEN () { 12 }
37  sub PERCENTAGE_TOKEN () { 13 }  sub PERCENTAGE_TOKEN () { 13 }
38  sub UNICODE_RANGE_TOKEN () { 14 }  sub UNICODE_RANGE_TOKEN () { 14 }
 sub UNICODE_RANGE_INVALID_TOKEN () { 15 }  
39  sub DELIM_TOKEN () { 16 }  sub DELIM_TOKEN () { 16 }
40  sub PLUS_TOKEN () { 17 }  sub PLUS_TOKEN () { 17 }
41  sub GREATER_TOKEN () { 18 }  sub GREATER_TOKEN () { 18 }
# Line 60  sub CDC_TOKEN () { 35 } Line 59  sub CDC_TOKEN () { 35 }
59  sub COMMENT_TOKEN () { 36 }  sub COMMENT_TOKEN () { 36 }
60  sub COMMENT_INVALID_TOKEN () { 37 }  sub COMMENT_INVALID_TOKEN () { 37 }
61  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
62    sub MINUS_TOKEN () { 39 }
63    sub STAR_TOKEN () { 40 }
64    sub VBAR_TOKEN () { 41 }
65    sub DOT_TOKEN () { 42 }
66    sub COLON_TOKEN () { 43 }
67    sub MATCH_TOKEN () { 44 }
68    sub EXCLAMATION_TOKEN () { 45 }
69    
70  our @TokenName = qw(  our @TokenName = qw(
71    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
72    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE
73    UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
74    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
75    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
76    COMMENT_INVALID EOF    COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
77  );  );
78    
79  sub new ($) {  sub new ($) {
# Line 80  sub init ($) { Line 86  sub init ($) {
86    my $self = shift;    my $self = shift;
87    $self->{state} = BEFORE_TOKEN_STATE;    $self->{state} = BEFORE_TOKEN_STATE;
88    $self->{c} = $self->{get_char}->();    $self->{c} = $self->{get_char}->();
89      #$self->{t} = {type => token-type, value => value, number => number};
90  } # init  } # init
91    
92  sub get_next_token ($) {  sub get_next_token ($) {
# Line 88  sub get_next_token ($) { Line 95  sub get_next_token ($) {
95      return shift @{$self->{token}};      return shift @{$self->{token}};
96    }    }
97    
   my $current_token;  
98    my $char;    my $char;
99    my $num; # |{num}|, if any.    my $num; # |{num}|, if any.
100    my $i; # |$i + 1|th character in |unicode| in |escape|.    my $i; # |$i + 1|th character in |unicode| in |escape|.
# Line 103  sub get_next_token ($) { Line 109  sub get_next_token ($) {
109      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
110        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
111          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
112          $current_token = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
113          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
114          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
115          redo A;          redo A;
116          } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
117            $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
118            $self->{c} = $self->{get_char}->();
119            if ($self->{c} == 0x002B) { # +
120              $self->{c} = $self->{get_char}->();
121              if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
122                  (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
123                  (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
124                  $self->{c} == 0x003F) { # ?
125                $self->{t}->{value} = chr $self->{c};
126                $self->{t}->{type} = UNICODE_RANGE_TOKEN;
127                $self->{c} = $self->{get_char}->();
128                C: for (2..6) {
129                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
130                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
131                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
132                      $self->{c} == 0x003F) { # ?
133                    $self->{t}->{value} .= chr $self->{c};
134                    $self->{c} = $self->{get_char}->();
135                  } else {
136                    last C;
137                  }
138                } # C
139    
140                if ($self->{c} == 0x002D) { # -
141                  $self->{c} = $self->{get_char}->();
142                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
143                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
144                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
145                    $self->{t}->{value} .= '-' . chr $self->{c};
146                    $self->{c} = $self->{get_char}->();
147                    C: for (2..6) {
148                      if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
149                          (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
150                          (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
151                        $self->{t}->{value} .= chr $self->{c};
152                        $self->{c} = $self->{get_char}->();
153                      } else {
154                        last C;
155                      }
156                    } # C
157                    
158                    #
159                  } else {
160                    my $token = $self->{t};
161                    $self->{t} = {type => IDENT_TOKEN, value => '-'};
162                    $self->{state} = BEFORE_NMSTART_STATE;
163                    # reprocess
164                    return $token;
165                    #redo A;
166                  }
167                }
168    
169                $self->{state} = BEFORE_TOKEN_STATE;
170                # reprocess
171                return $self->{t};
172                #redo A;
173              } else {
174                unshift @{$self->{token}}, {type => PLUS_TOKEN};
175                $self->{state} = BEFORE_TOKEN_STATE;
176                # reprocess
177                return $self->{t};
178                #redo A;
179              }
180            } else {
181              $self->{state} = NAME_STATE;
182              # reprocess
183              redo A;
184            }
185        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
186                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
187                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
188                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
189          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
190          $current_token = {type => IDENT_TOKEN, value => chr $self->{c}};          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
191          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
192          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
193          redo A;          redo A;
194        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
195          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
196          $current_token = {type => IDENT_TOKEN, value => ''};          $self->{t} = {type => IDENT_TOKEN, value => ''};
197          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
198          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
199          redo A;          redo A;
200        } elsif ($self->{c} == 0x0040) { # @        } elsif ($self->{c} == 0x0040) { # @
201          ## NOTE: |@| in |ATKEYWORD|          ## NOTE: |@| in |ATKEYWORD|
202          $current_token = {type => ATKEYWORD_TOKEN, value => ''};          $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};
203          $self->{state} = AFTER_AT_STATE;          $self->{state} = AFTER_AT_STATE;
204          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
205          redo A;          redo A;
206        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
207          $current_token = {type => STRING_TOKEN, value => ''};          $self->{t} = {type => STRING_TOKEN, value => ''};
208          $self->{state} = STRING_STATE; $q = $self->{c};          $self->{state} = STRING_STATE; $q = $self->{c};
209          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
210          redo A;          redo A;
211        } elsif ($self->{c} == 0x0023) { # #        } elsif ($self->{c} == 0x0023) { # #
212          ## NOTE: |#| in |HASH|.          ## NOTE: |#| in |HASH|.
213          $current_token = {type => HASH_TOKEN, value => ''};          $self->{t} = {type => HASH_TOKEN, value => ''};
214          $self->{state} = HASH_OPEN_STATE;          $self->{state} = HASH_OPEN_STATE;
215          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
216          redo A;          redo A;
217        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
218          ## NOTE: |num|.          ## NOTE: |num|.
219          $current_token = {type => NUMBER_TOKEN, value => chr $self->{c}};          $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};
220          $self->{state} = NUMBER_STATE;          $self->{state} = NUMBER_STATE;
221          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
222          redo A;          redo A;
223        } elsif ($self->{c} == 0x002E) { # .        } elsif ($self->{c} == 0x002E) { # .
224          ## NOTE: |num|.          ## NOTE: |num|.
225          $current_token = {type => NUMBER_TOKEN, value => '0'};          $self->{t} = {type => NUMBER_TOKEN, value => '0'};
226          $self->{state} = NUMBER_FRACTION_STATE;          $self->{state} = NUMBER_FRACTION_STATE;
227          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
228          redo A;          redo A;
# Line 183  sub get_next_token ($) { Line 258  sub get_next_token ($) {
258          } else {          } else {
259            # stay in the state.            # stay in the state.
260            # reprocess            # reprocess
261            return {type => DELIM_STATE, value => '/'};            return {type => DELIM_TOKEN, value => '/'};
262            #redo A;            #redo A;
263          }                  }        
264        } elsif ($self->{c} == 0x003C) { # <        } elsif ($self->{c} == 0x003C) { # <
# Line 191  sub get_next_token ($) { Line 266  sub get_next_token ($) {
266          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
267          if ($self->{c} == 0x0021) { # !          if ($self->{c} == 0x0021) { # !
268            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
269            if ($self->{c} == 0x002C) { # -            if ($self->{c} == 0x002D) { # -
270              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
271              if ($self->{c} == 0x002C) { # -              if ($self->{c} == 0x002D) { # -
272                $self->{state} = BEFORE_TOKEN_STATE;                $self->{state} = BEFORE_TOKEN_STATE;
273                $self->{c} = $self->{get_char}->();                $self->{c} = $self->{get_char}->();
274                return {type => CDO_TOKEN};                return {type => CDO_TOKEN};
275                #redo A;                #redo A;
276              } else {              } else {
277                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
278                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
279                $current_token = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
280                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
281                #reprocess                #reprocess
282                return {type => DELIM_TOKEN, value => '<'};                return {type => DELIM_TOKEN, value => '<'};
283                #redo A;                #redo A;
284              }              }
285            } else {            } else {
286              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};              unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
287              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
288              #reprocess              #reprocess
289              return {type => DELIM_TOKEN, value => '<'};              return {type => DELIM_TOKEN, value => '<'};
# Line 221  sub get_next_token ($) { Line 296  sub get_next_token ($) {
296            #redo A;            #redo A;
297          }          }
298        } elsif (my $t = {        } elsif (my $t = {
299                  0x003B => SEMICOLON_TOKEN, # ;                          0x0021 => EXCLAMATION_TOKEN, # !
300                  0x007B => LBRACE_TOKEN, # {                          0x002D => MINUS_TOKEN, # -
301                  0x007D => RBRACE_TOKEN, # }                          0x002E => DOT_TOKEN, # .
302                  0x0028 => LPAREN_TOKEN, # (                          0x003A => COLON_TOKEN, # :
303                  0x0029 => RPAREN_TOKEN, # )                          0x003B => SEMICOLON_TOKEN, # ;
304                  0x005B => LBRACKET_TOKEN, # [                          0x003D => MATCH_TOKEN, # =
305                  0x005D => RBRACKET_TOKEN, # ]                          0x007B => LBRACE_TOKEN, # {
306                            0x007D => RBRACE_TOKEN, # }
307                            0x0028 => LPAREN_TOKEN, # (
308                            0x0029 => RPAREN_TOKEN, # )
309                            0x005B => LBRACKET_TOKEN, # [
310                            0x005D => RBRACKET_TOKEN, # ]
311                 }->{$self->{c}}) {                 }->{$self->{c}}) {
312          # stay in the state          # stay in the state
313          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 280  sub get_next_token ($) { Line 360  sub get_next_token ($) {
360            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
361            return {type => $v};            return {type => $v};
362            #redo A;            #redo A;
363            } elsif ($v = {
364                           0x002A => STAR_TOKEN, # *
365                           0x007C => VBAR_TOKEN, # |
366                          }->{$c}) {
367              # stay in the state.
368              # reprocess
369              return {type => $v};
370              #redo A;
371          } else {          } else {
372            # stay in the state            # stay in the state
373            # reprocess            # reprocess
# Line 321  sub get_next_token ($) { Line 409  sub get_next_token ($) {
409          #redo A;          #redo A;
410        } else {        } else {
411          # stay in the state          # stay in the state
412          $current_token = {type => DELIM_TOKEN, value => chr $self->{c}};          $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};
413          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
414          return $current_token;          return $self->{t};
415          #redo A;          #redo A;
416        }        }
417      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
# Line 333  sub get_next_token ($) { Line 421  sub get_next_token ($) {
421            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
422            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
423            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
424          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
425          $current_token->{type} = DIMENSION_TOKEN          $self->{t}->{type} = DIMENSION_TOKEN
426              if $current_token->{type} == NUMBER_TOKEN;              if $self->{t}->{type} == NUMBER_TOKEN;
427          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
428          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
429          redo A;          redo A;
430        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
 ## TODO: 12-\X, 12-\{nl}  
431          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
432          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
433          redo A;          redo A;
434        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D) { # -
435                 $current_token->{type} == IDENT_TOKEN) {          if ($self->{t}->{type} == IDENT_TOKEN) {
         $self->{c} = $self->{get_char}->();  
         if ($self->{c} == 0x003E) { # >  
           $self->{state} = BEFORE_TOKEN_STATE;  
436            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
437            return {type => CDC_TOKEN};            if ($self->{c} == 0x003E) { # >
438            #redo A;              $self->{state} = BEFORE_TOKEN_STATE;
439                $self->{c} = $self->{get_char}->();
440                return {type => CDC_TOKEN};
441                #redo A;
442              } else {
443                ## NOTE: |-|, |-|, $self->{c}
444                #$self->{t} = {type => IDENT_TOKEN, value => '-'};
445                # stay in the state
446                # reconsume
447                return {type => MINUS_TOKEN};
448                #redo A;
449              }
450            } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
451              $self->{c} = $self->{get_char}->();
452              if ($self->{c} == 0x003E) { # >
453                unshift @{$self->{token}}, {type => CDC_TOKEN};
454                $self->{t}->{type} = NUMBER_TOKEN;
455                $self->{t}->{value} = '';
456                $self->{state} = BEFORE_TOKEN_STATE;
457                $self->{c} = $self->{get_char}->();
458                return $self->{t};
459                #redo A;
460              } else {
461                ## NOTE: |-|, |-|, $self->{c}
462                my $t = $self->{t};
463                $t->{type} = NUMBER_TOKEN;
464                $t->{value} = '';
465                $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
466                unshift @{$self->{token}}, {type => MINUS_TOKEN};
467                # stay in the state
468                # reconsume
469                return $t;
470                #redo A;
471              }
472          } else {          } else {
473            ## NOTE: |-|, |-|, $self->{c}            #
           #$current_token = {type => IDENT_TOKEN, value => '-'};  
           # stay in the state  
           # reconsume  
           return {type => DELIM_TOKEN, value => '-'};  
           #redo A;  
474          }          }
475        } else {        } else {
476          if ($current_token->{type} == NUMBER_TOKEN) {          #
477            ## NOTE: |-| after |NUMBER|.        }
478            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};        
479            $self->{state} = BEFORE_TOKEN_STATE;        if ($self->{t}->{type} == DIMENSION_TOKEN) {
480            # reconsume          ## NOTE: |-| after |NUMBER|.
481            $current_token->{value} = $current_token->{number};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
482            delete $current_token->{number};          $self->{state} = BEFORE_TOKEN_STATE;
483            return $current_token;          # reprocess
484          } else {          $self->{t}->{type} = NUMBER_TOKEN;
485            ## NOTE: |-| not followed by |nmstart|.          $self->{t}->{value} = '';
486            $self->{state} = BEFORE_TOKEN_STATE;          return $self->{t};
487            $self->{c} = $self->{get_char}->();        } else {
488            return {type => DELIM_TOKEN, value => '-'};          ## NOTE: |-| not followed by |nmstart|.
489          }          $self->{state} = BEFORE_TOKEN_STATE;
490            # reprocess
491            return {type => MINUS_TOKEN};
492        }        }
493      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
494        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
495            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
496            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
497            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
498          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
499          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
500          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
501          redo A;          redo A;
502        } elsif ($self->{c} == 0x002D) { # -        } elsif ($self->{c} == 0x002D) { # -
503          $current_token->{value} .= '-';          $self->{t}->{value} .= '-';
504          $self->{state} = AFTER_AT_HYPHEN_STATE;          $self->{state} = AFTER_AT_HYPHEN_STATE;
505          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
506          redo A;          redo A;
# Line 404  sub get_next_token ($) { Line 518  sub get_next_token ($) {
518            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
519            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
520            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
521          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
522          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
523          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
524          redo A;          redo A;
# Line 417  sub get_next_token ($) { Line 531  sub get_next_token ($) {
531            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
532            #redo A;            #redo A;
533          } else {          } else {
534            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => MINUS_TOKEN};
535            $current_token = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
536            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
537            # reprocess            # reprocess
538            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
# Line 430  sub get_next_token ($) { Line 544  sub get_next_token ($) {
544          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
545          redo A;          redo A;
546        } else {        } else {
547          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
548          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
549          # reprocess          # reprocess
550          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@'};
# Line 438  sub get_next_token ($) { Line 552  sub get_next_token ($) {
552      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
553        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
554          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
555          $current_token->{value} = '-';          $self->{t}->{hyphen} = 1;
556            $self->{t}->{value} = '-';
557            $self->{t}->{type} = DIMENSION_TOKEN;
558          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
559          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
560          redo A;          redo A;
# Line 447  sub get_next_token ($) { Line 563  sub get_next_token ($) {
563                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
564                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
565          ## NOTE: |nmstart| in |ident|.          ## NOTE: |nmstart| in |ident|.
566          $current_token->{value} = chr $self->{c};          $self->{t}->{value} = chr $self->{c};
567          $current_token->{type} = DIMENSION_TOKEN;          $self->{t}->{type} = DIMENSION_TOKEN;
568          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
569          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
570          redo A;          redo A;
571        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
572          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
573          $current_token->{value} = '';          $self->{t}->{value} = '';
574            $self->{t}->{type} = DIMENSION_TOKEN;
575          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
576          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
577          redo A;          redo A;
578        } elsif ($self->{c} == 0x0025) { # %        } elsif ($self->{c} == 0x0025) { # %
579          $current_token->{type} = PERCENTAGE_TOKEN;          $self->{t}->{type} = PERCENTAGE_TOKEN;
580          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
581          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
582          return $current_token;          return $self->{t};
583          #redo A;          #redo A;
584        } else {        } else {
585          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
586          # reprocess          # reprocess
587          return $current_token;          return $self->{t};
588          #redo A;          #redo A;
589        }        }
590      } elsif ($self->{state} == HASH_OPEN_STATE) {      } elsif ($self->{state} == HASH_OPEN_STATE) {
# Line 478  sub get_next_token ($) { Line 595  sub get_next_token ($) {
595            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
596            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
597            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
598          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
599          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
600          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
601          redo A;          redo A;
# Line 488  sub get_next_token ($) { Line 605  sub get_next_token ($) {
605          redo A;          redo A;
606        } else {        } else {
607          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
608          $self->{c} = $self->{get_char}->();          # reprocess
609          return {type => DELIM_TOKEN, value => '#'};          return {type => DELIM_TOKEN, value => '#'};
610          #redo A;          #redo A;
611        }        }
# Line 500  sub get_next_token ($) { Line 617  sub get_next_token ($) {
617            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
618            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
619            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
620          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
621          # stay in the state          # stay in the state
622          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
623          redo A;          redo A;
# Line 509  sub get_next_token ($) { Line 626  sub get_next_token ($) {
626          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
627          redo A;          redo A;
628        } elsif ($self->{c} == 0x0028 and # (        } elsif ($self->{c} == 0x0028 and # (
629                 $current_token->{type} == IDENT_TOKEN) { # (                 $self->{t}->{type} == IDENT_TOKEN) { # (
630          my $func_name = $current_token->{value};          my $func_name = $self->{t}->{value};
631          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
632          if ($func_name eq 'url' or $func_name eq 'url-prefix') {          if ($func_name eq 'url' or $func_name eq 'url-prefix') {
633            if ($current_token->{has_escape}) {            if ($self->{t}->{has_escape}) {
634              ## TODO: warn              ## TODO: warn
635            }            }
636            $current_token->{type}            $self->{t}->{type}
637                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
638            $current_token->{value} = '';            $self->{t}->{value} = '';
639            $self->{state} = URI_BEFORE_WSP_STATE;            $self->{state} = URI_BEFORE_WSP_STATE;
640            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
641            redo A;            redo A;
642          } else {          } else {
643            $current_token->{type} = FUNCTION_TOKEN;            $self->{t}->{type} = FUNCTION_TOKEN;
644            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
645            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
646            return $current_token;            return $self->{t};
647            #redo A;            #redo A;
648          }          }
649        } else {        } else {
650          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
651          # reconsume          # reconsume
652          return $current_token;          return $self->{t};
653          #redo A;          #redo A;
654        }        }
655      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
# Line 546  sub get_next_token ($) { Line 663  sub get_next_token ($) {
663          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
664        }        }
665        if ($self->{c} == -1) {        if ($self->{c} == -1) {
666          $current_token->{type} = {          $self->{t}->{type} = {
667              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
668              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
669              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
670              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
671          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
672          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
673          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
674          return $current_token;          return $self->{t};
675          #redo A;          #redo A;
676        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
677          ## TODO: Should we consider matches of "(" and ")"?          ## TODO: Should we consider matches of "(" and ")"?
678          $current_token->{type} = {          $self->{t}->{type} = {
679              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
680              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
681              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
682              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
683          }->{$current_token->{type}};          }->{$self->{t}->{type}};
684          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
685          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
686          redo A;          redo A;
# Line 574  sub get_next_token ($) { Line 691  sub get_next_token ($) {
691        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
692          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
693          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
694          return $current_token;          return $self->{t};
695          #redo A;          #redo A;
696        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
697          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
698          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
699          redo A;          redo A;
700        } else {        } else {
701          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
702          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
703          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
704          redo A;          redo A;
# Line 598  sub get_next_token ($) { Line 715  sub get_next_token ($) {
715          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
716          redo A;          redo A;
717        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
718          $current_token->{type} = {          $self->{t}->{type} = {
719              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
720              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
721              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
722              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
723          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
724          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
725          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
726          return $current_token;          return $self->{t};
727          #redo A;          #redo A;
728        } elsif ($self->{c} < 0x0020 or {        } elsif ($self->{c} < 0x0020 or {
729            0x0022 => 1, # "            0x0022 => 1, # "
# Line 614  sub get_next_token ($) { Line 731  sub get_next_token ($) {
731            0x0028 => 1, # (            0x0028 => 1, # (
732        }->{$self->{c}}) { # C0 or (        }->{$self->{c}}) { # C0 or (
733          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
734          $current_token->{type} = {          $self->{t}->{type} = {
735              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
736              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
737              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
738              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
739          }->{$current_token->{type}};          }->{$self->{t}->{type}};
740          # stay in the state.          # stay in the state.
741          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
742          redo A;          redo A;
743        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
744          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
745          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
746          return $current_token;          return $self->{t};
747          #redo A;          #redo A;
748        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
749          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
750          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
751          redo A;          redo A;
752        } else {        } else {
753          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
754          # stay in the state.          # stay in the state.
755          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
756          redo A;          redo A;
# Line 650  sub get_next_token ($) { Line 767  sub get_next_token ($) {
767          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
768          redo A;          redo A;
769        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
770          $current_token->{type} = {          $self->{t}->{type} = {
771              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
772              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
773              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
774              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
775          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
776          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
777          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
778          return $current_token;          return $self->{t};
779          #redo A;          #redo A;
780        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
781          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
782          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
783          return $current_token;          return $self->{t};
784          #redo A;          #redo A;
785        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
786          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
# Line 671  sub get_next_token ($) { Line 788  sub get_next_token ($) {
788          redo A;          redo A;
789        } else {        } else {
790          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
791          $current_token->{type} = {          $self->{t}->{type} = {
792              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
793              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
794              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
795              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
796          }->{$current_token->{type}};          }->{$self->{t}->{type}};
797          # stay in the state.          # stay in the state.
798          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
799          redo A;          redo A;
800        }        }
801      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
802        $current_token->{has_escape} = 1;        $self->{t}->{has_escape} = 1;
803        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
804          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
805          $char = $self->{c} - 0x0030;          $char = $self->{c} - 0x0030;
# Line 697  sub get_next_token ($) { Line 814  sub get_next_token ($) {
814          redo A;          redo A;
815        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
816          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
817          $char = $self->{c} - 0x0061 - 0xA;          $char = $self->{c} - 0x0061 + 0xA;
818          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
819          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
820          redo A;          redo A;
821        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
822                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
823          if ($q == 0) {          if ($q == 0) {
824            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $current_token;  
           # reconsume  
           #redo A;  
825          } elsif ($q == 1) {          } elsif ($q == 1) {
826            ## NOTE: In |escape| in |URI|.            ## NOTE: In |escape| in |URI|.
827            $current_token->{type} = {            $self->{t}->{type} = {
828                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
829                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
830                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
831                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
832            }->{$current_token->{type}};            }->{$self->{t}->{type}};
833            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
834            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = URI_UNQUOTED_STATE;
835            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
836            redo A;            redo A;
837          } else {          } else {
838            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
839            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
840            $self->{state} = STRING_STATE;            $self->{state} = STRING_STATE;
841            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
842            redo A;            redo A;
843          }          }
844        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
845          if ($q == 0) {          if ($q == 0) {
846            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $current_token;  
           # reconsume  
           #redo A;  
847          } elsif ($q == 1) {          } elsif ($q == 1) {
848            $current_token->{type} = {            ## NOTE: In |escape| in |URI|.
849              $self->{t}->{type} = {
850                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
851                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
852                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
853                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
854            }->{$current_token->{type}};            }->{$self->{t}->{type}};
855            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
856            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
857            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
858            redo A;            redo A;
859          } else {          } else {
860            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
861            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
862            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
863            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
864            redo A;            redo A;
865          }          }
866          } elsif ($self->{c} == -1) {
867            #
868        } else {        } else {
869          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
870          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
871          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
872              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
873          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
874          redo A;          redo A;
875        }        }
876    
877          if ($q == 0) {
878            if ($self->{t}->{type} == DIMENSION_TOKEN) {
879              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
880                $self->{state} = BEFORE_TOKEN_STATE;
881                # reprocess
882                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
883                unshift @{$self->{token}}, {type => MINUS_TOKEN};
884                $self->{t}->{type} = NUMBER_TOKEN;
885                $self->{t}->{value} = '';
886                return $self->{t};
887                #redo A;
888              } elsif (length $self->{t}->{value}) {
889                $self->{state} = BEFORE_TOKEN_STATE;
890                # reprocess
891                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
892                return $self->{t};
893                #redo A;
894              } else {
895                $self->{state} = BEFORE_TOKEN_STATE;
896                # reprocess
897                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
898                $self->{t}->{type} = NUMBER_TOKEN;
899                $self->{t}->{value} = '';
900                return $self->{t};
901                #redo A;
902              }
903            } else {
904              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
905                $self->{state} = BEFORE_TOKEN_STATE;
906                # reprocess
907                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
908                return {type => MINUS_TOKEN};
909                #redo A;
910              } elsif (length $self->{t}->{value}) {
911                $self->{state} = BEFORE_TOKEN_STATE;
912                # reprocess
913                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
914                return $self->{t};
915                #redo A;
916              } else {
917                $self->{state} = BEFORE_TOKEN_STATE;
918                # reprocess
919                return {type => DELIM_TOKEN, value => '\\'};
920                #redo A;
921              }
922            }
923          } elsif ($q == 1) {
924            $self->{state} = URI_UNQUOTED_STATE;
925            $self->{c} = $self->{get_char}->();
926            redo A;
927          } else {
928            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
929            $self->{t}->{type} = {
930              STRING_TOKEN, INVALID_TOKEN,
931              URI_TOKEN, URI_INVALID_TOKEN,
932              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
933            }->{$self->{t}->{type}} || $self->{t}->{type};
934            $self->{state} = BEFORE_TOKEN_STATE;
935            # reprocess
936            return $self->{t};
937            #redo A;
938          }
939      } elsif ($self->{state} == ESCAPE_STATE) {      } elsif ($self->{state} == ESCAPE_STATE) {
940        ## NOTE: third..seventh character of |unicode| in |escape|.        ## NOTE: third..seventh character of |unicode| in |escape|.
941        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
# Line 776  sub get_next_token ($) { Line 949  sub get_next_token ($) {
949          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
950          redo A;          redo A;
951        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
952          $char = $char * 0x10 + $self->{c} - 0x0061 - 0xA;          $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
953          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
954          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
955          redo A;          redo A;
# Line 784  sub get_next_token ($) { Line 957  sub get_next_token ($) {
957                 $self->{c} == 0x000A or # \n                 $self->{c} == 0x000A or # \n
958                 $self->{c} == 0x0009 or # \t                 $self->{c} == 0x0009 or # \t
959                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
960          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
961          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
962              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
963          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 794  sub get_next_token ($) { Line 967  sub get_next_token ($) {
967          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
968          redo A;          redo A;
969        } else {        } else {
970          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
971          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
972              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
973          # reconsume          # reconsume
# Line 806  sub get_next_token ($) { Line 979  sub get_next_token ($) {
979            $self->{c} == 0x000A or # \n            $self->{c} == 0x000A or # \n
980            $self->{c} == 0x0009 or # \t            $self->{c} == 0x0009 or # \t
981            $self->{c} == 0x000C) { # \f            $self->{c} == 0x000C) { # \f
982          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
983          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
984              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
985          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 816  sub get_next_token ($) { Line 989  sub get_next_token ($) {
989          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
990          redo A;          redo A;
991        } else {        } else {
992          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
993          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
994              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
995          # reconsume          # reconsume
# Line 825  sub get_next_token ($) { Line 998  sub get_next_token ($) {
998      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
999        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
1000        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
1001          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $self->{c};
1002          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1003              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1004          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1005          redo A;          redo A;
1006        } else {        } else {
         $current_token->{value} .= chr $char;  
1007          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1008              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1009          # reconsume          # reprocess
1010          redo A;          redo A;
1011        }        }
1012      } elsif ($self->{state} == STRING_STATE) {      } elsif ($self->{state} == STRING_STATE) {
# Line 847  sub get_next_token ($) { Line 1019  sub get_next_token ($) {
1019          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1020          redo A;          redo A;
1021        } elsif ($self->{c} == $q) { # " | '        } elsif ($self->{c} == $q) { # " | '
1022          if ($current_token->{type} == STRING_TOKEN) {          if ($self->{t}->{type} == STRING_TOKEN) {
1023            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
1024            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
1025            return $current_token;            return $self->{t};
1026            #redo A;            #redo A;
1027          } else {          } else {
1028            $self->{state} = URI_AFTER_WSP_STATE;            $self->{state} = URI_AFTER_WSP_STATE;
# Line 861  sub get_next_token ($) { Line 1033  sub get_next_token ($) {
1033                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
1034                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
1035                 $self->{c} == -1) {                 $self->{c} == -1) {
1036          $current_token->{type} = INVALID_TOKEN;          $self->{t}->{type} = {
1037              STRING_TOKEN, INVALID_TOKEN,
1038              INVALID_TOKEN, INVALID_TOKEN,
1039              URI_TOKEN, URI_INVALID_TOKEN,
1040              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
1041              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1042              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
1043            }->{$self->{t}->{type}};
1044          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1045          # reconsume          # reconsume
1046          return $current_token;          return $self->{t};
1047          #redo A;          #redo A;
1048        } else {        } else {
1049          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1050          # stay in the state          # stay in the state
1051          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1052          redo A;          redo A;
# Line 875  sub get_next_token ($) { Line 1054  sub get_next_token ($) {
1054      } elsif ($self->{state} == NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_STATE) {
1055        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
1056        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1057          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1058          # stay in the state          # stay in the state
1059          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1060          redo A;          redo A;
# Line 884  sub get_next_token ($) { Line 1063  sub get_next_token ($) {
1063          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1064          redo A;          redo A;
1065        } else {        } else {
1066          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1067          $current_token->{value} = '';          $self->{t}->{value} = '';
1068          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1069          # reprocess          # reprocess
1070          redo A;          redo A;
# Line 893  sub get_next_token ($) { Line 1072  sub get_next_token ($) {
1072      } elsif ($self->{state} == NUMBER_DOT_STATE) {      } elsif ($self->{state} == NUMBER_DOT_STATE) {
1073        ## NOTE: The character immediately following |.| in |num|.        ## NOTE: The character immediately following |.| in |num|.
1074        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1075          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
1076          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
1077          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1078          redo A;          redo A;
1079        } else {        } else {
1080          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DOT_TOKEN};
1081          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1082          $current_token->{value} = '';          $self->{t}->{value} = '';
1083          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1084          # reprocess          # reprocess
1085          return $current_token;          return $self->{t};
1086          #redo A;          #redo A;
1087        }        }
1088      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
1089        ## NOTE: The character immediately following |.| at the beginning of |num|.        ## NOTE: The character immediately following |.| at the beginning of |num|.
1090        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1091          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
1092          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
1093          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1094          redo A;          redo A;
1095        } else {        } else {
1096          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1097          $self->{c} = $self->{get_char}->();          # reprocess
1098          return {type => DELIM_TOKEN, value => '.'};          return {type => DOT_TOKEN};
1099          #redo A;          #redo A;
1100        }        }
1101      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
1102        ## NOTE: |[0-9]| in |num| after |.|.        ## NOTE: |[0-9]| in |num| after |.|.
1103        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1104          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1105          # stay in the state          # stay in the state
1106          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1107          redo A;          redo A;
1108        } else {        } else {
1109          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1110          $current_token->{value} = '';          $self->{t}->{value} = '';
1111          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1112          # reprocess          # reprocess
1113          redo A;          redo A;
# Line 937  sub get_next_token ($) { Line 1116  sub get_next_token ($) {
1116        die "$0: Unknown state |$self->{state}|";        die "$0: Unknown state |$self->{state}|";
1117      }      }
1118    } # A    } # A
   
   ## TODO: |URI|, |UNICODE-RANGE|, |COMMENT|  
   
1119  } # get_next_token  } # get_next_token
1120    
1121  1;  1;

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.13

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24