/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.4 by wakaba, Sat Sep 8 02:58:24 2007 UTC | revision 1.14 by wakaba, Sat Sep 22 12:16:33 2007 UTC
# Line 1  Line 1 
1  package Whatpm::CSS::Tokenizer;  package Whatpm::CSS::Tokenizer;
2  use strict;  use strict;
3    
4    require Exporter;
5    push our @ISA, 'Exporter';
6    
7  sub BEFORE_TOKEN_STATE () { 0 }  sub BEFORE_TOKEN_STATE () { 0 }
8  sub BEFORE_NMSTART_STATE () { 1 }  sub BEFORE_NMSTART_STATE () { 1 }
9  sub NAME_STATE () { 2 }  sub NAME_STATE () { 2 }
# Line 36  sub NUMBER_TOKEN () { 11 } Line 39  sub NUMBER_TOKEN () { 11 }
39  sub DIMENSION_TOKEN () { 12 }  sub DIMENSION_TOKEN () { 12 }
40  sub PERCENTAGE_TOKEN () { 13 }  sub PERCENTAGE_TOKEN () { 13 }
41  sub UNICODE_RANGE_TOKEN () { 14 }  sub UNICODE_RANGE_TOKEN () { 14 }
 sub UNICODE_RANGE_INVALID_TOKEN () { 15 }  
42  sub DELIM_TOKEN () { 16 }  sub DELIM_TOKEN () { 16 }
43  sub PLUS_TOKEN () { 17 }  sub PLUS_TOKEN () { 17 }
44  sub GREATER_TOKEN () { 18 }  sub GREATER_TOKEN () { 18 }
# Line 60  sub CDC_TOKEN () { 35 } Line 62  sub CDC_TOKEN () { 35 }
62  sub COMMENT_TOKEN () { 36 }  sub COMMENT_TOKEN () { 36 }
63  sub COMMENT_INVALID_TOKEN () { 37 }  sub COMMENT_INVALID_TOKEN () { 37 }
64  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
65    sub MINUS_TOKEN () { 39 }
66    sub STAR_TOKEN () { 40 }
67    sub VBAR_TOKEN () { 41 }
68    sub DOT_TOKEN () { 42 }
69    sub COLON_TOKEN () { 43 }
70    sub MATCH_TOKEN () { 44 }
71    sub EXCLAMATION_TOKEN () { 45 }
72    
73  our @TokenName = qw(  our @TokenName = qw(
74    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
75    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE
76    UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
77    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
78    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
79    COMMENT_INVALID EOF    COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
80    );
81    
82    our @EXPORT_OK = qw(
83      IDENT_TOKEN ATKEYWORD_TOKEN HASH_TOKEN FUNCTION_TOKEN URI_TOKEN
84      URI_INVALID_TOKEN URI_PREFIX_TOKEN URI_PREFIX_INVALID_TOKEN
85      STRING_TOKEN INVALID_TOKEN NUMBER_TOKEN DIMENSION_TOKEN PERCENTAGE_TOKEN
86      UNICODE_RANGE_TOKEN DELIM_TOKEN PLUS_TOKEN GREATER_TOKEN COMMA_TOKEN
87      TILDE_TOKEN DASHMATCH_TOKEN PREFIXMATCH_TOKEN SUFFIXMATCH_TOKEN
88      SUBSTRINGMATCH_TOKEN INCLUDES_TOKEN SEMICOLON_TOKEN LBRACE_TOKEN
89      RBRACE_TOKEN LPAREN_TOKEN RPAREN_TOKEN LBRACKET_TOKEN RBRACKET_TOKEN
90      S_TOKEN CDO_TOKEN CDC_TOKEN COMMENT_TOKEN COMMENT_INVALID_TOKEN EOF_TOKEN
91      MINUS_TOKEN STAR_TOKEN VBAR_TOKEN DOT_TOKEN COLON_TOKEN MATCH_TOKEN
92      EXCLAMATION_TOKEN
93  );  );
94    
95    our %EXPORT_TAGS = ('token' => [@EXPORT_OK]);
96    
97  sub new ($) {  sub new ($) {
98    my $self = bless {token => [], get_char => sub { -1 },    my $self = bless {token => [], get_char => sub { -1 },
99                      onerror => sub { }}, shift;                      onerror => sub { }}, shift;
# Line 80  sub init ($) { Line 104  sub init ($) {
104    my $self = shift;    my $self = shift;
105    $self->{state} = BEFORE_TOKEN_STATE;    $self->{state} = BEFORE_TOKEN_STATE;
106    $self->{c} = $self->{get_char}->();    $self->{c} = $self->{get_char}->();
107      #$self->{t} = {type => token-type, value => value, number => number};
108  } # init  } # init
109    
110  sub get_next_token ($) {  sub get_next_token ($) {
# Line 88  sub get_next_token ($) { Line 113  sub get_next_token ($) {
113      return shift @{$self->{token}};      return shift @{$self->{token}};
114    }    }
115    
   my $current_token;  
116    my $char;    my $char;
117    my $num; # |{num}|, if any.    my $num; # |{num}|, if any.
118    my $i; # |$i + 1|th character in |unicode| in |escape|.    my $i; # |$i + 1|th character in |unicode| in |escape|.
# Line 103  sub get_next_token ($) { Line 127  sub get_next_token ($) {
127      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
128        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
129          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
130          $current_token = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
131          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
132          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
133          redo A;          redo A;
134          } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
135            $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
136            $self->{c} = $self->{get_char}->();
137            if ($self->{c} == 0x002B) { # +
138              $self->{c} = $self->{get_char}->();
139              if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
140                  (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
141                  (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
142                  $self->{c} == 0x003F) { # ?
143                $self->{t}->{value} = chr $self->{c};
144                $self->{t}->{type} = UNICODE_RANGE_TOKEN;
145                $self->{c} = $self->{get_char}->();
146                C: for (2..6) {
147                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
148                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
149                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
150                      $self->{c} == 0x003F) { # ?
151                    $self->{t}->{value} .= chr $self->{c};
152                    $self->{c} = $self->{get_char}->();
153                  } else {
154                    last C;
155                  }
156                } # C
157    
158                if ($self->{c} == 0x002D) { # -
159                  $self->{c} = $self->{get_char}->();
160                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
161                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
162                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
163                    $self->{t}->{value} .= '-' . chr $self->{c};
164                    $self->{c} = $self->{get_char}->();
165                    C: for (2..6) {
166                      if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
167                          (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
168                          (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
169                        $self->{t}->{value} .= chr $self->{c};
170                        $self->{c} = $self->{get_char}->();
171                      } else {
172                        last C;
173                      }
174                    } # C
175                    
176                    #
177                  } else {
178                    my $token = $self->{t};
179                    $self->{t} = {type => IDENT_TOKEN, value => '-'};
180                    $self->{state} = BEFORE_NMSTART_STATE;
181                    # reprocess
182                    return $token;
183                    #redo A;
184                  }
185                }
186    
187                $self->{state} = BEFORE_TOKEN_STATE;
188                # reprocess
189                return $self->{t};
190                #redo A;
191              } else {
192                unshift @{$self->{token}}, {type => PLUS_TOKEN};
193                $self->{state} = BEFORE_TOKEN_STATE;
194                # reprocess
195                return $self->{t};
196                #redo A;
197              }
198            } else {
199              $self->{state} = NAME_STATE;
200              # reprocess
201              redo A;
202            }
203        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
204                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
205                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
206                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
207          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
208          $current_token = {type => IDENT_TOKEN, value => chr $self->{c}};          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
209          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
210          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
211          redo A;          redo A;
212        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
213          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
214          $current_token = {type => IDENT_TOKEN, value => ''};          $self->{t} = {type => IDENT_TOKEN, value => ''};
215          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
216          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
217          redo A;          redo A;
218        } elsif ($self->{c} == 0x0040) { # @        } elsif ($self->{c} == 0x0040) { # @
219          ## NOTE: |@| in |ATKEYWORD|          ## NOTE: |@| in |ATKEYWORD|
220          $current_token = {type => ATKEYWORD_TOKEN, value => ''};          $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};
221          $self->{state} = AFTER_AT_STATE;          $self->{state} = AFTER_AT_STATE;
222          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
223          redo A;          redo A;
224        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
225          $current_token = {type => STRING_TOKEN, value => ''};          $self->{t} = {type => STRING_TOKEN, value => ''};
226          $self->{state} = STRING_STATE; $q = $self->{c};          $self->{state} = STRING_STATE; $q = $self->{c};
227          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
228          redo A;          redo A;
229        } elsif ($self->{c} == 0x0023) { # #        } elsif ($self->{c} == 0x0023) { # #
230          ## NOTE: |#| in |HASH|.          ## NOTE: |#| in |HASH|.
231          $current_token = {type => HASH_TOKEN, value => ''};          $self->{t} = {type => HASH_TOKEN, value => ''};
232          $self->{state} = HASH_OPEN_STATE;          $self->{state} = HASH_OPEN_STATE;
233          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
234          redo A;          redo A;
235        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
236          ## NOTE: |num|.          ## NOTE: |num|.
237          $current_token = {type => NUMBER_TOKEN, value => chr $self->{c}};          $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};
238          $self->{state} = NUMBER_STATE;          $self->{state} = NUMBER_STATE;
239          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
240          redo A;          redo A;
241        } elsif ($self->{c} == 0x002E) { # .        } elsif ($self->{c} == 0x002E) { # .
242          ## NOTE: |num|.          ## NOTE: |num|.
243          $current_token = {type => NUMBER_TOKEN, value => '0'};          $self->{t} = {type => NUMBER_TOKEN, value => '0'};
244          $self->{state} = NUMBER_FRACTION_STATE;          $self->{state} = NUMBER_FRACTION_STATE;
245          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
246          redo A;          redo A;
# Line 183  sub get_next_token ($) { Line 276  sub get_next_token ($) {
276          } else {          } else {
277            # stay in the state.            # stay in the state.
278            # reprocess            # reprocess
279            return {type => DELIM_STATE, value => '/'};            return {type => DELIM_TOKEN, value => '/'};
280            #redo A;            #redo A;
281          }                  }        
282        } elsif ($self->{c} == 0x003C) { # <        } elsif ($self->{c} == 0x003C) { # <
# Line 191  sub get_next_token ($) { Line 284  sub get_next_token ($) {
284          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
285          if ($self->{c} == 0x0021) { # !          if ($self->{c} == 0x0021) { # !
286            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
287            if ($self->{c} == 0x002C) { # -            if ($self->{c} == 0x002D) { # -
288              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->();
289              if ($self->{c} == 0x002C) { # -              if ($self->{c} == 0x002D) { # -
290                $self->{state} = BEFORE_TOKEN_STATE;                $self->{state} = BEFORE_TOKEN_STATE;
291                $self->{c} = $self->{get_char}->();                $self->{c} = $self->{get_char}->();
292                return {type => CDO_TOKEN};                return {type => CDO_TOKEN};
293                #redo A;                #redo A;
294              } else {              } else {
295                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
296                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
297                $current_token = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
298                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
299                #reprocess                #reprocess
300                return {type => DELIM_TOKEN, value => '<'};                return {type => DELIM_TOKEN, value => '<'};
301                #redo A;                #redo A;
302              }              }
303            } else {            } else {
304              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};              unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN};
305              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
306              #reprocess              #reprocess
307              return {type => DELIM_TOKEN, value => '<'};              return {type => DELIM_TOKEN, value => '<'};
# Line 221  sub get_next_token ($) { Line 314  sub get_next_token ($) {
314            #redo A;            #redo A;
315          }          }
316        } elsif (my $t = {        } elsif (my $t = {
317                  0x003B => SEMICOLON_TOKEN, # ;                          0x0021 => EXCLAMATION_TOKEN, # !
318                  0x007B => LBRACE_TOKEN, # {                          0x002D => MINUS_TOKEN, # -
319                  0x007D => RBRACE_TOKEN, # }                          0x002E => DOT_TOKEN, # .
320                  0x0028 => LPAREN_TOKEN, # (                          0x003A => COLON_TOKEN, # :
321                  0x0029 => RPAREN_TOKEN, # )                          0x003B => SEMICOLON_TOKEN, # ;
322                  0x005B => LBRACKET_TOKEN, # [                          0x003D => MATCH_TOKEN, # =
323                  0x005D => RBRACKET_TOKEN, # ]                          0x007B => LBRACE_TOKEN, # {
324                            0x007D => RBRACE_TOKEN, # }
325                            0x0028 => LPAREN_TOKEN, # (
326                            0x0029 => RPAREN_TOKEN, # )
327                            0x005B => LBRACKET_TOKEN, # [
328                            0x005D => RBRACKET_TOKEN, # ]
329                 }->{$self->{c}}) {                 }->{$self->{c}}) {
330          # stay in the state          # stay in the state
331          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 280  sub get_next_token ($) { Line 378  sub get_next_token ($) {
378            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
379            return {type => $v};            return {type => $v};
380            #redo A;            #redo A;
381            } elsif ($v = {
382                           0x002A => STAR_TOKEN, # *
383                           0x007C => VBAR_TOKEN, # |
384                          }->{$c}) {
385              # stay in the state.
386              # reprocess
387              return {type => $v};
388              #redo A;
389          } else {          } else {
390            # stay in the state            # stay in the state
391            # reprocess            # reprocess
# Line 321  sub get_next_token ($) { Line 427  sub get_next_token ($) {
427          #redo A;          #redo A;
428        } else {        } else {
429          # stay in the state          # stay in the state
430          $current_token = {type => DELIM_TOKEN, value => chr $self->{c}};          $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};
431          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
432          return $current_token;          return $self->{t};
433          #redo A;          #redo A;
434        }        }
435      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
# Line 333  sub get_next_token ($) { Line 439  sub get_next_token ($) {
439            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
440            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
441            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
442          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
443          $current_token->{type} = DIMENSION_TOKEN          $self->{t}->{type} = DIMENSION_TOKEN
444              if $current_token->{type} == NUMBER_TOKEN;              if $self->{t}->{type} == NUMBER_TOKEN;
445          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
446          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
447          redo A;          redo A;
448        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
 ## TODO: 12-\X, 12-\{nl}  
449          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
450          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
451          redo A;          redo A;
452        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D) { # -
453                 $current_token->{type} == IDENT_TOKEN) {          if ($self->{t}->{type} == IDENT_TOKEN) {
         $self->{c} = $self->{get_char}->();  
         if ($self->{c} == 0x003E) { # >  
           $self->{state} = BEFORE_TOKEN_STATE;  
454            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
455            return {type => CDC_TOKEN};            if ($self->{c} == 0x003E) { # >
456            #redo A;              $self->{state} = BEFORE_TOKEN_STATE;
457                $self->{c} = $self->{get_char}->();
458                return {type => CDC_TOKEN};
459                #redo A;
460              } else {
461                ## NOTE: |-|, |-|, $self->{c}
462                #$self->{t} = {type => IDENT_TOKEN, value => '-'};
463                # stay in the state
464                # reconsume
465                return {type => MINUS_TOKEN};
466                #redo A;
467              }
468            } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
469              $self->{c} = $self->{get_char}->();
470              if ($self->{c} == 0x003E) { # >
471                unshift @{$self->{token}}, {type => CDC_TOKEN};
472                $self->{t}->{type} = NUMBER_TOKEN;
473                $self->{t}->{value} = '';
474                $self->{state} = BEFORE_TOKEN_STATE;
475                $self->{c} = $self->{get_char}->();
476                return $self->{t};
477                #redo A;
478              } else {
479                ## NOTE: |-|, |-|, $self->{c}
480                my $t = $self->{t};
481                $t->{type} = NUMBER_TOKEN;
482                $t->{value} = '';
483                $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
484                unshift @{$self->{token}}, {type => MINUS_TOKEN};
485                # stay in the state
486                # reconsume
487                return $t;
488                #redo A;
489              }
490          } else {          } else {
491            ## NOTE: |-|, |-|, $self->{c}            #
           #$current_token = {type => IDENT_TOKEN, value => '-'};  
           # stay in the state  
           # reconsume  
           return {type => DELIM_TOKEN, value => '-'};  
           #redo A;  
492          }          }
493        } else {        } else {
494          if ($current_token->{type} == NUMBER_TOKEN) {          #
495            ## NOTE: |-| after |NUMBER|.        }
496            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};        
497            $self->{state} = BEFORE_TOKEN_STATE;        if ($self->{t}->{type} == DIMENSION_TOKEN) {
498            # reconsume          ## NOTE: |-| after |NUMBER|.
499            $current_token->{value} = $current_token->{number};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
500            delete $current_token->{number};          $self->{state} = BEFORE_TOKEN_STATE;
501            return $current_token;          # reprocess
502          } else {          $self->{t}->{type} = NUMBER_TOKEN;
503            ## NOTE: |-| not followed by |nmstart|.          $self->{t}->{value} = '';
504            $self->{state} = BEFORE_TOKEN_STATE;          return $self->{t};
505            $self->{c} = $self->{get_char}->();        } else {
506            return {type => DELIM_TOKEN, value => '-'};          ## NOTE: |-| not followed by |nmstart|.
507          }          $self->{state} = BEFORE_TOKEN_STATE;
508            # reprocess
509            return {type => MINUS_TOKEN};
510        }        }
511      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
512        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
513            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
514            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
515            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
516          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
517          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
518          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
519          redo A;          redo A;
520        } elsif ($self->{c} == 0x002D) { # -        } elsif ($self->{c} == 0x002D) { # -
521          $current_token->{value} .= '-';          $self->{t}->{value} .= '-';
522          $self->{state} = AFTER_AT_HYPHEN_STATE;          $self->{state} = AFTER_AT_HYPHEN_STATE;
523          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
524          redo A;          redo A;
# Line 404  sub get_next_token ($) { Line 536  sub get_next_token ($) {
536            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
537            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
538            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
539          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
540          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
541          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
542          redo A;          redo A;
# Line 417  sub get_next_token ($) { Line 549  sub get_next_token ($) {
549            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
550            #redo A;            #redo A;
551          } else {          } else {
552            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => MINUS_TOKEN};
553            $current_token = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
554            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
555            # reprocess            # reprocess
556            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
# Line 430  sub get_next_token ($) { Line 562  sub get_next_token ($) {
562          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
563          redo A;          redo A;
564        } else {        } else {
565          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
566          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
567          # reprocess          # reprocess
568          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@'};
# Line 438  sub get_next_token ($) { Line 570  sub get_next_token ($) {
570      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
571        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
572          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
573          $current_token->{value} = '-';          $self->{t}->{hyphen} = 1;
574            $self->{t}->{value} = '-';
575            $self->{t}->{type} = DIMENSION_TOKEN;
576          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
577          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
578          redo A;          redo A;
# Line 447  sub get_next_token ($) { Line 581  sub get_next_token ($) {
581                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
582                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
583          ## NOTE: |nmstart| in |ident|.          ## NOTE: |nmstart| in |ident|.
584          $current_token->{value} = chr $self->{c};          $self->{t}->{value} = chr $self->{c};
585          $current_token->{type} = DIMENSION_TOKEN;          $self->{t}->{type} = DIMENSION_TOKEN;
586          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
587          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
588          redo A;          redo A;
589        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
590          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
591          $current_token->{value} = '';          $self->{t}->{value} = '';
592            $self->{t}->{type} = DIMENSION_TOKEN;
593          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
594          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
595          redo A;          redo A;
596        } elsif ($self->{c} == 0x0025) { # %        } elsif ($self->{c} == 0x0025) { # %
597          $current_token->{type} = PERCENTAGE_TOKEN;          $self->{t}->{type} = PERCENTAGE_TOKEN;
598          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
599          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
600          return $current_token;          return $self->{t};
601          #redo A;          #redo A;
602        } else {        } else {
603          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
604          # reprocess          # reprocess
605          return $current_token;          return $self->{t};
606          #redo A;          #redo A;
607        }        }
608      } elsif ($self->{state} == HASH_OPEN_STATE) {      } elsif ($self->{state} == HASH_OPEN_STATE) {
# Line 478  sub get_next_token ($) { Line 613  sub get_next_token ($) {
613            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
614            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
615            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
616          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
617          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
618          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
619          redo A;          redo A;
# Line 488  sub get_next_token ($) { Line 623  sub get_next_token ($) {
623          redo A;          redo A;
624        } else {        } else {
625          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
626          $self->{c} = $self->{get_char}->();          # reprocess
627          return {type => DELIM_TOKEN, value => '#'};          return {type => DELIM_TOKEN, value => '#'};
628          #redo A;          #redo A;
629        }        }
# Line 500  sub get_next_token ($) { Line 635  sub get_next_token ($) {
635            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
636            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
637            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
638          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
639          # stay in the state          # stay in the state
640          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
641          redo A;          redo A;
# Line 509  sub get_next_token ($) { Line 644  sub get_next_token ($) {
644          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
645          redo A;          redo A;
646        } elsif ($self->{c} == 0x0028 and # (        } elsif ($self->{c} == 0x0028 and # (
647                 $current_token->{type} == IDENT_TOKEN) { # (                 $self->{t}->{type} == IDENT_TOKEN) { # (
648          my $func_name = $current_token->{value};          my $func_name = $self->{t}->{value};
649          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
650          if ($func_name eq 'url' or $func_name eq 'url-prefix') {          if ($func_name eq 'url' or $func_name eq 'url-prefix') {
651            if ($current_token->{has_escape}) {            if ($self->{t}->{has_escape}) {
652              ## TODO: warn              ## TODO: warn
653            }            }
654            $current_token->{type}            $self->{t}->{type}
655                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
656            $current_token->{value} = '';            $self->{t}->{value} = '';
657            $self->{state} = URI_BEFORE_WSP_STATE;            $self->{state} = URI_BEFORE_WSP_STATE;
658            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
659            redo A;            redo A;
660          } else {          } else {
661            $current_token->{type} = FUNCTION_TOKEN;            $self->{t}->{type} = FUNCTION_TOKEN;
662            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
663            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
664            return $current_token;            return $self->{t};
665            #redo A;            #redo A;
666          }          }
667        } else {        } else {
668          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
669          # reconsume          # reconsume
670          return $current_token;          return $self->{t};
671          #redo A;          #redo A;
672        }        }
673      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
# Line 546  sub get_next_token ($) { Line 681  sub get_next_token ($) {
681          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
682        }        }
683        if ($self->{c} == -1) {        if ($self->{c} == -1) {
684          $current_token->{type} = {          $self->{t}->{type} = {
685              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
686              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
687              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
688              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
689          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
690          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
691          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
692          return $current_token;          return $self->{t};
693          #redo A;          #redo A;
694        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
695          ## TODO: Should we consider matches of "(" and ")"?          ## TODO: Should we consider matches of "(" and ")"?
696          $current_token->{type} = {          $self->{t}->{type} = {
697              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
698              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
699              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
700              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
701          }->{$current_token->{type}};          }->{$self->{t}->{type}};
702          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
703          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
704          redo A;          redo A;
# Line 574  sub get_next_token ($) { Line 709  sub get_next_token ($) {
709        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
710          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
711          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
712          return $current_token;          return $self->{t};
713          #redo A;          #redo A;
714        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
715          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
716          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
717          redo A;          redo A;
718        } else {        } else {
719          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
720          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
721          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
722          redo A;          redo A;
# Line 598  sub get_next_token ($) { Line 733  sub get_next_token ($) {
733          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
734          redo A;          redo A;
735        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
736          $current_token->{type} = {          $self->{t}->{type} = {
737              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
738              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
739              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
740              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
741          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
742          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
743          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
744          return $current_token;          return $self->{t};
745          #redo A;          #redo A;
746        } elsif ($self->{c} < 0x0020 or {        } elsif ($self->{c} < 0x0020 or {
747            0x0022 => 1, # "            0x0022 => 1, # "
# Line 614  sub get_next_token ($) { Line 749  sub get_next_token ($) {
749            0x0028 => 1, # (            0x0028 => 1, # (
750        }->{$self->{c}}) { # C0 or (        }->{$self->{c}}) { # C0 or (
751          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
752          $current_token->{type} = {          $self->{t}->{type} = {
753              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
754              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
755              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
756              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
757          }->{$current_token->{type}};          }->{$self->{t}->{type}};
758          # stay in the state.          # stay in the state.
759          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
760          redo A;          redo A;
761        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
762          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
763          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
764          return $current_token;          return $self->{t};
765          #redo A;          #redo A;
766        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
767          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
768          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
769          redo A;          redo A;
770        } else {        } else {
771          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
772          # stay in the state.          # stay in the state.
773          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
774          redo A;          redo A;
# Line 650  sub get_next_token ($) { Line 785  sub get_next_token ($) {
785          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
786          redo A;          redo A;
787        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
788          $current_token->{type} = {          $self->{t}->{type} = {
789              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
790              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
791              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
792              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
793          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
794          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
795          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
796          return $current_token;          return $self->{t};
797          #redo A;          #redo A;
798        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
799          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
800          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
801          return $current_token;          return $self->{t};
802          #redo A;          #redo A;
803        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
804          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
# Line 671  sub get_next_token ($) { Line 806  sub get_next_token ($) {
806          redo A;          redo A;
807        } else {        } else {
808          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
809          $current_token->{type} = {          $self->{t}->{type} = {
810              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
811              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
812              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
813              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
814          }->{$current_token->{type}};          }->{$self->{t}->{type}};
815          # stay in the state.          # stay in the state.
816          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
817          redo A;          redo A;
818        }        }
819      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
820        $current_token->{has_escape} = 1;        $self->{t}->{has_escape} = 1;
821        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
822          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
823          $char = $self->{c} - 0x0030;          $char = $self->{c} - 0x0030;
# Line 697  sub get_next_token ($) { Line 832  sub get_next_token ($) {
832          redo A;          redo A;
833        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
834          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
835          $char = $self->{c} - 0x0061 - 0xA;          $char = $self->{c} - 0x0061 + 0xA;
836          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
837          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
838          redo A;          redo A;
839        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
840                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
841          if ($q == 0) {          if ($q == 0) {
842            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $current_token;  
           # reconsume  
           #redo A;  
843          } elsif ($q == 1) {          } elsif ($q == 1) {
844            ## NOTE: In |escape| in |URI|.            ## NOTE: In |escape| in |URI|.
845            $current_token->{type} = {            $self->{t}->{type} = {
846                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
847                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
848                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
849                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
850            }->{$current_token->{type}};            }->{$self->{t}->{type}};
851            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
852            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = URI_UNQUOTED_STATE;
853            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
854            redo A;            redo A;
855          } else {          } else {
856            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
857            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
858            $self->{state} = STRING_STATE;            $self->{state} = STRING_STATE;
859            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
860            redo A;            redo A;
861          }          }
862        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
863          if ($q == 0) {          if ($q == 0) {
864            ## NOTE: In |escape| in ... in |ident|.            #
           $self->{state} = BEFORE_TOKEN_STATE;  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};  
           return $current_token;  
           # reconsume  
           #redo A;  
865          } elsif ($q == 1) {          } elsif ($q == 1) {
866            $current_token->{type} = {            ## NOTE: In |escape| in |URI|.
867              $self->{t}->{type} = {
868                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
869                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
870                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
871                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
872            }->{$current_token->{type}};            }->{$self->{t}->{type}};
873            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
874            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
875            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
876            redo A;            redo A;
877          } else {          } else {
878            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
879            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D";
880            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
881            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
882            redo A;            redo A;
883          }          }
884          } elsif ($self->{c} == -1) {
885            #
886        } else {        } else {
887          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
888          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
889          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
890              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
891          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
892          redo A;          redo A;
893        }        }
894    
895          if ($q == 0) {
896            if ($self->{t}->{type} == DIMENSION_TOKEN) {
897              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
898                $self->{state} = BEFORE_TOKEN_STATE;
899                # reprocess
900                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
901                unshift @{$self->{token}}, {type => MINUS_TOKEN};
902                $self->{t}->{type} = NUMBER_TOKEN;
903                $self->{t}->{value} = '';
904                return $self->{t};
905                #redo A;
906              } elsif (length $self->{t}->{value}) {
907                $self->{state} = BEFORE_TOKEN_STATE;
908                # reprocess
909                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
910                return $self->{t};
911                #redo A;
912              } else {
913                $self->{state} = BEFORE_TOKEN_STATE;
914                # reprocess
915                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
916                $self->{t}->{type} = NUMBER_TOKEN;
917                $self->{t}->{value} = '';
918                return $self->{t};
919                #redo A;
920              }
921            } else {
922              if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
923                $self->{state} = BEFORE_TOKEN_STATE;
924                # reprocess
925                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
926                return {type => MINUS_TOKEN};
927                #redo A;
928              } elsif (length $self->{t}->{value}) {
929                $self->{state} = BEFORE_TOKEN_STATE;
930                # reprocess
931                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
932                return $self->{t};
933                #redo A;
934              } else {
935                $self->{state} = BEFORE_TOKEN_STATE;
936                # reprocess
937                return {type => DELIM_TOKEN, value => '\\'};
938                #redo A;
939              }
940            }
941          } elsif ($q == 1) {
942            $self->{state} = URI_UNQUOTED_STATE;
943            $self->{c} = $self->{get_char}->();
944            redo A;
945          } else {
946            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
947            $self->{t}->{type} = {
948              STRING_TOKEN, INVALID_TOKEN,
949              URI_TOKEN, URI_INVALID_TOKEN,
950              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
951            }->{$self->{t}->{type}} || $self->{t}->{type};
952            $self->{state} = BEFORE_TOKEN_STATE;
953            # reprocess
954            return $self->{t};
955            #redo A;
956          }
957      } elsif ($self->{state} == ESCAPE_STATE) {      } elsif ($self->{state} == ESCAPE_STATE) {
958        ## NOTE: third..seventh character of |unicode| in |escape|.        ## NOTE: third..seventh character of |unicode| in |escape|.
959        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
# Line 776  sub get_next_token ($) { Line 967  sub get_next_token ($) {
967          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
968          redo A;          redo A;
969        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
970          $char = $char * 0x10 + $self->{c} - 0x0061 - 0xA;          $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
971          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
972          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
973          redo A;          redo A;
# Line 784  sub get_next_token ($) { Line 975  sub get_next_token ($) {
975                 $self->{c} == 0x000A or # \n                 $self->{c} == 0x000A or # \n
976                 $self->{c} == 0x0009 or # \t                 $self->{c} == 0x0009 or # \t
977                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
978          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
979          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
980              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
981          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 794  sub get_next_token ($) { Line 985  sub get_next_token ($) {
985          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
986          redo A;          redo A;
987        } else {        } else {
988          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
989          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
990              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
991          # reconsume          # reconsume
# Line 806  sub get_next_token ($) { Line 997  sub get_next_token ($) {
997            $self->{c} == 0x000A or # \n            $self->{c} == 0x000A or # \n
998            $self->{c} == 0x0009 or # \t            $self->{c} == 0x0009 or # \t
999            $self->{c} == 0x000C) { # \f            $self->{c} == 0x000C) { # \f
1000          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
1001          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1002              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1003          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 816  sub get_next_token ($) { Line 1007  sub get_next_token ($) {
1007          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1008          redo A;          redo A;
1009        } else {        } else {
1010          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
1011          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1012              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1013          # reconsume          # reconsume
# Line 825  sub get_next_token ($) { Line 1016  sub get_next_token ($) {
1016      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
1017        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
1018        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
1019          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $self->{c};
1020          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1021              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1022          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1023          redo A;          redo A;
1024        } else {        } else {
         $current_token->{value} .= chr $char;  
1025          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1026              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1027          # reconsume          # reprocess
1028          redo A;          redo A;
1029        }        }
1030      } elsif ($self->{state} == STRING_STATE) {      } elsif ($self->{state} == STRING_STATE) {
# Line 847  sub get_next_token ($) { Line 1037  sub get_next_token ($) {
1037          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1038          redo A;          redo A;
1039        } elsif ($self->{c} == $q) { # " | '        } elsif ($self->{c} == $q) { # " | '
1040          if ($current_token->{type} == STRING_TOKEN) {          if ($self->{t}->{type} == STRING_TOKEN) {
1041            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
1042            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
1043            return $current_token;            return $self->{t};
1044            #redo A;            #redo A;
1045          } else {          } else {
1046            $self->{state} = URI_AFTER_WSP_STATE;            $self->{state} = URI_AFTER_WSP_STATE;
# Line 861  sub get_next_token ($) { Line 1051  sub get_next_token ($) {
1051                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
1052                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
1053                 $self->{c} == -1) {                 $self->{c} == -1) {
1054          $current_token->{type} = INVALID_TOKEN;          $self->{t}->{type} = {
1055              STRING_TOKEN, INVALID_TOKEN,
1056              INVALID_TOKEN, INVALID_TOKEN,
1057              URI_TOKEN, URI_INVALID_TOKEN,
1058              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
1059              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1060              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
1061            }->{$self->{t}->{type}};
1062          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1063          # reconsume          # reconsume
1064          return $current_token;          return $self->{t};
1065          #redo A;          #redo A;
1066        } else {        } else {
1067          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1068          # stay in the state          # stay in the state
1069          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1070          redo A;          redo A;
# Line 875  sub get_next_token ($) { Line 1072  sub get_next_token ($) {
1072      } elsif ($self->{state} == NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_STATE) {
1073        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
1074        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1075          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1076          # stay in the state          # stay in the state
1077          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1078          redo A;          redo A;
# Line 884  sub get_next_token ($) { Line 1081  sub get_next_token ($) {
1081          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1082          redo A;          redo A;
1083        } else {        } else {
1084          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1085          $current_token->{value} = '';          $self->{t}->{value} = '';
1086          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1087          # reprocess          # reprocess
1088          redo A;          redo A;
# Line 893  sub get_next_token ($) { Line 1090  sub get_next_token ($) {
1090      } elsif ($self->{state} == NUMBER_DOT_STATE) {      } elsif ($self->{state} == NUMBER_DOT_STATE) {
1091        ## NOTE: The character immediately following |.| in |num|.        ## NOTE: The character immediately following |.| in |num|.
1092        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1093          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
1094          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
1095          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1096          redo A;          redo A;
1097        } else {        } else {
1098          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DOT_TOKEN};
1099          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1100          $current_token->{value} = '';          $self->{t}->{value} = '';
1101          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1102          # reprocess          # reprocess
1103          return $current_token;          return $self->{t};
1104          #redo A;          #redo A;
1105        }        }
1106      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
1107        ## NOTE: The character immediately following |.| at the beginning of |num|.        ## NOTE: The character immediately following |.| at the beginning of |num|.
1108        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1109          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
1110          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
1111          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1112          redo A;          redo A;
1113        } else {        } else {
1114          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1115          $self->{c} = $self->{get_char}->();          # reprocess
1116          return {type => DELIM_TOKEN, value => '.'};          return {type => DOT_TOKEN};
1117          #redo A;          #redo A;
1118        }        }
1119      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
1120        ## NOTE: |[0-9]| in |num| after |.|.        ## NOTE: |[0-9]| in |num| after |.|.
1121        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1122          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1123          # stay in the state          # stay in the state
1124          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
1125          redo A;          redo A;
1126        } else {        } else {
1127          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1128          $current_token->{value} = '';          $self->{t}->{value} = '';
1129          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1130          # reprocess          # reprocess
1131          redo A;          redo A;
# Line 937  sub get_next_token ($) { Line 1134  sub get_next_token ($) {
1134        die "$0: Unknown state |$self->{state}|";        die "$0: Unknown state |$self->{state}|";
1135      }      }
1136    } # A    } # A
   
   ## TODO: |URI|, |UNICODE-RANGE|, |COMMENT|  
   
1137  } # get_next_token  } # get_next_token
1138    
1139  1;  1;

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.14

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24