/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.4 by wakaba, Sat Sep 8 02:58:24 2007 UTC | revision 1.6 by wakaba, Sat Sep 8 05:57:05 2007 UTC
# Line 36  sub NUMBER_TOKEN () { 11 } Line 36  sub NUMBER_TOKEN () { 11 }
36  sub DIMENSION_TOKEN () { 12 }  sub DIMENSION_TOKEN () { 12 }
37  sub PERCENTAGE_TOKEN () { 13 }  sub PERCENTAGE_TOKEN () { 13 }
38  sub UNICODE_RANGE_TOKEN () { 14 }  sub UNICODE_RANGE_TOKEN () { 14 }
 sub UNICODE_RANGE_INVALID_TOKEN () { 15 }  
39  sub DELIM_TOKEN () { 16 }  sub DELIM_TOKEN () { 16 }
40  sub PLUS_TOKEN () { 17 }  sub PLUS_TOKEN () { 17 }
41  sub GREATER_TOKEN () { 18 }  sub GREATER_TOKEN () { 18 }
# Line 64  sub EOF_TOKEN () { 38 } Line 63  sub EOF_TOKEN () { 38 }
63  our @TokenName = qw(  our @TokenName = qw(
64    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
65    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE
66    UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
67    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
68    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
69    COMMENT_INVALID EOF    COMMENT_INVALID EOF
# Line 80  sub init ($) { Line 79  sub init ($) {
79    my $self = shift;    my $self = shift;
80    $self->{state} = BEFORE_TOKEN_STATE;    $self->{state} = BEFORE_TOKEN_STATE;
81    $self->{c} = $self->{get_char}->();    $self->{c} = $self->{get_char}->();
82      #$self->{t} = {type => token-type, value => value, number => number};
83  } # init  } # init
84    
85  sub get_next_token ($) {  sub get_next_token ($) {
# Line 88  sub get_next_token ($) { Line 88  sub get_next_token ($) {
88      return shift @{$self->{token}};      return shift @{$self->{token}};
89    }    }
90    
   my $current_token;  
91    my $char;    my $char;
92    my $num; # |{num}|, if any.    my $num; # |{num}|, if any.
93    my $i; # |$i + 1|th character in |unicode| in |escape|.    my $i; # |$i + 1|th character in |unicode| in |escape|.
# Line 103  sub get_next_token ($) { Line 102  sub get_next_token ($) {
102      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
103        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
104          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
105          $current_token = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-'};
106          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
107          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
108          redo A;          redo A;
109          } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
110            $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
111            $self->{c} = $self->{get_char}->();
112            if ($self->{c} == 0x002B) { # +
113              $self->{c} = $self->{get_char}->();
114              if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
115                  (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
116                  (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
117                  $self->{c} == 0x003F) { # ?
118                $self->{t}->{value} .= '+' . chr $self->{c};
119                $self->{t}->{type} = UNICODE_RANGE_TOKEN;
120                $self->{c} = $self->{get_char}->();
121                C: for (2..6) {
122                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
123                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
124                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
125                      $self->{c} == 0x003F) { # ?
126                    $self->{t}->{value} .= chr $self->{c};
127                    $self->{c} = $self->{get_char}->();
128                  } else {
129                    last C;
130                  }
131                } # C
132    
133                if ($self->{c} == 0x002D) { # -
134                  $self->{c} = $self->{get_char}->();
135                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
136                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
137                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
138                    $self->{t}->{value} .= '-' . chr $self->{c};
139                    $self->{c} = $self->{get_char}->();
140                    C: for (2..6) {
141                      if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
142                          (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
143                          (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
144                        $self->{t}->{value} .= chr $self->{c};
145                        $self->{c} = $self->{get_char}->();
146                      } else {
147                        last C;
148                      }
149                    } # C
150                    
151                    #
152                  } else {
153                    my $token = $self->{t};
154                    $self->{t} = {type => IDENT_TOKEN, value => '-'};
155                    $self->{state} = BEFORE_NMSTART_STATE;
156                    # reprocess
157                    return $token;
158                    #redo A;
159                  }
160                }
161    
162                $self->{state} = BEFORE_TOKEN_STATE;
163                # reprocess
164                return $self->{t};
165                #redo A;
166              } else {
167                unshift @{$self->{token}}, {type => PLUS_TOKEN};
168                $self->{state} = BEFORE_TOKEN_STATE;
169                # reprocess
170                return $self->{t};
171                #redo A;
172              }
173            } else {
174              $self->{state} = NAME_STATE;
175              # reprocess
176              redo A;
177            }
178        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
179                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
180                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
181                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
182          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
183          $current_token = {type => IDENT_TOKEN, value => chr $self->{c}};          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
184          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
185          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
186          redo A;          redo A;
187        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
188          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
189          $current_token = {type => IDENT_TOKEN, value => ''};          $self->{t} = {type => IDENT_TOKEN, value => ''};
190          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
191          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
192          redo A;          redo A;
193        } elsif ($self->{c} == 0x0040) { # @        } elsif ($self->{c} == 0x0040) { # @
194          ## NOTE: |@| in |ATKEYWORD|          ## NOTE: |@| in |ATKEYWORD|
195          $current_token = {type => ATKEYWORD_TOKEN, value => ''};          $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};
196          $self->{state} = AFTER_AT_STATE;          $self->{state} = AFTER_AT_STATE;
197          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
198          redo A;          redo A;
199        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
200          $current_token = {type => STRING_TOKEN, value => ''};          $self->{t} = {type => STRING_TOKEN, value => ''};
201          $self->{state} = STRING_STATE; $q = $self->{c};          $self->{state} = STRING_STATE; $q = $self->{c};
202          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
203          redo A;          redo A;
204        } elsif ($self->{c} == 0x0023) { # #        } elsif ($self->{c} == 0x0023) { # #
205          ## NOTE: |#| in |HASH|.          ## NOTE: |#| in |HASH|.
206          $current_token = {type => HASH_TOKEN, value => ''};          $self->{t} = {type => HASH_TOKEN, value => ''};
207          $self->{state} = HASH_OPEN_STATE;          $self->{state} = HASH_OPEN_STATE;
208          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
209          redo A;          redo A;
210        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
211          ## NOTE: |num|.          ## NOTE: |num|.
212          $current_token = {type => NUMBER_TOKEN, value => chr $self->{c}};          $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};
213          $self->{state} = NUMBER_STATE;          $self->{state} = NUMBER_STATE;
214          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
215          redo A;          redo A;
216        } elsif ($self->{c} == 0x002E) { # .        } elsif ($self->{c} == 0x002E) { # .
217          ## NOTE: |num|.          ## NOTE: |num|.
218          $current_token = {type => NUMBER_TOKEN, value => '0'};          $self->{t} = {type => NUMBER_TOKEN, value => '0'};
219          $self->{state} = NUMBER_FRACTION_STATE;          $self->{state} = NUMBER_FRACTION_STATE;
220          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
221          redo A;          redo A;
# Line 201  sub get_next_token ($) { Line 269  sub get_next_token ($) {
269              } else {              } else {
270                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
271                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
272                $current_token = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
273                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
274                #reprocess                #reprocess
275                return {type => DELIM_TOKEN, value => '<'};                return {type => DELIM_TOKEN, value => '<'};
# Line 321  sub get_next_token ($) { Line 389  sub get_next_token ($) {
389          #redo A;          #redo A;
390        } else {        } else {
391          # stay in the state          # stay in the state
392          $current_token = {type => DELIM_TOKEN, value => chr $self->{c}};          $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};
393          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
394          return $current_token;          return $self->{t};
395          #redo A;          #redo A;
396        }        }
397      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
# Line 333  sub get_next_token ($) { Line 401  sub get_next_token ($) {
401            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
402            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
403            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
404          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
405          $current_token->{type} = DIMENSION_TOKEN          $self->{t}->{type} = DIMENSION_TOKEN
406              if $current_token->{type} == NUMBER_TOKEN;              if $self->{t}->{type} == NUMBER_TOKEN;
407          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
408          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
409          redo A;          redo A;
# Line 345  sub get_next_token ($) { Line 413  sub get_next_token ($) {
413          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
414          redo A;          redo A;
415        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D and # -
416                 $current_token->{type} == IDENT_TOKEN) {                 $self->{t}->{type} == IDENT_TOKEN) {
417          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
418          if ($self->{c} == 0x003E) { # >          if ($self->{c} == 0x003E) { # >
419            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
# Line 354  sub get_next_token ($) { Line 422  sub get_next_token ($) {
422            #redo A;            #redo A;
423          } else {          } else {
424            ## NOTE: |-|, |-|, $self->{c}            ## NOTE: |-|, |-|, $self->{c}
425            #$current_token = {type => IDENT_TOKEN, value => '-'};            #$self->{t} = {type => IDENT_TOKEN, value => '-'};
426            # stay in the state            # stay in the state
427            # reconsume            # reconsume
428            return {type => DELIM_TOKEN, value => '-'};            return {type => DELIM_TOKEN, value => '-'};
429            #redo A;            #redo A;
430          }          }
431        } else {        } else {
432          if ($current_token->{type} == NUMBER_TOKEN) {          if ($self->{t}->{type} == NUMBER_TOKEN) {
433            ## NOTE: |-| after |NUMBER|.            ## NOTE: |-| after |NUMBER|.
434            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
435            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
436            # reconsume            # reconsume
437            $current_token->{value} = $current_token->{number};            $self->{t}->{value} = $self->{t}->{number};
438            delete $current_token->{number};            delete $self->{t}->{number};
439            return $current_token;            return $self->{t};
440          } else {          } else {
441            ## NOTE: |-| not followed by |nmstart|.            ## NOTE: |-| not followed by |nmstart|.
442            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
# Line 381  sub get_next_token ($) { Line 449  sub get_next_token ($) {
449            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
450            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
451            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
452          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
453          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
454          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
455          redo A;          redo A;
456        } elsif ($self->{c} == 0x002D) { # -        } elsif ($self->{c} == 0x002D) { # -
457          $current_token->{value} .= '-';          $self->{t}->{value} .= '-';
458          $self->{state} = AFTER_AT_HYPHEN_STATE;          $self->{state} = AFTER_AT_HYPHEN_STATE;
459          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
460          redo A;          redo A;
# Line 404  sub get_next_token ($) { Line 472  sub get_next_token ($) {
472            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
473            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
474            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
475          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
476          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
477          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
478          redo A;          redo A;
# Line 418  sub get_next_token ($) { Line 486  sub get_next_token ($) {
486            #redo A;            #redo A;
487          } else {          } else {
488            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
489            $current_token = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
490            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
491            # reprocess            # reprocess
492            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
# Line 438  sub get_next_token ($) { Line 506  sub get_next_token ($) {
506      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
507        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
508          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
509          $current_token->{value} = '-';          $self->{t}->{value} = '-';
510          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
511          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
512          redo A;          redo A;
# Line 447  sub get_next_token ($) { Line 515  sub get_next_token ($) {
515                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
516                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
517          ## NOTE: |nmstart| in |ident|.          ## NOTE: |nmstart| in |ident|.
518          $current_token->{value} = chr $self->{c};          $self->{t}->{value} = chr $self->{c};
519          $current_token->{type} = DIMENSION_TOKEN;          $self->{t}->{type} = DIMENSION_TOKEN;
520          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
521          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
522          redo A;          redo A;
523        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
524          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
525          $current_token->{value} = '';          $self->{t}->{value} = '';
526          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
527          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
528          redo A;          redo A;
529        } elsif ($self->{c} == 0x0025) { # %        } elsif ($self->{c} == 0x0025) { # %
530          $current_token->{type} = PERCENTAGE_TOKEN;          $self->{t}->{type} = PERCENTAGE_TOKEN;
531          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
532          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
533          return $current_token;          return $self->{t};
534          #redo A;          #redo A;
535        } else {        } else {
536          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
537          # reprocess          # reprocess
538          return $current_token;          return $self->{t};
539          #redo A;          #redo A;
540        }        }
541      } elsif ($self->{state} == HASH_OPEN_STATE) {      } elsif ($self->{state} == HASH_OPEN_STATE) {
# Line 478  sub get_next_token ($) { Line 546  sub get_next_token ($) {
546            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
547            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
548            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
549          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
550          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
551          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
552          redo A;          redo A;
# Line 500  sub get_next_token ($) { Line 568  sub get_next_token ($) {
568            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
569            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
570            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
571          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
572          # stay in the state          # stay in the state
573          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
574          redo A;          redo A;
# Line 509  sub get_next_token ($) { Line 577  sub get_next_token ($) {
577          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
578          redo A;          redo A;
579        } elsif ($self->{c} == 0x0028 and # (        } elsif ($self->{c} == 0x0028 and # (
580                 $current_token->{type} == IDENT_TOKEN) { # (                 $self->{t}->{type} == IDENT_TOKEN) { # (
581          my $func_name = $current_token->{value};          my $func_name = $self->{t}->{value};
582          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
583          if ($func_name eq 'url' or $func_name eq 'url-prefix') {          if ($func_name eq 'url' or $func_name eq 'url-prefix') {
584            if ($current_token->{has_escape}) {            if ($self->{t}->{has_escape}) {
585              ## TODO: warn              ## TODO: warn
586            }            }
587            $current_token->{type}            $self->{t}->{type}
588                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
589            $current_token->{value} = '';            $self->{t}->{value} = '';
590            $self->{state} = URI_BEFORE_WSP_STATE;            $self->{state} = URI_BEFORE_WSP_STATE;
591            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
592            redo A;            redo A;
593          } else {          } else {
594            $current_token->{type} = FUNCTION_TOKEN;            $self->{t}->{type} = FUNCTION_TOKEN;
595            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
596            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
597            return $current_token;            return $self->{t};
598            #redo A;            #redo A;
599          }          }
600        } else {        } else {
601          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
602          # reconsume          # reconsume
603          return $current_token;          return $self->{t};
604          #redo A;          #redo A;
605        }        }
606      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
# Line 546  sub get_next_token ($) { Line 614  sub get_next_token ($) {
614          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
615        }        }
616        if ($self->{c} == -1) {        if ($self->{c} == -1) {
617          $current_token->{type} = {          $self->{t}->{type} = {
618              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
619              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
620              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
621              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
622          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
623          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
624          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
625          return $current_token;          return $self->{t};
626          #redo A;          #redo A;
627        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
628          ## TODO: Should we consider matches of "(" and ")"?          ## TODO: Should we consider matches of "(" and ")"?
629          $current_token->{type} = {          $self->{t}->{type} = {
630              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
631              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
632              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
633              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
634          }->{$current_token->{type}};          }->{$self->{t}->{type}};
635          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
636          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
637          redo A;          redo A;
# Line 574  sub get_next_token ($) { Line 642  sub get_next_token ($) {
642        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
643          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
644          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
645          return $current_token;          return $self->{t};
646          #redo A;          #redo A;
647        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
648          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
649          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
650          redo A;          redo A;
651        } else {        } else {
652          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
653          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
654          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
655          redo A;          redo A;
# Line 598  sub get_next_token ($) { Line 666  sub get_next_token ($) {
666          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
667          redo A;          redo A;
668        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
669          $current_token->{type} = {          $self->{t}->{type} = {
670              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
671              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
672              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
673              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
674          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
675          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
676          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
677          return $current_token;          return $self->{t};
678          #redo A;          #redo A;
679        } elsif ($self->{c} < 0x0020 or {        } elsif ($self->{c} < 0x0020 or {
680            0x0022 => 1, # "            0x0022 => 1, # "
# Line 614  sub get_next_token ($) { Line 682  sub get_next_token ($) {
682            0x0028 => 1, # (            0x0028 => 1, # (
683        }->{$self->{c}}) { # C0 or (        }->{$self->{c}}) { # C0 or (
684          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
685          $current_token->{type} = {          $self->{t}->{type} = {
686              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
687              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
688              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
689              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
690          }->{$current_token->{type}};          }->{$self->{t}->{type}};
691          # stay in the state.          # stay in the state.
692          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
693          redo A;          redo A;
694        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
695          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
696          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
697          return $current_token;          return $self->{t};
698          #redo A;          #redo A;
699        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
700          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
701          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
702          redo A;          redo A;
703        } else {        } else {
704          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
705          # stay in the state.          # stay in the state.
706          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
707          redo A;          redo A;
# Line 650  sub get_next_token ($) { Line 718  sub get_next_token ($) {
718          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
719          redo A;          redo A;
720        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
721          $current_token->{type} = {          $self->{t}->{type} = {
722              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
723              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
724              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
725              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
726          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
727          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
728          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
729          return $current_token;          return $self->{t};
730          #redo A;          #redo A;
731        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
732          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
733          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
734          return $current_token;          return $self->{t};
735          #redo A;          #redo A;
736        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
737          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
# Line 671  sub get_next_token ($) { Line 739  sub get_next_token ($) {
739          redo A;          redo A;
740        } else {        } else {
741          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
742          $current_token->{type} = {          $self->{t}->{type} = {
743              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
744              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
745              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
746              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
747          }->{$current_token->{type}};          }->{$self->{t}->{type}};
748          # stay in the state.          # stay in the state.
749          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
750          redo A;          redo A;
751        }        }
752      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
753        $current_token->{has_escape} = 1;        $self->{t}->{has_escape} = 1;
754        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
755          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
756          $char = $self->{c} - 0x0030;          $char = $self->{c} - 0x0030;
# Line 707  sub get_next_token ($) { Line 775  sub get_next_token ($) {
775            ## NOTE: In |escape| in ... in |ident|.            ## NOTE: In |escape| in ... in |ident|.
776            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
777            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
778            return $current_token;            return $self->{t};
779            # reconsume            # reconsume
780            #redo A;            #redo A;
781          } elsif ($q == 1) {          } elsif ($q == 1) {
782            ## NOTE: In |escape| in |URI|.            ## NOTE: In |escape| in |URI|.
783            $current_token->{type} = {            $self->{t}->{type} = {
784                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
785                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
786                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
787                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
788            }->{$current_token->{type}};            }->{$self->{t}->{type}};
789            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
790            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = URI_UNQUOTED_STATE;
791            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
792            redo A;            redo A;
793          } else {          } else {
794            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
795            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
796            $self->{state} = STRING_STATE;            $self->{state} = STRING_STATE;
797            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
798            redo A;            redo A;
# Line 734  sub get_next_token ($) { Line 802  sub get_next_token ($) {
802            ## NOTE: In |escape| in ... in |ident|.            ## NOTE: In |escape| in ... in |ident|.
803            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
804            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
805            return $current_token;            return $self->{t};
806            # reconsume            # reconsume
807            #redo A;            #redo A;
808          } elsif ($q == 1) {          } elsif ($q == 1) {
809            $current_token->{type} = {            $self->{t}->{type} = {
810                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
811                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
812                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
813                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
814            }->{$current_token->{type}};            }->{$self->{t}->{type}};
815            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D\x0A";
816            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = URI_UNQUOTED_STATE;
817            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
818            redo A;            redo A;
819          } else {          } else {
820            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
821            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D\x0A";
822            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
823            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
824            redo A;            redo A;
825          }          }
826        } else {        } else {
827          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
828          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
829          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
830              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
831          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 784  sub get_next_token ($) { Line 852  sub get_next_token ($) {
852                 $self->{c} == 0x000A or # \n                 $self->{c} == 0x000A or # \n
853                 $self->{c} == 0x0009 or # \t                 $self->{c} == 0x0009 or # \t
854                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
855          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
856          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
857              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
858          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 794  sub get_next_token ($) { Line 862  sub get_next_token ($) {
862          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
863          redo A;          redo A;
864        } else {        } else {
865          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
866          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
867              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
868          # reconsume          # reconsume
# Line 806  sub get_next_token ($) { Line 874  sub get_next_token ($) {
874            $self->{c} == 0x000A or # \n            $self->{c} == 0x000A or # \n
875            $self->{c} == 0x0009 or # \t            $self->{c} == 0x0009 or # \t
876            $self->{c} == 0x000C) { # \f            $self->{c} == 0x000C) { # \f
877          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
878          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
879              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
880          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 816  sub get_next_token ($) { Line 884  sub get_next_token ($) {
884          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
885          redo A;          redo A;
886        } else {        } else {
887          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
888          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
889              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
890          # reconsume          # reconsume
# Line 825  sub get_next_token ($) { Line 893  sub get_next_token ($) {
893      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
894        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
895        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
896          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
897          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
898              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
899          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
900          redo A;          redo A;
901        } else {        } else {
902          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
903          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
904              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
905          # reconsume          # reconsume
# Line 847  sub get_next_token ($) { Line 915  sub get_next_token ($) {
915          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
916          redo A;          redo A;
917        } elsif ($self->{c} == $q) { # " | '        } elsif ($self->{c} == $q) { # " | '
918          if ($current_token->{type} == STRING_TOKEN) {          if ($self->{t}->{type} == STRING_TOKEN) {
919            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
920            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
921            return $current_token;            return $self->{t};
922            #redo A;            #redo A;
923          } else {          } else {
924            $self->{state} = URI_AFTER_WSP_STATE;            $self->{state} = URI_AFTER_WSP_STATE;
# Line 861  sub get_next_token ($) { Line 929  sub get_next_token ($) {
929                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
930                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
931                 $self->{c} == -1) {                 $self->{c} == -1) {
932          $current_token->{type} = INVALID_TOKEN;          $self->{t}->{type} = INVALID_TOKEN;
933          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
934          # reconsume          # reconsume
935          return $current_token;          return $self->{t};
936          #redo A;          #redo A;
937        } else {        } else {
938          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
939          # stay in the state          # stay in the state
940          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
941          redo A;          redo A;
# Line 875  sub get_next_token ($) { Line 943  sub get_next_token ($) {
943      } elsif ($self->{state} == NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_STATE) {
944        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
945        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
946          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
947          # stay in the state          # stay in the state
948          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
949          redo A;          redo A;
# Line 884  sub get_next_token ($) { Line 952  sub get_next_token ($) {
952          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
953          redo A;          redo A;
954        } else {        } else {
955          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
956          $current_token->{value} = '';          $self->{t}->{value} = '';
957          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
958          # reprocess          # reprocess
959          redo A;          redo A;
# Line 893  sub get_next_token ($) { Line 961  sub get_next_token ($) {
961      } elsif ($self->{state} == NUMBER_DOT_STATE) {      } elsif ($self->{state} == NUMBER_DOT_STATE) {
962        ## NOTE: The character immediately following |.| in |num|.        ## NOTE: The character immediately following |.| in |num|.
963        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
964          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
965          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
966          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
967          redo A;          redo A;
968        } else {        } else {
969          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};
970          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
971          $current_token->{value} = '';          $self->{t}->{value} = '';
972          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
973          # reprocess          # reprocess
974          return $current_token;          return $self->{t};
975          #redo A;          #redo A;
976        }        }
977      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
978        ## NOTE: The character immediately following |.| at the beginning of |num|.        ## NOTE: The character immediately following |.| at the beginning of |num|.
979        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
980          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
981          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
982          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
983          redo A;          redo A;
# Line 922  sub get_next_token ($) { Line 990  sub get_next_token ($) {
990      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
991        ## NOTE: |[0-9]| in |num| after |.|.        ## NOTE: |[0-9]| in |num| after |.|.
992        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
993          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
994          # stay in the state          # stay in the state
995          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
996          redo A;          redo A;
997        } else {        } else {
998          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
999          $current_token->{value} = '';          $self->{t}->{value} = '';
1000          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1001          # reprocess          # reprocess
1002          redo A;          redo A;
# Line 937  sub get_next_token ($) { Line 1005  sub get_next_token ($) {
1005        die "$0: Unknown state |$self->{state}|";        die "$0: Unknown state |$self->{state}|";
1006      }      }
1007    } # A    } # A
   
   ## TODO: |URI|, |UNICODE-RANGE|, |COMMENT|  
   
1008  } # get_next_token  } # get_next_token
1009    
1010  1;  1;

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.6

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24