/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.2 by wakaba, Sat Sep 8 01:31:44 2007 UTC | revision 1.5 by wakaba, Sat Sep 8 03:25:05 2007 UTC
# Line 17  sub ESCAPE_BEFORE_NL_STATE () { 12 } Line 17  sub ESCAPE_BEFORE_NL_STATE () { 12 }
17  sub NUMBER_DOT_STATE () { 13 }  sub NUMBER_DOT_STATE () { 13 }
18  sub NUMBER_DOT_NUMBER_STATE () { 14 }  sub NUMBER_DOT_NUMBER_STATE () { 14 }
19  sub DELIM_STATE () { 15 }  sub DELIM_STATE () { 15 }
20    sub URI_UNQUOTED_STATE () { 16 }
21    sub URI_AFTER_WSP_STATE () { 17 }
22    sub AFTER_AT_STATE () { 18 }
23    sub AFTER_AT_HYPHEN_STATE () { 19 }
24    
25  sub IDENT_TOKEN () { 1 }  sub IDENT_TOKEN () { 1 }
26  sub ATKEYWORD_TOKEN () { 2 }  sub ATKEYWORD_TOKEN () { 2 }
# Line 58  sub COMMENT_INVALID_TOKEN () { 37 } Line 62  sub COMMENT_INVALID_TOKEN () { 37 }
62  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
63    
64  our @TokenName = qw(  our @TokenName = qw(
65    0 IDENT ATKWTWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
66    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE    STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE
67    UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH    UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH
68    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
# Line 76  sub init ($) { Line 80  sub init ($) {
80    my $self = shift;    my $self = shift;
81    $self->{state} = BEFORE_TOKEN_STATE;    $self->{state} = BEFORE_TOKEN_STATE;
82    $self->{c} = $self->{get_char}->();    $self->{c} = $self->{get_char}->();
83      #$self->{t} = {type => token-type, value => value, number => number};
84  } # init  } # init
85    
86  sub get_next_token ($) {  sub get_next_token ($) {
# Line 84  sub get_next_token ($) { Line 89  sub get_next_token ($) {
89      return shift @{$self->{token}};      return shift @{$self->{token}};
90    }    }
91    
   my $current_token;  
92    my $char;    my $char;
93    my $num; # |{num}|, if any.    my $num; # |{num}|, if any.
94    my $i; # |$i + 1|th character in |unicode| in |escape|.    my $i; # |$i + 1|th character in |unicode| in |escape|.
95    my $q; # |$q == 0 ? "in |ident|" : "in |string$q| or in |invalid$q|"|    my $q;
96          ## NOTE:
97          ##   0: in |ident|.
98          ##   1: in |URI| outside of |string|.
99          ##   0x0022: in |string1| or |invalid1|.
100          ##   0x0027: in |string2| or |invalid2|.
101    
102    A: {    A: {
103      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
104        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
105          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
106          $current_token = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-'};
107          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
108          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
109          redo A;          redo A;
110          } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
111            $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
112            $self->{c} = $self->{get_char}->();
113            if ($self->{c} == 0x002B) { # +
114              $self->{c} = $self->{get_char}->();
115              if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
116                  (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
117                  (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
118                  $self->{c} == 0x003F) { # ?
119                $self->{t}->{value} .= '+' . chr $self->{c};
120                $self->{t}->{type} = UNICODE_RANGE_TOKEN;
121                $self->{c} = $self->{get_char}->();
122                C: for (2..6) {
123                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
124                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
125                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
126                      $self->{c} == 0x003F) { # ?
127                    $self->{t}->{value} .= chr $self->{c};
128                    $self->{c} = $self->{get_char}->();
129                  } else {
130                    last C;
131                  }
132                } # C
133    
134                if ($self->{c} == 0x002D) { # -
135                  $self->{c} = $self->{get_char}->();
136                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
137                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
138                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
139                    $self->{t}->{value} .= '-' . chr $self->{c};
140                    $self->{c} = $self->{get_char}->();
141                    C: for (2..6) {
142                      if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
143                          (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
144                          (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
145                        $self->{t}->{value} .= chr $self->{c};
146                        $self->{c} = $self->{get_char}->();
147                      } else {
148                        last C;
149                      }
150                    } # C
151                    
152                    #
153                  } else {
154                    my $token = $self->{t};
155                    $self->{t} = {type => IDENT_TOKEN, value => '-'};
156                    $self->{state} = BEFORE_NMSTART_STATE;
157                    # reprocess
158                    return $token;
159                    #redo A;
160                  }
161                }
162    
163                $self->{state} = BEFORE_TOKEN_STATE;
164                # reprocess
165                return $self->{t};
166                #redo A;
167              } else {
168                unshift @{$self->{token}}, {type => PLUS_TOKEN};
169                $self->{state} = BEFORE_TOKEN_STATE;
170                # reprocess
171                return $self->{t};
172                #redo A;
173              }
174            } else {
175              $self->{state} = NAME_STATE;
176              # reprocess
177              redo A;
178            }
179        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
180                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
181                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
182                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
183          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
184          $current_token = {type => IDENT_TOKEN, value => chr $self->{c}};          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
185          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
186          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
187          redo A;          redo A;
188        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
189          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
190          $current_token = {type => IDENT_TOKEN, value => ''};          $self->{t} = {type => IDENT_TOKEN, value => ''};
191          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
192          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
193          redo A;          redo A;
194        } elsif ($self->{c} == 0x0040) { # @        } elsif ($self->{c} == 0x0040) { # @
195          ## NOTE: |@| in |ATKEYWORD|          ## NOTE: |@| in |ATKEYWORD|
196          $current_token = {type => ATKEYWORD_TOKEN, value => ''};          $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};
197          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = AFTER_AT_STATE;
198          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
199          redo A;          redo A;
200        } elsif ($self->{c} == 0x0022) { # "        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
201          ## NOTE: |"| in |string1| in |string| in |STRING|, or          $self->{t} = {type => STRING_TOKEN, value => ''};
202          ## |"| in |invalid1| in |invalid| in |INVALID|.          $self->{state} = STRING_STATE; $q = $self->{c};
         $current_token = {type => STRING_TOKEN, value => ''};  
         $self->{state} = STRING_STATE; $q = 1;  
         $self->{c} = $self->{get_char}->();  
         redo A;  
       } elsif ($self->{c} == 0x0027) { # '  
         ## NOTE: |'| in |string2| in |string| in |STRING|, or  
         ## |'| in |invalid2| in |invalid| in |INVALID|.  
         $current_token = {type => STRING_TOKEN, value => ''};  
         $self->{state} = STRING_STATE; $q = 2;  
203          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
204          redo A;          redo A;
205        } elsif ($self->{c} == 0x0023) { # #        } elsif ($self->{c} == 0x0023) { # #
206          ## NOTE: |#| in |HASH|.          ## NOTE: |#| in |HASH|.
207          $current_token = {type => HASH_TOKEN, value => ''};          $self->{t} = {type => HASH_TOKEN, value => ''};
208          $self->{state} = HASH_OPEN_STATE;          $self->{state} = HASH_OPEN_STATE;
209          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
210          redo A;          redo A;
211        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
212          ## NOTE: |num|.          ## NOTE: |num|.
213          $current_token = {type => NUMBER_TOKEN, value => chr $self->{c}};          $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};
214          $self->{state} = NUMBER_STATE;          $self->{state} = NUMBER_STATE;
215          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
216          redo A;          redo A;
217        } elsif ($self->{c} == 0x002E) { # .        } elsif ($self->{c} == 0x002E) { # .
218          ## NOTE: |num|.          ## NOTE: |num|.
219          $current_token = {type => NUMBER_TOKEN, value => '0'};          $self->{t} = {type => NUMBER_TOKEN, value => '0'};
220          $self->{state} = NUMBER_FRACTION_STATE;          $self->{state} = NUMBER_FRACTION_STATE;
221          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
222          redo A;          redo A;
223          } elsif ($self->{c} == 0x002F) { # /
224            $self->{c} = $self->{get_char}->();
225            if ($self->{c} == 0x002A) { # *
226              C: {
227                $self->{c} = $self->{get_char}->();
228                if ($self->{c} == 0x002A) { # *
229                  D: {
230                    $self->{c} = $self->{get_char}->();
231                    if ($self->{c} == 0x002F) { # /
232                      #
233                    } elsif ($self->{c} == 0x002A) { # *
234                      redo D;
235                    } else {
236                      redo C;
237                    }
238                  } # D
239                } elsif ($self->{c} == -1) {
240                  # stay in the state
241                  # reprocess
242                  return {type => COMMENT_INVALID_TOKEN};
243                  #redo A;
244                } else {
245                  redo C;
246                }
247              } # C
248    
249              # stay in the state.
250              $self->{c} = $self->{get_char}->();
251              redo A;
252            } else {
253              # stay in the state.
254              # reprocess
255              return {type => DELIM_STATE, value => '/'};
256              #redo A;
257            }        
258        } elsif ($self->{c} == 0x003C) { # <        } elsif ($self->{c} == 0x003C) { # <
259          ## NOTE: |CDO|          ## NOTE: |CDO|
260          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 166  sub get_next_token ($) { Line 270  sub get_next_token ($) {
270              } else {              } else {
271                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
272                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
273                $current_token = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
274                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
275                #reprocess                #reprocess
276                return {type => DELIM_TOKEN, value => '<'};                return {type => DELIM_TOKEN, value => '<'};
# Line 286  sub get_next_token ($) { Line 390  sub get_next_token ($) {
390          #redo A;          #redo A;
391        } else {        } else {
392          # stay in the state          # stay in the state
393          $current_token = {type => DELIM_TOKEN, value => chr $self->{c}};          $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};
394          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
395          return $current_token;          return $self->{t};
396          #redo A;          #redo A;
397        }        }
398      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
399        ## NOTE: |nmstart| in |ident| in (|IDENT|, |DIMENSION|, or |ATKEYWORD|)        ## NOTE: |nmstart| in |ident| in (|IDENT|, |DIMENSION|, or
400          ## |FUNCTION|)
401        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
402            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
403            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
404            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
405          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
406          $current_token->{type} = DIMENSION_TOKEN          $self->{t}->{type} = DIMENSION_TOKEN
407              if $current_token->{type} == NUMBER_TOKEN;              if $self->{t}->{type} == NUMBER_TOKEN;
408          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
409          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
410          redo A;          redo A;
# Line 309  sub get_next_token ($) { Line 414  sub get_next_token ($) {
414          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
415          redo A;          redo A;
416        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D and # -
417                 $current_token->{type} == IDENT_TOKEN) {                 $self->{t}->{type} == IDENT_TOKEN) {
418          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
419          if ($self->{c} == 0x003E) { # >          if ($self->{c} == 0x003E) { # >
420            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
# Line 318  sub get_next_token ($) { Line 423  sub get_next_token ($) {
423            #redo A;            #redo A;
424          } else {          } else {
425            ## NOTE: |-|, |-|, $self->{c}            ## NOTE: |-|, |-|, $self->{c}
426            #$current_token = {type => IDENT_TOKEN, value => '-'};            #$self->{t} = {type => IDENT_TOKEN, value => '-'};
427            # stay in the state            # stay in the state
428            # reconsume            # reconsume
429            return {type => DELIM_TOKEN, value => '-'};            return {type => DELIM_TOKEN, value => '-'};
430            #redo A;            #redo A;
431          }          }
432        } else {        } else {
433          if ($current_token->{type} == NUMBER_TOKEN) {          if ($self->{t}->{type} == NUMBER_TOKEN) {
           ## NOTE: |-| after |num|.  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};  
           $self->{state} = BEFORE_TOKEN_STATE;  
           $self->{c} = $self->{get_char}->();  
           return $current_token;  
         } elsif ($current_token->{type} == ATKEYWORD_TOKEN) {  
           ## NOTE: |-| after |@|.  
           unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '@'};  
           $self->{state} = BEFORE_TOKEN_STATE;  
           $self->{c} = $self->{get_char}->();  
           return $current_token;  
         } elsif ($current_token->{type} == NUMBER_TOKEN) {  
434            ## NOTE: |-| after |NUMBER|.            ## NOTE: |-| after |NUMBER|.
435            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
436            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
437            # reconsume            # reconsume
438            $current_token->{value} = $current_token->{number};            $self->{t}->{value} = $self->{t}->{number};
439            delete $current_token->{number};            delete $self->{t}->{number};
440            return $current_token;            return $self->{t};
441          } else {          } else {
442            ## NOTE: |-| not followed by |nmstart|.            ## NOTE: |-| not followed by |nmstart|.
443            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
# Line 352  sub get_next_token ($) { Line 445  sub get_next_token ($) {
445            return {type => DELIM_TOKEN, value => '-'};            return {type => DELIM_TOKEN, value => '-'};
446          }          }
447        }        }
448        } elsif ($self->{state} == AFTER_AT_STATE) {
449          if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
450              (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
451              $self->{c} == 0x005F or # _
452              $self->{c} > 0x007F) { # nonascii
453            $self->{t}->{value} .= chr $self->{c};
454            $self->{state} = NAME_STATE;
455            $self->{c} = $self->{get_char}->();
456            redo A;
457          } elsif ($self->{c} == 0x002D) { # -
458            $self->{t}->{value} .= '-';
459            $self->{state} = AFTER_AT_HYPHEN_STATE;
460            $self->{c} = $self->{get_char}->();
461            redo A;
462          } elsif ($self->{c} == 0x005C) { # \
463            $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
464            $self->{c} = $self->{get_char}->();
465            redo A;
466          } else {
467            $self->{state} = BEFORE_TOKEN_STATE;
468            # reprocess
469            return {type => DELIM_TOKEN, value => '@'};
470          }
471        } elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) {
472          if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
473              (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
474              $self->{c} == 0x005F or # _
475              $self->{c} > 0x007F) { # nonascii
476            $self->{t}->{value} .= chr $self->{c};
477            $self->{state} = NAME_STATE;
478            $self->{c} = $self->{get_char}->();
479            redo A;
480          } elsif ($self->{c} == 0x002D) { # -
481            $self->{c} = $self->{get_char}->();
482            if ($self->{c} == 0x003E) { # >
483              unshift @{$self->{token}}, {type => CDC_TOKEN};
484              $self->{state} = BEFORE_TOKEN_STATE;
485              $self->{c} = $self->{get_char}->();
486              return {type => DELIM_TOKEN, value => '@'};
487              #redo A;
488            } else {
489              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
490              $self->{t} = {type => IDENT_TOKEN, value => '-'};
491              $self->{state} = BEFORE_NMSTART_STATE;
492              # reprocess
493              return {type => DELIM_TOKEN, value => '@'};
494              #redo A;
495            }
496          } elsif ($self->{c} == 0x005C) { # \
497            ## TODO: @-\{nl}
498            $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
499            $self->{c} = $self->{get_char}->();
500            redo A;
501          } else {
502            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
503            $self->{state} = BEFORE_TOKEN_STATE;
504            # reprocess
505            return {type => DELIM_TOKEN, value => '@'};
506          }
507      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
508        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
509          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
510          $current_token->{value} = '-';          $self->{t}->{value} = '-';
511          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
512          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
513          redo A;          redo A;
# Line 364  sub get_next_token ($) { Line 516  sub get_next_token ($) {
516                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
517                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
518          ## NOTE: |nmstart| in |ident|.          ## NOTE: |nmstart| in |ident|.
519          $current_token->{value} = chr $self->{c};          $self->{t}->{value} = chr $self->{c};
520          $current_token->{type} = DIMENSION_TOKEN;          $self->{t}->{type} = DIMENSION_TOKEN;
521          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
522          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
523          redo A;          redo A;
524        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
525          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
526          $current_token->{value} = '';          $self->{t}->{value} = '';
527          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
528          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
529          redo A;          redo A;
530        } elsif ($self->{c} == 0x0025) { # %        } elsif ($self->{c} == 0x0025) { # %
531          $current_token->{type} = PERCENTAGE_TOKEN;          $self->{t}->{type} = PERCENTAGE_TOKEN;
532          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
533          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
534          return $current_token;          return $self->{t};
535          #redo A;          #redo A;
536        } else {        } else {
537          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
538          # reprocess          # reprocess
539          return $current_token;          return $self->{t};
540          #redo A;          #redo A;
541        }        }
542      } elsif ($self->{state} == HASH_OPEN_STATE) {      } elsif ($self->{state} == HASH_OPEN_STATE) {
# Line 395  sub get_next_token ($) { Line 547  sub get_next_token ($) {
547            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
548            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
549            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
550          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
551          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
552          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
553          redo A;          redo A;
# Line 417  sub get_next_token ($) { Line 569  sub get_next_token ($) {
569            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
570            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
571            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
572          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
573          # stay in the state          # stay in the state
574          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
575          redo A;          redo A;
576        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
577          $self->{state} = ESCAPE_OPEN_STATE; # $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
578          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
579          redo A;          redo A;
580        } elsif ($self->{c} == 0x0028 and # (        } elsif ($self->{c} == 0x0028 and # (
581                 $current_token->{type} == IDENT_TOKEN) { # (                 $self->{t}->{type} == IDENT_TOKEN) { # (
582          if (not $current_token->{has_escape} and          my $func_name = $self->{t}->{value};
583              {url => 1, Url => 1, uRl => 1, urL => 1,          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
584               URl => 1, UrL => 1, uRL => 1, URL => 1}          if ($func_name eq 'url' or $func_name eq 'url-prefix') {
585              ->{$current_token->{value}}) {            if ($self->{t}->{has_escape}) {
586            $current_token->{type} = URI_TOKEN;              ## TODO: warn
587              }
588              $self->{t}->{type}
589                  = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
590              $self->{t}->{value} = '';
591            $self->{state} = URI_BEFORE_WSP_STATE;            $self->{state} = URI_BEFORE_WSP_STATE;
592            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
   
           ## NOTE: This version of the tokenizer does not support the |URI|  
           ## token type.  Note that browsers disagree in how to tokenize  
           ## |url| function.  
           $current_token->{type} = FUNCTION_TOKEN;  
           $self->{state} = BEFORE_TOKEN_STATE;  
           $self->{c} = $self->{get_char}->();  
           return $current_token;  
   
593            redo A;            redo A;
594          } else {          } else {
595            $current_token->{type} = FUNCTION_TOKEN;            $self->{t}->{type} = FUNCTION_TOKEN;
596            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
597            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
598            return $current_token;            return $self->{t};
599            #redo A;            #redo A;
600          }          }
601        } else {        } else {
602          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
603          # reconsume          # reconsume
604          return $current_token;          return $self->{t};
605            #redo A;
606          }
607        } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
608          while ({
609                    0x0020 => 1, # SP
610                    0x0009 => 1, # \t
611                    0x000D => 1, # \r
612                    0x000A => 1, # \n
613                    0x000C => 1, # \f
614                 }->{$self->{c}}) {
615            $self->{c} = $self->{get_char}->();
616          }
617          if ($self->{c} == -1) {
618            $self->{t}->{type} = {
619                URI_TOKEN, URI_INVALID_TOKEN,
620                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
621                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
622                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
623            }->{$self->{t}->{type}};        
624            $self->{state} = BEFORE_TOKEN_STATE;
625            $self->{c} = $self->{get_char}->();
626            return $self->{t};
627            #redo A;
628          } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
629            ## TODO: Should we consider matches of "(" and ")"?
630            $self->{t}->{type} = {
631                URI_TOKEN, URI_INVALID_TOKEN,
632                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
633                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
634                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
635            }->{$self->{t}->{type}};
636            $self->{state} = URI_UNQUOTED_STATE;
637            $self->{c} = $self->{get_char}->();
638            redo A;
639          } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
640            $self->{state} = STRING_STATE; $q = $self->{c};
641            $self->{c} = $self->{get_char}->();
642            redo A;
643          } elsif ($self->{c} == 0x0029) { # )
644            $self->{state} = BEFORE_TOKEN_STATE;
645            $self->{c} = $self->{get_char}->();
646            return $self->{t};
647            #redo A;
648          } elsif ($self->{c} == 0x005C) { # \
649            $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
650            $self->{c} = $self->{get_char}->();
651            redo A;
652          } else {
653            $self->{t}->{value} .= chr $self->{c};
654            $self->{state} = URI_UNQUOTED_STATE;
655            $self->{c} = $self->{get_char}->();
656            redo A;
657          }
658        } elsif ($self->{state} == URI_UNQUOTED_STATE) {
659          if ({
660               0x0020 => 1, # SP
661               0x0009 => 1, # \t
662               0x000D => 1, # \r
663               0x000A => 1, # \n
664               0x000C => 1, # \f
665              }->{$self->{c}}) {
666            $self->{state} = URI_AFTER_WSP_STATE;
667            $self->{c} = $self->{get_char}->();
668            redo A;
669          } elsif ($self->{c} == -1) {
670            $self->{t}->{type} = {
671                URI_TOKEN, URI_INVALID_TOKEN,
672                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
673                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
674                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
675            }->{$self->{t}->{type}};        
676            $self->{state} = BEFORE_TOKEN_STATE;
677            $self->{c} = $self->{get_char}->();
678            return $self->{t};
679            #redo A;
680          } elsif ($self->{c} < 0x0020 or {
681              0x0022 => 1, # "
682              0x0027 => 1, # '
683              0x0028 => 1, # (
684          }->{$self->{c}}) { # C0 or (
685            ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
686            $self->{t}->{type} = {
687                URI_TOKEN, URI_INVALID_TOKEN,
688                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
689                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
690                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
691            }->{$self->{t}->{type}};
692            # stay in the state.
693            $self->{c} = $self->{get_char}->();
694            redo A;
695          } elsif ($self->{c} == 0x0029) { # )
696            $self->{state} = BEFORE_TOKEN_STATE;
697            $self->{c} = $self->{get_char}->();
698            return $self->{t};
699            #redo A;
700          } elsif ($self->{c} == 0x005C) { # \
701            $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
702            $self->{c} = $self->{get_char}->();
703            redo A;
704          } else {
705            $self->{t}->{value} .= chr $self->{c};
706            # stay in the state.
707            $self->{c} = $self->{get_char}->();
708            redo A;
709          }
710        } elsif ($self->{state} == URI_AFTER_WSP_STATE) {
711          if ({
712               0x0020 => 1, # SP
713               0x0009 => 1, # \t
714               0x000D => 1, # \r
715               0x000A => 1, # \n
716               0x000C => 1, # \f
717              }->{$self->{c}}) {
718            # stay in the state.
719            $self->{c} = $self->{get_char}->();
720            redo A;
721          } elsif ($self->{c} == -1) {
722            $self->{t}->{type} = {
723                URI_TOKEN, URI_INVALID_TOKEN,
724                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
725                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
726                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
727            }->{$self->{t}->{type}};        
728            $self->{state} = BEFORE_TOKEN_STATE;
729            $self->{c} = $self->{get_char}->();
730            return $self->{t};
731          #redo A;          #redo A;
732          } elsif ($self->{c} == 0x0029) { # )
733            $self->{state} = BEFORE_TOKEN_STATE;
734            $self->{c} = $self->{get_char}->();
735            return $self->{t};
736            #redo A;
737          } elsif ($self->{c} == 0x005C) { # \
738            $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
739            $self->{c} = $self->{get_char}->();
740            redo A;
741          } else {
742            ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
743            $self->{t}->{type} = {
744                URI_TOKEN, URI_INVALID_TOKEN,
745                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
746                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
747                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
748            }->{$self->{t}->{type}};
749            # stay in the state.
750            $self->{c} = $self->{get_char}->();
751            redo A;
752        }        }
753      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
754        $current_token->{has_escape} = 1;        $self->{t}->{has_escape} = 1;
755        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
756          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
757          $char = $self->{c} - 0x0030;          $char = $self->{c} - 0x0030;
# Line 483  sub get_next_token ($) { Line 776  sub get_next_token ($) {
776            ## NOTE: In |escape| in ... in |ident|.            ## NOTE: In |escape| in ... in |ident|.
777            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
778            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
779            return $current_token;            return $self->{t};
780            # reconsume            # reconsume
781            #redo A;            #redo A;
782            } elsif ($q == 1) {
783              ## NOTE: In |escape| in |URI|.
784              $self->{t}->{type} = {
785                  URI_TOKEN, URI_INVALID_TOKEN,
786                  URI_INVALID_TOKEN, URI_INVALID_TOKEN,
787                  URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
788                  URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
789              }->{$self->{t}->{type}};
790              $self->{t}->{value} .= chr $self->{c};
791              $self->{state} = URI_UNQUOTED_STATE;
792              $self->{c} = $self->{get_char}->();
793              redo A;
794          } else {          } else {
795            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
796            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
797            $self->{state} = STRING_STATE;            $self->{state} = STRING_STATE;
798            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
799            redo A;            redo A;
# Line 498  sub get_next_token ($) { Line 803  sub get_next_token ($) {
803            ## NOTE: In |escape| in ... in |ident|.            ## NOTE: In |escape| in ... in |ident|.
804            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
805            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
806            return $current_token;            return $self->{t};
807            # reconsume            # reconsume
808            #redo A;            #redo A;
809            } elsif ($q == 1) {
810              $self->{t}->{type} = {
811                  URI_TOKEN, URI_INVALID_TOKEN,
812                  URI_INVALID_TOKEN, URI_INVALID_TOKEN,
813                  URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
814                  URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
815              }->{$self->{t}->{type}};
816              $self->{t}->{value} .= "\x0D\x0A";
817              $self->{state} = URI_UNQUOTED_STATE;
818              $self->{c} = $self->{get_char}->();
819              redo A;
820          } else {          } else {
821            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
822            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D\x0A";
823            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
824            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
825            redo A;            redo A;
826          }          }
827        } else {        } else {
828          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
829          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
830          $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;          $self->{state} = $q == 0 ? NAME_STATE :
831                $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
832          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
833          redo A;          redo A;
834        }        }
# Line 536  sub get_next_token ($) { Line 853  sub get_next_token ($) {
853                 $self->{c} == 0x000A or # \n                 $self->{c} == 0x000A or # \n
854                 $self->{c} == 0x0009 or # \t                 $self->{c} == 0x0009 or # \t
855                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
856          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
857          $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;          $self->{state} = $q == 0 ? NAME_STATE :
858                $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
859          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
860          redo A;          redo A;
861        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
# Line 545  sub get_next_token ($) { Line 863  sub get_next_token ($) {
863          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
864          redo A;          redo A;
865        } else {        } else {
866          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
867          $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;          $self->{state} = $q == 0 ? NAME_STATE :
868                $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
869          # reconsume          # reconsume
870          redo A;          redo A;
871        }        }
# Line 556  sub get_next_token ($) { Line 875  sub get_next_token ($) {
875            $self->{c} == 0x000A or # \n            $self->{c} == 0x000A or # \n
876            $self->{c} == 0x0009 or # \t            $self->{c} == 0x0009 or # \t
877            $self->{c} == 0x000C) { # \f            $self->{c} == 0x000C) { # \f
878          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
879          $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;          $self->{state} = $q == 0 ? NAME_STATE :
880                $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
881          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
882          redo A;          redo A;
883        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
# Line 565  sub get_next_token ($) { Line 885  sub get_next_token ($) {
885          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
886          redo A;          redo A;
887        } else {        } else {
888          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
889          $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;          $self->{state} = $q == 0 ? NAME_STATE :
890                $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
891          # reconsume          # reconsume
892          redo A;          redo A;
893        }        }
894      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
895        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
896        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
897          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
898          $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;          $self->{state} = $q == 0 ? NAME_STATE :
899                $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
900          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
901          redo A;          redo A;
902        } else {        } else {
903          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
904          $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;          $self->{state} = $q == 0 ? NAME_STATE :
905                $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
906          # reconsume          # reconsume
907          redo A;          redo A;
908        }        }
# Line 587  sub get_next_token ($) { Line 910  sub get_next_token ($) {
910        ## NOTE: A character in |string$Q| in |string| in |STRING|, or        ## NOTE: A character in |string$Q| in |string| in |STRING|, or
911        ## a character in |invalid$Q| in |invalid| in |INVALID|,        ## a character in |invalid$Q| in |invalid| in |INVALID|,
912        ## where |$Q = $q == 0x0022 ? 1 : 2|.        ## where |$Q = $q == 0x0022 ? 1 : 2|.
913          ## Or, in |URI|.
914        if ($self->{c} == 0x005C) { # \        if ($self->{c} == 0x005C) { # \
915          $self->{state} = ESCAPE_OPEN_STATE;          $self->{state} = ESCAPE_OPEN_STATE;
916          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
917          redo A;          redo A;
918        } elsif ($self->{c} == $q) { # " | '        } elsif ($self->{c} == $q) { # " | '
919          $self->{state} = BEFORE_TOKEN_STATE;          if ($self->{t}->{type} == STRING_TOKEN) {
920          $self->{c} = $self->{get_char}->();            $self->{state} = BEFORE_TOKEN_STATE;
921          return $current_token;            $self->{c} = $self->{get_char}->();
922          #redo A;            return $self->{t};
923              #redo A;
924            } else {
925              $self->{state} = URI_AFTER_WSP_STATE;
926              $self->{c} = $self->{get_char}->();
927              redo A;
928            }
929        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
930                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
931                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
932                 $self->{c} == -1) {                 $self->{c} == -1) {
933          $current_token->{type} = INVALID_TOKEN;          $self->{t}->{type} = INVALID_TOKEN;
934          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
935          # reconsume          # reconsume
936          return $current_token;          return $self->{t};
937          #redo A;          #redo A;
938        } else {        } else {
939          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
940          # stay in the state          # stay in the state
941          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
942          redo A;          redo A;
# Line 614  sub get_next_token ($) { Line 944  sub get_next_token ($) {
944      } elsif ($self->{state} == NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_STATE) {
945        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
946        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
947          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
948          # stay in the state          # stay in the state
949          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
950          redo A;          redo A;
# Line 623  sub get_next_token ($) { Line 953  sub get_next_token ($) {
953          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
954          redo A;          redo A;
955        } else {        } else {
956          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
957          $current_token->{value} = '';          $self->{t}->{value} = '';
958          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
959          # reprocess          # reprocess
960          redo A;          redo A;
# Line 632  sub get_next_token ($) { Line 962  sub get_next_token ($) {
962      } elsif ($self->{state} == NUMBER_DOT_STATE) {      } elsif ($self->{state} == NUMBER_DOT_STATE) {
963        ## NOTE: The character immediately following |.| in |num|.        ## NOTE: The character immediately following |.| in |num|.
964        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
965          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
966          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
967          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
968          redo A;          redo A;
969        } else {        } else {
970          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};
971          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
972          $current_token->{value} = '';          $self->{t}->{value} = '';
973          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
974          # reprocess          # reprocess
975          return $current_token;          return $self->{t};
976          #redo A;          #redo A;
977        }        }
978      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
979        ## NOTE: The character immediately following |.| at the beginning of |num|.        ## NOTE: The character immediately following |.| at the beginning of |num|.
980        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
981          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
982          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
983          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
984          redo A;          redo A;
# Line 661  sub get_next_token ($) { Line 991  sub get_next_token ($) {
991      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
992        ## NOTE: |[0-9]| in |num| after |.|.        ## NOTE: |[0-9]| in |num| after |.|.
993        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
994          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
995          # stay in the state          # stay in the state
996          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
997          redo A;          redo A;
998        } else {        } else {
999          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1000          $current_token->{value} = '';          $self->{t}->{value} = '';
1001          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1002          # reprocess          # reprocess
1003          redo A;          redo A;
# Line 676  sub get_next_token ($) { Line 1006  sub get_next_token ($) {
1006        die "$0: Unknown state |$self->{state}|";        die "$0: Unknown state |$self->{state}|";
1007      }      }
1008    } # A    } # A
   
   ## TODO: |URI|, |UNICODE-RANGE|, |COMMENT|  
   
1009  } # get_next_token  } # get_next_token
1010    
1011  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.5

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24