/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.4 by wakaba, Sat Sep 8 02:58:24 2007 UTC | revision 1.5 by wakaba, Sat Sep 8 03:25:05 2007 UTC
# Line 80  sub init ($) { Line 80  sub init ($) {
80    my $self = shift;    my $self = shift;
81    $self->{state} = BEFORE_TOKEN_STATE;    $self->{state} = BEFORE_TOKEN_STATE;
82    $self->{c} = $self->{get_char}->();    $self->{c} = $self->{get_char}->();
83      #$self->{t} = {type => token-type, value => value, number => number};
84  } # init  } # init
85    
86  sub get_next_token ($) {  sub get_next_token ($) {
# Line 88  sub get_next_token ($) { Line 89  sub get_next_token ($) {
89      return shift @{$self->{token}};      return shift @{$self->{token}};
90    }    }
91    
   my $current_token;  
92    my $char;    my $char;
93    my $num; # |{num}|, if any.    my $num; # |{num}|, if any.
94    my $i; # |$i + 1|th character in |unicode| in |escape|.    my $i; # |$i + 1|th character in |unicode| in |escape|.
# Line 103  sub get_next_token ($) { Line 103  sub get_next_token ($) {
103      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
104        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
105          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
106          $current_token = {type => IDENT_TOKEN, value => '-'};          $self->{t} = {type => IDENT_TOKEN, value => '-'};
107          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
108          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
109          redo A;          redo A;
110          } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
111            $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
112            $self->{c} = $self->{get_char}->();
113            if ($self->{c} == 0x002B) { # +
114              $self->{c} = $self->{get_char}->();
115              if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
116                  (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
117                  (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
118                  $self->{c} == 0x003F) { # ?
119                $self->{t}->{value} .= '+' . chr $self->{c};
120                $self->{t}->{type} = UNICODE_RANGE_TOKEN;
121                $self->{c} = $self->{get_char}->();
122                C: for (2..6) {
123                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
124                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
125                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
126                      $self->{c} == 0x003F) { # ?
127                    $self->{t}->{value} .= chr $self->{c};
128                    $self->{c} = $self->{get_char}->();
129                  } else {
130                    last C;
131                  }
132                } # C
133    
134                if ($self->{c} == 0x002D) { # -
135                  $self->{c} = $self->{get_char}->();
136                  if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
137                      (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
138                      (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
139                    $self->{t}->{value} .= '-' . chr $self->{c};
140                    $self->{c} = $self->{get_char}->();
141                    C: for (2..6) {
142                      if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
143                          (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
144                          (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
145                        $self->{t}->{value} .= chr $self->{c};
146                        $self->{c} = $self->{get_char}->();
147                      } else {
148                        last C;
149                      }
150                    } # C
151                    
152                    #
153                  } else {
154                    my $token = $self->{t};
155                    $self->{t} = {type => IDENT_TOKEN, value => '-'};
156                    $self->{state} = BEFORE_NMSTART_STATE;
157                    # reprocess
158                    return $token;
159                    #redo A;
160                  }
161                }
162    
163                $self->{state} = BEFORE_TOKEN_STATE;
164                # reprocess
165                return $self->{t};
166                #redo A;
167              } else {
168                unshift @{$self->{token}}, {type => PLUS_TOKEN};
169                $self->{state} = BEFORE_TOKEN_STATE;
170                # reprocess
171                return $self->{t};
172                #redo A;
173              }
174            } else {
175              $self->{state} = NAME_STATE;
176              # reprocess
177              redo A;
178            }
179        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
180                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
181                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
182                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
183          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
184          $current_token = {type => IDENT_TOKEN, value => chr $self->{c}};          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
185          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
186          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
187          redo A;          redo A;
188        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
189          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
190          $current_token = {type => IDENT_TOKEN, value => ''};          $self->{t} = {type => IDENT_TOKEN, value => ''};
191          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
192          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
193          redo A;          redo A;
194        } elsif ($self->{c} == 0x0040) { # @        } elsif ($self->{c} == 0x0040) { # @
195          ## NOTE: |@| in |ATKEYWORD|          ## NOTE: |@| in |ATKEYWORD|
196          $current_token = {type => ATKEYWORD_TOKEN, value => ''};          $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};
197          $self->{state} = AFTER_AT_STATE;          $self->{state} = AFTER_AT_STATE;
198          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
199          redo A;          redo A;
200        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
201          $current_token = {type => STRING_TOKEN, value => ''};          $self->{t} = {type => STRING_TOKEN, value => ''};
202          $self->{state} = STRING_STATE; $q = $self->{c};          $self->{state} = STRING_STATE; $q = $self->{c};
203          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
204          redo A;          redo A;
205        } elsif ($self->{c} == 0x0023) { # #        } elsif ($self->{c} == 0x0023) { # #
206          ## NOTE: |#| in |HASH|.          ## NOTE: |#| in |HASH|.
207          $current_token = {type => HASH_TOKEN, value => ''};          $self->{t} = {type => HASH_TOKEN, value => ''};
208          $self->{state} = HASH_OPEN_STATE;          $self->{state} = HASH_OPEN_STATE;
209          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
210          redo A;          redo A;
211        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
212          ## NOTE: |num|.          ## NOTE: |num|.
213          $current_token = {type => NUMBER_TOKEN, value => chr $self->{c}};          $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};
214          $self->{state} = NUMBER_STATE;          $self->{state} = NUMBER_STATE;
215          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
216          redo A;          redo A;
217        } elsif ($self->{c} == 0x002E) { # .        } elsif ($self->{c} == 0x002E) { # .
218          ## NOTE: |num|.          ## NOTE: |num|.
219          $current_token = {type => NUMBER_TOKEN, value => '0'};          $self->{t} = {type => NUMBER_TOKEN, value => '0'};
220          $self->{state} = NUMBER_FRACTION_STATE;          $self->{state} = NUMBER_FRACTION_STATE;
221          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
222          redo A;          redo A;
# Line 201  sub get_next_token ($) { Line 270  sub get_next_token ($) {
270              } else {              } else {
271                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
272                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
273                $current_token = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-'};
274                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
275                #reprocess                #reprocess
276                return {type => DELIM_TOKEN, value => '<'};                return {type => DELIM_TOKEN, value => '<'};
# Line 321  sub get_next_token ($) { Line 390  sub get_next_token ($) {
390          #redo A;          #redo A;
391        } else {        } else {
392          # stay in the state          # stay in the state
393          $current_token = {type => DELIM_TOKEN, value => chr $self->{c}};          $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};
394          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
395          return $current_token;          return $self->{t};
396          #redo A;          #redo A;
397        }        }
398      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {      } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
# Line 333  sub get_next_token ($) { Line 402  sub get_next_token ($) {
402            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
403            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
404            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
405          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
406          $current_token->{type} = DIMENSION_TOKEN          $self->{t}->{type} = DIMENSION_TOKEN
407              if $current_token->{type} == NUMBER_TOKEN;              if $self->{t}->{type} == NUMBER_TOKEN;
408          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
409          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
410          redo A;          redo A;
# Line 345  sub get_next_token ($) { Line 414  sub get_next_token ($) {
414          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
415          redo A;          redo A;
416        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D and # -
417                 $current_token->{type} == IDENT_TOKEN) {                 $self->{t}->{type} == IDENT_TOKEN) {
418          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
419          if ($self->{c} == 0x003E) { # >          if ($self->{c} == 0x003E) { # >
420            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
# Line 354  sub get_next_token ($) { Line 423  sub get_next_token ($) {
423            #redo A;            #redo A;
424          } else {          } else {
425            ## NOTE: |-|, |-|, $self->{c}            ## NOTE: |-|, |-|, $self->{c}
426            #$current_token = {type => IDENT_TOKEN, value => '-'};            #$self->{t} = {type => IDENT_TOKEN, value => '-'};
427            # stay in the state            # stay in the state
428            # reconsume            # reconsume
429            return {type => DELIM_TOKEN, value => '-'};            return {type => DELIM_TOKEN, value => '-'};
430            #redo A;            #redo A;
431          }          }
432        } else {        } else {
433          if ($current_token->{type} == NUMBER_TOKEN) {          if ($self->{t}->{type} == NUMBER_TOKEN) {
434            ## NOTE: |-| after |NUMBER|.            ## NOTE: |-| after |NUMBER|.
435            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
436            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
437            # reconsume            # reconsume
438            $current_token->{value} = $current_token->{number};            $self->{t}->{value} = $self->{t}->{number};
439            delete $current_token->{number};            delete $self->{t}->{number};
440            return $current_token;            return $self->{t};
441          } else {          } else {
442            ## NOTE: |-| not followed by |nmstart|.            ## NOTE: |-| not followed by |nmstart|.
443            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
# Line 381  sub get_next_token ($) { Line 450  sub get_next_token ($) {
450            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
451            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
452            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
453          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
454          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
455          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
456          redo A;          redo A;
457        } elsif ($self->{c} == 0x002D) { # -        } elsif ($self->{c} == 0x002D) { # -
458          $current_token->{value} .= '-';          $self->{t}->{value} .= '-';
459          $self->{state} = AFTER_AT_HYPHEN_STATE;          $self->{state} = AFTER_AT_HYPHEN_STATE;
460          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
461          redo A;          redo A;
# Line 404  sub get_next_token ($) { Line 473  sub get_next_token ($) {
473            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z            (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
474            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
475            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
476          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
477          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
478          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
479          redo A;          redo A;
# Line 418  sub get_next_token ($) { Line 487  sub get_next_token ($) {
487            #redo A;            #redo A;
488          } else {          } else {
489            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
490            $current_token = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
491            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
492            # reprocess            # reprocess
493            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
# Line 438  sub get_next_token ($) { Line 507  sub get_next_token ($) {
507      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
508        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
509          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
510          $current_token->{value} = '-';          $self->{t}->{value} = '-';
511          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
512          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
513          redo A;          redo A;
# Line 447  sub get_next_token ($) { Line 516  sub get_next_token ($) {
516                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
517                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
518          ## NOTE: |nmstart| in |ident|.          ## NOTE: |nmstart| in |ident|.
519          $current_token->{value} = chr $self->{c};          $self->{t}->{value} = chr $self->{c};
520          $current_token->{type} = DIMENSION_TOKEN;          $self->{t}->{type} = DIMENSION_TOKEN;
521          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
522          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
523          redo A;          redo A;
524        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
525          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
526          $current_token->{value} = '';          $self->{t}->{value} = '';
527          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
528          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
529          redo A;          redo A;
530        } elsif ($self->{c} == 0x0025) { # %        } elsif ($self->{c} == 0x0025) { # %
531          $current_token->{type} = PERCENTAGE_TOKEN;          $self->{t}->{type} = PERCENTAGE_TOKEN;
532          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
533          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
534          return $current_token;          return $self->{t};
535          #redo A;          #redo A;
536        } else {        } else {
537          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
538          # reprocess          # reprocess
539          return $current_token;          return $self->{t};
540          #redo A;          #redo A;
541        }        }
542      } elsif ($self->{state} == HASH_OPEN_STATE) {      } elsif ($self->{state} == HASH_OPEN_STATE) {
# Line 478  sub get_next_token ($) { Line 547  sub get_next_token ($) {
547            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
548            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
549            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
550          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
551          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
552          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
553          redo A;          redo A;
# Line 500  sub get_next_token ($) { Line 569  sub get_next_token ($) {
569            $self->{c} == 0x005F or # _            $self->{c} == 0x005F or # _
570            $self->{c} == 0x002D or # -            $self->{c} == 0x002D or # -
571            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
572          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
573          # stay in the state          # stay in the state
574          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
575          redo A;          redo A;
# Line 509  sub get_next_token ($) { Line 578  sub get_next_token ($) {
578          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
579          redo A;          redo A;
580        } elsif ($self->{c} == 0x0028 and # (        } elsif ($self->{c} == 0x0028 and # (
581                 $current_token->{type} == IDENT_TOKEN) { # (                 $self->{t}->{type} == IDENT_TOKEN) { # (
582          my $func_name = $current_token->{value};          my $func_name = $self->{t}->{value};
583          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?          $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
584          if ($func_name eq 'url' or $func_name eq 'url-prefix') {          if ($func_name eq 'url' or $func_name eq 'url-prefix') {
585            if ($current_token->{has_escape}) {            if ($self->{t}->{has_escape}) {
586              ## TODO: warn              ## TODO: warn
587            }            }
588            $current_token->{type}            $self->{t}->{type}
589                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
590            $current_token->{value} = '';            $self->{t}->{value} = '';
591            $self->{state} = URI_BEFORE_WSP_STATE;            $self->{state} = URI_BEFORE_WSP_STATE;
592            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
593            redo A;            redo A;
594          } else {          } else {
595            $current_token->{type} = FUNCTION_TOKEN;            $self->{t}->{type} = FUNCTION_TOKEN;
596            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
597            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
598            return $current_token;            return $self->{t};
599            #redo A;            #redo A;
600          }          }
601        } else {        } else {
602          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
603          # reconsume          # reconsume
604          return $current_token;          return $self->{t};
605          #redo A;          #redo A;
606        }        }
607      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {      } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
# Line 546  sub get_next_token ($) { Line 615  sub get_next_token ($) {
615          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
616        }        }
617        if ($self->{c} == -1) {        if ($self->{c} == -1) {
618          $current_token->{type} = {          $self->{t}->{type} = {
619              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
620              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
621              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
622              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
623          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
624          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
625          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
626          return $current_token;          return $self->{t};
627          #redo A;          #redo A;
628        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
629          ## TODO: Should we consider matches of "(" and ")"?          ## TODO: Should we consider matches of "(" and ")"?
630          $current_token->{type} = {          $self->{t}->{type} = {
631              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
632              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
633              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
634              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
635          }->{$current_token->{type}};          }->{$self->{t}->{type}};
636          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
637          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
638          redo A;          redo A;
# Line 574  sub get_next_token ($) { Line 643  sub get_next_token ($) {
643        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
644          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
645          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
646          return $current_token;          return $self->{t};
647          #redo A;          #redo A;
648        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
649          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
650          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
651          redo A;          redo A;
652        } else {        } else {
653          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
654          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
655          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
656          redo A;          redo A;
# Line 598  sub get_next_token ($) { Line 667  sub get_next_token ($) {
667          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
668          redo A;          redo A;
669        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
670          $current_token->{type} = {          $self->{t}->{type} = {
671              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
672              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
673              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
674              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
675          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
676          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
677          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
678          return $current_token;          return $self->{t};
679          #redo A;          #redo A;
680        } elsif ($self->{c} < 0x0020 or {        } elsif ($self->{c} < 0x0020 or {
681            0x0022 => 1, # "            0x0022 => 1, # "
# Line 614  sub get_next_token ($) { Line 683  sub get_next_token ($) {
683            0x0028 => 1, # (            0x0028 => 1, # (
684        }->{$self->{c}}) { # C0 or (        }->{$self->{c}}) { # C0 or (
685          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
686          $current_token->{type} = {          $self->{t}->{type} = {
687              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
688              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
689              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
690              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
691          }->{$current_token->{type}};          }->{$self->{t}->{type}};
692          # stay in the state.          # stay in the state.
693          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
694          redo A;          redo A;
695        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
696          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
697          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
698          return $current_token;          return $self->{t};
699          #redo A;          #redo A;
700        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
701          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
702          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
703          redo A;          redo A;
704        } else {        } else {
705          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
706          # stay in the state.          # stay in the state.
707          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
708          redo A;          redo A;
# Line 650  sub get_next_token ($) { Line 719  sub get_next_token ($) {
719          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
720          redo A;          redo A;
721        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
722          $current_token->{type} = {          $self->{t}->{type} = {
723              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
724              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
725              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
726              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
727          }->{$current_token->{type}};                  }->{$self->{t}->{type}};        
728          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
729          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
730          return $current_token;          return $self->{t};
731          #redo A;          #redo A;
732        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
733          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
734          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
735          return $current_token;          return $self->{t};
736          #redo A;          #redo A;
737        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
738          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
# Line 671  sub get_next_token ($) { Line 740  sub get_next_token ($) {
740          redo A;          redo A;
741        } else {        } else {
742          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
743          $current_token->{type} = {          $self->{t}->{type} = {
744              URI_TOKEN, URI_INVALID_TOKEN,              URI_TOKEN, URI_INVALID_TOKEN,
745              URI_INVALID_TOKEN, URI_INVALID_TOKEN,              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
746              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
747              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
748          }->{$current_token->{type}};          }->{$self->{t}->{type}};
749          # stay in the state.          # stay in the state.
750          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
751          redo A;          redo A;
752        }        }
753      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
754        $current_token->{has_escape} = 1;        $self->{t}->{has_escape} = 1;
755        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
756          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
757          $char = $self->{c} - 0x0030;          $char = $self->{c} - 0x0030;
# Line 707  sub get_next_token ($) { Line 776  sub get_next_token ($) {
776            ## NOTE: In |escape| in ... in |ident|.            ## NOTE: In |escape| in ... in |ident|.
777            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
778            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
779            return $current_token;            return $self->{t};
780            # reconsume            # reconsume
781            #redo A;            #redo A;
782          } elsif ($q == 1) {          } elsif ($q == 1) {
783            ## NOTE: In |escape| in |URI|.            ## NOTE: In |escape| in |URI|.
784            $current_token->{type} = {            $self->{t}->{type} = {
785                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
786                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
787                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
788                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
789            }->{$current_token->{type}};            }->{$self->{t}->{type}};
790            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
791            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = URI_UNQUOTED_STATE;
792            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
793            redo A;            redo A;
794          } else {          } else {
795            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
796            $current_token->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
797            $self->{state} = STRING_STATE;            $self->{state} = STRING_STATE;
798            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
799            redo A;            redo A;
# Line 734  sub get_next_token ($) { Line 803  sub get_next_token ($) {
803            ## NOTE: In |escape| in ... in |ident|.            ## NOTE: In |escape| in ... in |ident|.
804            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
805            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
806            return $current_token;            return $self->{t};
807            # reconsume            # reconsume
808            #redo A;            #redo A;
809          } elsif ($q == 1) {          } elsif ($q == 1) {
810            $current_token->{type} = {            $self->{t}->{type} = {
811                URI_TOKEN, URI_INVALID_TOKEN,                URI_TOKEN, URI_INVALID_TOKEN,
812                URI_INVALID_TOKEN, URI_INVALID_TOKEN,                URI_INVALID_TOKEN, URI_INVALID_TOKEN,
813                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
814                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
815            }->{$current_token->{type}};            }->{$self->{t}->{type}};
816            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D\x0A";
817            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = URI_UNQUOTED_STATE;
818            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
819            redo A;            redo A;
820          } else {          } else {
821            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
822            $current_token->{value} .= "\x0D\x0A";            $self->{t}->{value} .= "\x0D\x0A";
823            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
824            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
825            redo A;            redo A;
826          }          }
827        } else {        } else {
828          ## NOTE: second character of |escape|.          ## NOTE: second character of |escape|.
829          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
830          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
831              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
832          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 784  sub get_next_token ($) { Line 853  sub get_next_token ($) {
853                 $self->{c} == 0x000A or # \n                 $self->{c} == 0x000A or # \n
854                 $self->{c} == 0x0009 or # \t                 $self->{c} == 0x0009 or # \t
855                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
856          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
857          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
858              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
859          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 794  sub get_next_token ($) { Line 863  sub get_next_token ($) {
863          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
864          redo A;          redo A;
865        } else {        } else {
866          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
867          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
868              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
869          # reconsume          # reconsume
# Line 806  sub get_next_token ($) { Line 875  sub get_next_token ($) {
875            $self->{c} == 0x000A or # \n            $self->{c} == 0x000A or # \n
876            $self->{c} == 0x0009 or # \t            $self->{c} == 0x0009 or # \t
877            $self->{c} == 0x000C) { # \f            $self->{c} == 0x000C) { # \f
878          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
879          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
880              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
881          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
# Line 816  sub get_next_token ($) { Line 885  sub get_next_token ($) {
885          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
886          redo A;          redo A;
887        } else {        } else {
888          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
889          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
890              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
891          # reconsume          # reconsume
# Line 825  sub get_next_token ($) { Line 894  sub get_next_token ($) {
894      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
895        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
896        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
897          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
898          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
899              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
900          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
901          redo A;          redo A;
902        } else {        } else {
903          $current_token->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
904          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
905              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
906          # reconsume          # reconsume
# Line 847  sub get_next_token ($) { Line 916  sub get_next_token ($) {
916          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
917          redo A;          redo A;
918        } elsif ($self->{c} == $q) { # " | '        } elsif ($self->{c} == $q) { # " | '
919          if ($current_token->{type} == STRING_TOKEN) {          if ($self->{t}->{type} == STRING_TOKEN) {
920            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
921            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->();
922            return $current_token;            return $self->{t};
923            #redo A;            #redo A;
924          } else {          } else {
925            $self->{state} = URI_AFTER_WSP_STATE;            $self->{state} = URI_AFTER_WSP_STATE;
# Line 861  sub get_next_token ($) { Line 930  sub get_next_token ($) {
930                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
931                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
932                 $self->{c} == -1) {                 $self->{c} == -1) {
933          $current_token->{type} = INVALID_TOKEN;          $self->{t}->{type} = INVALID_TOKEN;
934          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
935          # reconsume          # reconsume
936          return $current_token;          return $self->{t};
937          #redo A;          #redo A;
938        } else {        } else {
939          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
940          # stay in the state          # stay in the state
941          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
942          redo A;          redo A;
# Line 875  sub get_next_token ($) { Line 944  sub get_next_token ($) {
944      } elsif ($self->{state} == NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_STATE) {
945        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.        ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
946        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
947          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
948          # stay in the state          # stay in the state
949          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
950          redo A;          redo A;
# Line 884  sub get_next_token ($) { Line 953  sub get_next_token ($) {
953          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
954          redo A;          redo A;
955        } else {        } else {
956          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
957          $current_token->{value} = '';          $self->{t}->{value} = '';
958          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
959          # reprocess          # reprocess
960          redo A;          redo A;
# Line 893  sub get_next_token ($) { Line 962  sub get_next_token ($) {
962      } elsif ($self->{state} == NUMBER_DOT_STATE) {      } elsif ($self->{state} == NUMBER_DOT_STATE) {
963        ## NOTE: The character immediately following |.| in |num|.        ## NOTE: The character immediately following |.| in |num|.
964        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
965          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
966          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
967          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
968          redo A;          redo A;
969        } else {        } else {
970          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};
971          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
972          $current_token->{value} = '';          $self->{t}->{value} = '';
973          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
974          # reprocess          # reprocess
975          return $current_token;          return $self->{t};
976          #redo A;          #redo A;
977        }        }
978      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {      } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
979        ## NOTE: The character immediately following |.| at the beginning of |num|.        ## NOTE: The character immediately following |.| at the beginning of |num|.
980        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
981          $current_token->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
982          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
983          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
984          redo A;          redo A;
# Line 922  sub get_next_token ($) { Line 991  sub get_next_token ($) {
991      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
992        ## NOTE: |[0-9]| in |num| after |.|.        ## NOTE: |[0-9]| in |num| after |.|.
993        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
994          $current_token->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
995          # stay in the state          # stay in the state
996          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->();
997          redo A;          redo A;
998        } else {        } else {
999          $current_token->{number} = $current_token->{value};          $self->{t}->{number} = $self->{t}->{value};
1000          $current_token->{value} = '';          $self->{t}->{value} = '';
1001          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1002          # reprocess          # reprocess
1003          redo A;          redo A;
# Line 937  sub get_next_token ($) { Line 1006  sub get_next_token ($) {
1006        die "$0: Unknown state |$self->{state}|";        die "$0: Unknown state |$self->{state}|";
1007      }      }
1008    } # A    } # A
   
   ## TODO: |URI|, |UNICODE-RANGE|, |COMMENT|  
   
1009  } # get_next_token  } # get_next_token
1010    
1011  1;  1;

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.5

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24