/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Diff of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log | View Patch

revision 1.7 by wakaba, Sat Sep 8 10:21:04 2007 UTC | revision 1.20 by wakaba, Sat Jan 26 14:48:09 2008 UTC
# Line 1  Line 1 
1  package Whatpm::CSS::Tokenizer;  package Whatpm::CSS::Tokenizer;
2  use strict;  use strict;
3    our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4    
5    require Exporter;
6    push our @ISA, 'Exporter';
7    
8  sub BEFORE_TOKEN_STATE () { 0 }  sub BEFORE_TOKEN_STATE () { 0 }
9  sub BEFORE_NMSTART_STATE () { 1 }  sub BEFORE_NMSTART_STATE () { 1 }
# Line 59  sub CDC_TOKEN () { 35 } Line 63  sub CDC_TOKEN () { 35 }
63  sub COMMENT_TOKEN () { 36 }  sub COMMENT_TOKEN () { 36 }
64  sub COMMENT_INVALID_TOKEN () { 37 }  sub COMMENT_INVALID_TOKEN () { 37 }
65  sub EOF_TOKEN () { 38 }  sub EOF_TOKEN () { 38 }
66    sub MINUS_TOKEN () { 39 }
67    sub STAR_TOKEN () { 40 }
68    sub VBAR_TOKEN () { 41 }
69    sub DOT_TOKEN () { 42 }
70    sub COLON_TOKEN () { 43 }
71    sub MATCH_TOKEN () { 44 }
72    sub EXCLAMATION_TOKEN () { 45 }
73    
74  our @TokenName = qw(  our @TokenName = qw(
75    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID    0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID
# Line 66  our @TokenName = qw( Line 77  our @TokenName = qw(
77    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH    0 DELIM PLUS GREATER COMMA TILDE DASHMATCH
78    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON    PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON
79    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT    LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT
80    COMMENT_INVALID EOF    COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION
81    );
82    
83    our @EXPORT_OK = qw(
84      IDENT_TOKEN ATKEYWORD_TOKEN HASH_TOKEN FUNCTION_TOKEN URI_TOKEN
85      URI_INVALID_TOKEN URI_PREFIX_TOKEN URI_PREFIX_INVALID_TOKEN
86      STRING_TOKEN INVALID_TOKEN NUMBER_TOKEN DIMENSION_TOKEN PERCENTAGE_TOKEN
87      UNICODE_RANGE_TOKEN DELIM_TOKEN PLUS_TOKEN GREATER_TOKEN COMMA_TOKEN
88      TILDE_TOKEN DASHMATCH_TOKEN PREFIXMATCH_TOKEN SUFFIXMATCH_TOKEN
89      SUBSTRINGMATCH_TOKEN INCLUDES_TOKEN SEMICOLON_TOKEN LBRACE_TOKEN
90      RBRACE_TOKEN LPAREN_TOKEN RPAREN_TOKEN LBRACKET_TOKEN RBRACKET_TOKEN
91      S_TOKEN CDO_TOKEN CDC_TOKEN COMMENT_TOKEN COMMENT_INVALID_TOKEN EOF_TOKEN
92      MINUS_TOKEN STAR_TOKEN VBAR_TOKEN DOT_TOKEN COLON_TOKEN MATCH_TOKEN
93      EXCLAMATION_TOKEN
94  );  );
95    
96    our %EXPORT_TAGS = ('token' => [@EXPORT_OK]);
97    
98  sub new ($) {  sub new ($) {
99    my $self = bless {token => [], get_char => sub { -1 },    my $self = bless {token => [], get_char => sub { -1 }}, shift;
                     onerror => sub { }}, shift;  
100    return $self;    return $self;
101  } # new  } # new
102    
103  sub init ($) {  sub init ($) {
104    my $self = shift;    my $self = shift;
105    $self->{state} = BEFORE_TOKEN_STATE;    $self->{state} = BEFORE_TOKEN_STATE;
106    $self->{c} = $self->{get_char}->();    $self->{c} = $self->{get_char}->($self);
107    #$self->{t} = {type => token-type, value => value, number => number};    #$self->{t} = {type => token-type, value => value, number => number};
108  } # init  } # init
109    
# Line 102  sub get_next_token ($) { Line 127  sub get_next_token ($) {
127      if ($self->{state} == BEFORE_TOKEN_STATE) {      if ($self->{state} == BEFORE_TOKEN_STATE) {
128        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
129          ## NOTE: |-| in |ident| in |IDENT|          ## NOTE: |-| in |ident| in |IDENT|
130          $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};          $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1,
131                          line => $self->{line}, column => $self->{column}};
132          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
133          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
134          redo A;          redo A;
135        } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u        } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
136          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c},
137          $self->{c} = $self->{get_char}->();                        line => $self->{line}, column => $self->{column}};
138            $self->{c} = $self->{get_char}->($self);
139          if ($self->{c} == 0x002B) { # +          if ($self->{c} == 0x002B) { # +
140            $self->{c} = $self->{get_char}->();            my ($l, $c) = ($self->{line}, $self->{column});
141              $self->{c} = $self->{get_char}->($self);
142            if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9            if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
143                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F                (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
144                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f                (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
145                $self->{c} == 0x003F) { # ?                $self->{c} == 0x003F) { # ?
146              $self->{t}->{value} .= '+' . chr $self->{c};              $self->{t}->{value} = chr $self->{c};
147              $self->{t}->{type} = UNICODE_RANGE_TOKEN;              $self->{t}->{type} = UNICODE_RANGE_TOKEN;
148              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->($self);
149              C: for (2..6) {              C: for (2..6) {
150                if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9                if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
151                    (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F                    (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
152                    (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f                    (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
153                    $self->{c} == 0x003F) { # ?                    $self->{c} == 0x003F) { # ?
154                  $self->{t}->{value} .= chr $self->{c};                  $self->{t}->{value} .= chr $self->{c};
155                  $self->{c} = $self->{get_char}->();                  $self->{c} = $self->{get_char}->($self);
156                } else {                } else {
157                  last C;                  last C;
158                }                }
159              } # C              } # C
160    
161              if ($self->{c} == 0x002D) { # -              if ($self->{c} == 0x002D) { # -
162                $self->{c} = $self->{get_char}->();                $self->{c} = $self->{get_char}->($self);
163                if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9                if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
164                    (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F                    (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
165                    (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f                    (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
166                  $self->{t}->{value} .= '-' . chr $self->{c};                  $self->{t}->{value} .= '-' . chr $self->{c};
167                  $self->{c} = $self->{get_char}->();                  $self->{c} = $self->{get_char}->($self);
168                  C: for (2..6) {                  C: for (2..6) {
169                    if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9                    if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
170                        (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F                        (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
171                        (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f                        (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
172                      $self->{t}->{value} .= chr $self->{c};                      $self->{t}->{value} .= chr $self->{c};
173                      $self->{c} = $self->{get_char}->();                      $self->{c} = $self->{get_char}->($self);
174                    } else {                    } else {
175                      last C;                      last C;
176                    }                    }
# Line 151  sub get_next_token ($) { Line 179  sub get_next_token ($) {
179                  #                  #
180                } else {                } else {
181                  my $token = $self->{t};                  my $token = $self->{t};
182                  $self->{t} = {type => IDENT_TOKEN, value => '-'};                  $self->{t} = {type => IDENT_TOKEN, value => '-',
183                                  line => $self->{line},
184                                  column => $self->{column}};
185                  $self->{state} = BEFORE_NMSTART_STATE;                  $self->{state} = BEFORE_NMSTART_STATE;
186                  # reprocess                  # reprocess
187                  return $token;                  return $token;
# Line 164  sub get_next_token ($) { Line 194  sub get_next_token ($) {
194              return $self->{t};              return $self->{t};
195              #redo A;              #redo A;
196            } else {            } else {
197              unshift @{$self->{token}}, {type => PLUS_TOKEN};              unshift @{$self->{token}},
198                    {type => PLUS_TOKEN, line => $l, column => $c};
199              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
200              # reprocess              # reprocess
201              return $self->{t};              return $self->{t};
# Line 180  sub get_next_token ($) { Line 211  sub get_next_token ($) {
211                 $self->{c} == 0x005F or # _                 $self->{c} == 0x005F or # _
212                 $self->{c} > 0x007F) { # nonascii                 $self->{c} > 0x007F) { # nonascii
213          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
214          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};          $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c},
215                          line => $self->{line}, column => $self->{column}};
216          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
217          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
218          redo A;          redo A;
219        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
220          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
221          $self->{t} = {type => IDENT_TOKEN, value => ''};          $self->{t} = {type => IDENT_TOKEN, value => '',
222                          line => $self->{line}, column => $self->{column}};
223          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
224          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
225          redo A;          redo A;
226        } elsif ($self->{c} == 0x0040) { # @        } elsif ($self->{c} == 0x0040) { # @
227          ## NOTE: |@| in |ATKEYWORD|          ## NOTE: |@| in |ATKEYWORD|
228          $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};          $self->{t} = {type => ATKEYWORD_TOKEN, value => '',
229                          line => $self->{line}, column => $self->{column}};
230          $self->{state} = AFTER_AT_STATE;          $self->{state} = AFTER_AT_STATE;
231          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
232          redo A;          redo A;
233        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
234          $self->{t} = {type => STRING_TOKEN, value => ''};          $self->{t} = {type => STRING_TOKEN, value => '',
235                          line => $self->{line}, column => $self->{column}};
236          $self->{state} = STRING_STATE; $q = $self->{c};          $self->{state} = STRING_STATE; $q = $self->{c};
237          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
238          redo A;          redo A;
239        } elsif ($self->{c} == 0x0023) { # #        } elsif ($self->{c} == 0x0023) { # #
240          ## NOTE: |#| in |HASH|.          ## NOTE: |#| in |HASH|.
241          $self->{t} = {type => HASH_TOKEN, value => ''};          $self->{t} = {type => HASH_TOKEN, value => '',
242                          line => $self->{line}, column => $self->{column}};
243          $self->{state} = HASH_OPEN_STATE;          $self->{state} = HASH_OPEN_STATE;
244          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
245          redo A;          redo A;
246        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
247          ## NOTE: |num|.          ## NOTE: |num|.
248          $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};          $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c},
249                          line => $self->{line}, column => $self->{column}};
250            ## NOTE: 'value' is renamed as 'number' later.
251          $self->{state} = NUMBER_STATE;          $self->{state} = NUMBER_STATE;
252          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
253          redo A;          redo A;
254        } elsif ($self->{c} == 0x002E) { # .        } elsif ($self->{c} == 0x002E) { # .
255          ## NOTE: |num|.          ## NOTE: |num|.
256          $self->{t} = {type => NUMBER_TOKEN, value => '0'};          $self->{t} = {type => NUMBER_TOKEN, value => '0',
257                          line => $self->{line}, column => $self->{column}};
258            ## NOTE: 'value' is renamed as 'number' later.
259          $self->{state} = NUMBER_FRACTION_STATE;          $self->{state} = NUMBER_FRACTION_STATE;
260          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
261          redo A;          redo A;
262        } elsif ($self->{c} == 0x002F) { # /        } elsif ($self->{c} == 0x002F) { # /
263          $self->{c} = $self->{get_char}->();          my ($l, $c) = ($self->{line}, $self->{column});
264            $self->{c} = $self->{get_char}->($self);
265          if ($self->{c} == 0x002A) { # *          if ($self->{c} == 0x002A) { # *
266            C: {            C: {
267              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->($self);
268              if ($self->{c} == 0x002A) { # *              if ($self->{c} == 0x002A) { # *
269                D: {                D: {
270                  $self->{c} = $self->{get_char}->();                  $self->{c} = $self->{get_char}->($self);
271                  if ($self->{c} == 0x002F) { # /                  if ($self->{c} == 0x002F) { # /
272                    #                    #
273                  } elsif ($self->{c} == 0x002A) { # *                  } elsif ($self->{c} == 0x002A) { # *
# Line 246  sub get_next_token ($) { Line 287  sub get_next_token ($) {
287            } # C            } # C
288    
289            # stay in the state.            # stay in the state.
290            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
291            redo A;            redo A;
292          } else {          } else {
293            # stay in the state.            # stay in the state.
294            # reprocess            # reprocess
295            return {type => DELIM_STATE, value => '/'};            return {type => DELIM_TOKEN, value => '/', line => $l, column => $c};
296            #redo A;            #redo A;
297          }                  }        
298        } elsif ($self->{c} == 0x003C) { # <        } elsif ($self->{c} == 0x003C) { # <
299            my ($l, $c) = ($self->{line}, $self->{column});
300          ## NOTE: |CDO|          ## NOTE: |CDO|
301          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
302          if ($self->{c} == 0x0021) { # !          if ($self->{c} == 0x0021) { # !
303            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
304            if ($self->{c} == 0x002C) { # -            if ($self->{c} == 0x002D) { # -
305              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->($self);
306              if ($self->{c} == 0x002C) { # -              if ($self->{c} == 0x002D) { # -
307                $self->{state} = BEFORE_TOKEN_STATE;                $self->{state} = BEFORE_TOKEN_STATE;
308                $self->{c} = $self->{get_char}->();                $self->{c} = $self->{get_char}->($self);
309                return {type => CDO_TOKEN};                return {type => CDO_TOKEN, line => $l, column => $c};
310                #redo A;                #redo A;
311              } else {              } else {
312                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};                unshift @{$self->{token}},
313                      {type => EXCLAMATION_TOKEN, line => $l, column => $c + 1};
314                ## NOTE: |-| in |ident| in |IDENT|                ## NOTE: |-| in |ident| in |IDENT|
315                $self->{t} = {type => IDENT_TOKEN, value => '-'};                $self->{t} = {type => IDENT_TOKEN, value => '-',
316                                line => $l, column => $c + 2};
317                $self->{state} = BEFORE_NMSTART_STATE;                $self->{state} = BEFORE_NMSTART_STATE;
318                #reprocess                #reprocess
319                return {type => DELIM_TOKEN, value => '<'};                return {type => DELIM_TOKEN, value => '<',
320                          line => $l, column => $c};
321                #redo A;                #redo A;
322              }              }
323            } else {            } else {
324              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};              unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN,
325                                            line => $l, column => $c + 1};
326              $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
327              #reprocess              #reprocess
328              return {type => DELIM_TOKEN, value => '<'};              return {type => DELIM_TOKEN, value => '<',
329                        line => $l, column => $c};
330              #redo A;              #redo A;
331            }            }
332          } else {          } else {
333            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
334            #reprocess            #reprocess
335            return {type => DELIM_TOKEN, value => '<'};            return {type => DELIM_TOKEN, value => '<',
336                      line => $l, column => $c};
337            #redo A;            #redo A;
338          }          }
339        } elsif (my $t = {        } elsif (my $t = {
340                  0x003B => SEMICOLON_TOKEN, # ;                          0x0021 => EXCLAMATION_TOKEN, # !
341                  0x007B => LBRACE_TOKEN, # {                          0x002D => MINUS_TOKEN, # -
342                  0x007D => RBRACE_TOKEN, # }                          0x002E => DOT_TOKEN, # .
343                  0x0028 => LPAREN_TOKEN, # (                          0x003A => COLON_TOKEN, # :
344                  0x0029 => RPAREN_TOKEN, # )                          0x003B => SEMICOLON_TOKEN, # ;
345                  0x005B => LBRACKET_TOKEN, # [                          0x003D => MATCH_TOKEN, # =
346                  0x005D => RBRACKET_TOKEN, # ]                          0x007B => LBRACE_TOKEN, # {
347                            0x007D => RBRACE_TOKEN, # }
348                            0x0028 => LPAREN_TOKEN, # (
349                            0x0029 => RPAREN_TOKEN, # )
350                            0x005B => LBRACKET_TOKEN, # [
351                            0x005D => RBRACKET_TOKEN, # ]
352                 }->{$self->{c}}) {                 }->{$self->{c}}) {
353            my ($l, $c) = ($self->{line}, $self->{column});
354          # stay in the state          # stay in the state
355          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
356          return {type => $t};          return {type => $t, line => $l, column => $c};
357          # redo A;          # redo A;
358        } elsif ({        } elsif ({
359                  0x0020 => 1, # SP                  0x0020 => 1, # SP
# Line 308  sub get_next_token ($) { Line 362  sub get_next_token ($) {
362                  0x000A => 1, # \n                  0x000A => 1, # \n
363                  0x000C => 1, # \f                  0x000C => 1, # \f
364                 }->{$self->{c}}) {                 }->{$self->{c}}) {
365            my ($l, $c) = ($self->{line}, $self->{column});
366          W: {          W: {
367            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
368            if ({            if ({
369                  0x0020 => 1, # SP                  0x0020 => 1, # SP
370                  0x0009 => 1, # \t                  0x0009 => 1, # \t
# Line 324  sub get_next_token ($) { Line 379  sub get_next_token ($) {
379                              0x002C => COMMA_TOKEN, # ,                              0x002C => COMMA_TOKEN, # ,
380                              0x007E => TILDE_TOKEN, # ~                              0x007E => TILDE_TOKEN, # ~
381                             }->{$self->{c}}) {                             }->{$self->{c}}) {
382                my ($l, $c) = ($self->{line}, $self->{column});
383              # stay in the state              # stay in the state
384              $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->($self);
385              return {type => $v};              return {type => $v, line => $l, column => $c};
386              #redo A;              #redo A;
387            } else {            } else {
388              # stay in the state              # stay in the state
389              # reprocess              # reprocess
390              return {type => S_TOKEN};              return {type => S_TOKEN, line => $l, column => $c};
391              #redo A;              #redo A;
392            }            }
393          } # W          } # W
# Line 341  sub get_next_token ($) { Line 397  sub get_next_token ($) {
397                          0x0024 => SUFFIXMATCH_TOKEN, # $                          0x0024 => SUFFIXMATCH_TOKEN, # $
398                          0x002A => SUBSTRINGMATCH_TOKEN, # *                          0x002A => SUBSTRINGMATCH_TOKEN, # *
399                         }->{$self->{c}}) {                         }->{$self->{c}}) {
400            my ($line, $column) = ($self->{line}, $self->{column});
401          my $c = $self->{c};          my $c = $self->{c};
402          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
403          if ($self->{c} == 0x003D) { # =          if ($self->{c} == 0x003D) { # =
404            # stay in the state            # stay in the state
405            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
406            return {type => $v};            return {type => $v, line => $line, column => $column};
407              #redo A;
408            } elsif ($v = {
409                           0x002A => STAR_TOKEN, # *
410                           0x007C => VBAR_TOKEN, # |
411                          }->{$c}) {
412              # stay in the state.
413              # reprocess
414              return {type => $v, line => $line, column => $column};
415            #redo A;            #redo A;
416          } else {          } else {
417            # stay in the state            # stay in the state
418            # reprocess            # reprocess
419            return {type => DELIM_TOKEN, value => chr $c};            return {type => DELIM_TOKEN, value => chr $c,
420                      line => $line, column => $column};
421            #redo A;            #redo A;
422          }          }
423        } elsif ($self->{c} == 0x002B) { # +        } elsif ($self->{c} == 0x002B) { # +
424            my ($l, $c) = ($self->{line}, $self->{column});
425          # stay in the state          # stay in the state
426          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
427          return {type => PLUS_TOKEN};          return {type => PLUS_TOKEN, line => $l, column => $c};
428          #redo A;          #redo A;
429        } elsif ($self->{c} == 0x003E) { # >        } elsif ($self->{c} == 0x003E) { # >
430            my ($l, $c) = ($self->{line}, $self->{column});
431          # stay in the state          # stay in the state
432          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
433          return {type => GREATER_TOKEN};          return {type => GREATER_TOKEN, line => $l, column => $c};
434          #redo A;          #redo A;
435        } elsif ($self->{c} == 0x002C) { # ,        } elsif ($self->{c} == 0x002C) { # ,
436            my ($l, $c) = ($self->{line}, $self->{column});
437          # stay in the state          # stay in the state
438          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
439          return {type => COMMA_TOKEN};          return {type => COMMA_TOKEN, line => $l, column => $c};
440          #redo A;          #redo A;
441        } elsif ($self->{c} == 0x007E) { # ~        } elsif ($self->{c} == 0x007E) { # ~
442          $self->{c} = $self->{get_char}->();          my ($l, $c) = ($self->{line}, $self->{column});
443            $self->{c} = $self->{get_char}->($self);
444          if ($self->{c} == 0x003D) { # =          if ($self->{c} == 0x003D) { # =
445            # stay in the state            # stay in the state
446            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
447            return {type => INCLUDES_TOKEN};            return {type => INCLUDES_TOKEN, line => $l, column => $c};
448            #redo A;            #redo A;
449          } else {          } else {
450            # stay in the state            # stay in the state
451            # reprocess            # reprocess
452            return {type => TILDE_TOKEN};            return {type => TILDE_TOKEN, line => $l, column => $c};
453            #redo A;            #redo A;
454          }          }
455        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
456          # stay in the state          # stay in the state
457          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
458          return {type => EOF_TOKEN};          return {type => EOF_TOKEN,
459                    line => $self->{line}, column => $self->{column}};
460          #redo A;          #redo A;
461        } else {        } else {
462          # stay in the state          # stay in the state
463          $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};          $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c},
464          $self->{c} = $self->{get_char}->();                        line => $self->{line}, column => $self->{column}};
465            $self->{c} = $self->{get_char}->($self);
466          return $self->{t};          return $self->{t};
467          #redo A;          #redo A;
468        }        }
# Line 405  sub get_next_token ($) { Line 477  sub get_next_token ($) {
477          $self->{t}->{type} = DIMENSION_TOKEN          $self->{t}->{type} = DIMENSION_TOKEN
478              if $self->{t}->{type} == NUMBER_TOKEN;              if $self->{t}->{type} == NUMBER_TOKEN;
479          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
480          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
481          redo A;          redo A;
482        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
 ## TODO: 12-\X, 12-\{nl}  
483          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
484          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
485          redo A;          redo A;
486        } elsif ($self->{c} == 0x002D and # -        } elsif ($self->{c} == 0x002D) { # -
487                 $self->{t}->{type} == IDENT_TOKEN) {          if ($self->{t}->{type} == IDENT_TOKEN) {
488          $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
489          if ($self->{c} == 0x003E) { # >            if ($self->{c} == 0x003E) { # >
490            $self->{state} = BEFORE_TOKEN_STATE;              $self->{state} = BEFORE_TOKEN_STATE;
491            $self->{c} = $self->{get_char}->();              $self->{c} = $self->{get_char}->($self);
492            return {type => CDC_TOKEN};              return {type => CDC_TOKEN,
493            #redo A;                      line => $self->{t}->{line},
494                        column => $self->{t}->{column}};
495                #redo A;
496              } else {
497                ## NOTE: |-|, |-|, $self->{c}
498                #$self->{t} = {type => IDENT_TOKEN, value => '-'};
499                $self->{t}->{column}++;
500                # stay in the state
501                # reconsume
502                return {type => MINUS_TOKEN,
503                        line => $self->{t}->{line},
504                        column => $self->{t}->{column} - 1};
505                #redo A;
506              }
507            } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
508              my ($l, $c) = ($self->{line}, $self->{column}); # second '-'
509              $self->{c} = $self->{get_char}->($self);
510              if ($self->{c} == 0x003E) { # >
511                unshift @{$self->{token}}, {type => CDC_TOKEN};
512                $self->{t}->{type} = NUMBER_TOKEN;
513                $self->{t}->{value} = '';
514                $self->{state} = BEFORE_TOKEN_STATE;
515                $self->{c} = $self->{get_char}->($self);
516                return $self->{t};
517                #redo A;
518              } else {
519                ## NOTE: NUMBER, |-|, |-|, $self->{c}
520                my $t = $self->{t};
521                $t->{type} = NUMBER_TOKEN;
522                $t->{value} = '';
523                $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1,
524                              line => $l, column => $c};
525                unshift @{$self->{token}}, {type => MINUS_TOKEN,
526                                            line => $l, column => $c - 1};
527                # stay in the state
528                # reconsume
529                return $t;
530                #redo A;
531              }
532          } else {          } else {
533            ## NOTE: |-|, |-|, $self->{c}            #
           #$self->{t} = {type => IDENT_TOKEN, value => '-'};  
           # stay in the state  
           # reconsume  
           return {type => DELIM_TOKEN, value => '-'};  
           #redo A;  
534          }          }
535        } else {        } else {
536          if ($self->{t}->{type} == NUMBER_TOKEN) {          #
537            ## NOTE: |-| after |NUMBER|.        }
538            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};        
539            $self->{state} = BEFORE_TOKEN_STATE;        if ($self->{t}->{type} == DIMENSION_TOKEN) {
540            # reprocess          ## NOTE: |-| after |NUMBER|.
541            $self->{t}->{value} = $self->{t}->{number};          unshift @{$self->{token}}, {type => MINUS_TOKEN,
542            delete $self->{t}->{number};                                      line => $self->{line},
543            return $self->{t};                                      column => $self->{column} - 1};
544          } else {          ## BUG: column might be wrong if on the line boundary.
545            ## NOTE: |-| not followed by |nmstart|.          $self->{state} = BEFORE_TOKEN_STATE;
546            $self->{state} = BEFORE_TOKEN_STATE;          # reprocess
547            # reprocess          $self->{t}->{type} = NUMBER_TOKEN;
548            return {type => DELIM_TOKEN, value => '-'};          $self->{t}->{value} = '';
549          }          return $self->{t};
550          } else {
551            ## NOTE: |-| not followed by |nmstart|.
552            $self->{state} = BEFORE_TOKEN_STATE;
553            # reprocess
554            return {type => MINUS_TOKEN,
555                    line => $self->{line}, column => $self->{column} - 1};
556            ## BUG: column might be wrong if on the line boundary.
557        }        }
558      } elsif ($self->{state} == AFTER_AT_STATE) {      } elsif ($self->{state} == AFTER_AT_STATE) {
559        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
# Line 451  sub get_next_token ($) { Line 562  sub get_next_token ($) {
562            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
563          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
564          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
565          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
566          redo A;          redo A;
567        } elsif ($self->{c} == 0x002D) { # -        } elsif ($self->{c} == 0x002D) { # -
568          $self->{t}->{value} .= '-';          $self->{t}->{value} .= '-';
569          $self->{state} = AFTER_AT_HYPHEN_STATE;          $self->{state} = AFTER_AT_HYPHEN_STATE;
570          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
571          redo A;          redo A;
572        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
573          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
574          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
575          redo A;          redo A;
576        } else {        } else {
577          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
578          # reprocess          # reprocess
579          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@',
580                    line => $self->{t}->{line},
581                    column => $self->{t}->{column}};
582        }        }
583      } elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) {      } elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) {
584        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
# Line 474  sub get_next_token ($) { Line 587  sub get_next_token ($) {
587            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
588          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
589          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
590          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
591          redo A;          redo A;
592        } elsif ($self->{c} == 0x002D) { # -        } elsif ($self->{c} == 0x002D) { # -
593          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
594          if ($self->{c} == 0x003E) { # >          if ($self->{c} == 0x003E) { # >
595            unshift @{$self->{token}}, {type => CDC_TOKEN};            unshift @{$self->{token}}, {type => CDC_TOKEN};
596            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
597            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
598            return {type => DELIM_TOKEN, value => '@'};            return {type => DELIM_TOKEN, value => '@'};
599            #redo A;            #redo A;
600          } else {          } else {
601            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};            unshift @{$self->{token}}, {type => MINUS_TOKEN};
602            $self->{t} = {type => IDENT_TOKEN, value => '-'};            $self->{t} = {type => IDENT_TOKEN, value => '-'};
603            $self->{state} = BEFORE_NMSTART_STATE;            $self->{state} = BEFORE_NMSTART_STATE;
604            # reprocess            # reprocess
# Line 495  sub get_next_token ($) { Line 608  sub get_next_token ($) {
608        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
609          ## TODO: @-\{nl}          ## TODO: @-\{nl}
610          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
611          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
612          redo A;          redo A;
613        } else {        } else {
614          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};          unshift @{$self->{token}}, {type => MINUS_TOKEN};
615          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
616          # reprocess          # reprocess
617          return {type => DELIM_TOKEN, value => '@'};          return {type => DELIM_TOKEN, value => '@'};
# Line 506  sub get_next_token ($) { Line 619  sub get_next_token ($) {
619      } elsif ($self->{state} == AFTER_NUMBER_STATE) {      } elsif ($self->{state} == AFTER_NUMBER_STATE) {
620        if ($self->{c} == 0x002D) { # -        if ($self->{c} == 0x002D) { # -
621          ## NOTE: |-| in |ident|.          ## NOTE: |-| in |ident|.
622            $self->{t}->{hyphen} = 1;
623          $self->{t}->{value} = '-';          $self->{t}->{value} = '-';
624            $self->{t}->{type} = DIMENSION_TOKEN;
625          $self->{state} = BEFORE_NMSTART_STATE;          $self->{state} = BEFORE_NMSTART_STATE;
626          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
627          redo A;          redo A;
628        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z        } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
629                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z                 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
# Line 518  sub get_next_token ($) { Line 633  sub get_next_token ($) {
633          $self->{t}->{value} = chr $self->{c};          $self->{t}->{value} = chr $self->{c};
634          $self->{t}->{type} = DIMENSION_TOKEN;          $self->{t}->{type} = DIMENSION_TOKEN;
635          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
636          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
637          redo A;          redo A;
638        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
639          ## NOTE: |nmstart| in |ident| in |IDENT|          ## NOTE: |nmstart| in |ident| in |IDENT|
640          $self->{t}->{value} = '';          $self->{t}->{value} = '';
641            $self->{t}->{type} = DIMENSION_TOKEN;
642          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
643          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
644          redo A;          redo A;
645        } elsif ($self->{c} == 0x0025) { # %        } elsif ($self->{c} == 0x0025) { # %
646          $self->{t}->{type} = PERCENTAGE_TOKEN;          $self->{t}->{type} = PERCENTAGE_TOKEN;
647          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
648          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
649          return $self->{t};          return $self->{t};
650          #redo A;          #redo A;
651        } else {        } else {
# Line 548  sub get_next_token ($) { Line 664  sub get_next_token ($) {
664            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
665          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
666          $self->{state} = NAME_STATE;          $self->{state} = NAME_STATE;
667          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
668          redo A;          redo A;
669        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
670          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
671          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
672          redo A;          redo A;
673        } else {        } else {
674          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
675          $self->{c} = $self->{get_char}->();          # reprocess
676          return {type => DELIM_TOKEN, value => '#'};          return {type => DELIM_TOKEN, value => '#',
677                    line => $self->{t}->{line},
678                    column => $self->{t}->{column}};
679          #redo A;          #redo A;
680        }        }
681      } elsif ($self->{state} == NAME_STATE) {      } elsif ($self->{state} == NAME_STATE) {
# Line 570  sub get_next_token ($) { Line 688  sub get_next_token ($) {
688            $self->{c} > 0x007F) { # nonascii            $self->{c} > 0x007F) { # nonascii
689          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
690          # stay in the state          # stay in the state
691          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
692          redo A;          redo A;
693        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
694          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;          $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
695          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
696          redo A;          redo A;
697        } elsif ($self->{c} == 0x0028 and # (        } elsif ($self->{c} == 0x0028 and # (
698                 $self->{t}->{type} == IDENT_TOKEN) { # (                 $self->{t}->{type} == IDENT_TOKEN) { # (
# Line 588  sub get_next_token ($) { Line 706  sub get_next_token ($) {
706                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;                = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
707            $self->{t}->{value} = '';            $self->{t}->{value} = '';
708            $self->{state} = URI_BEFORE_WSP_STATE;            $self->{state} = URI_BEFORE_WSP_STATE;
709            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
710            redo A;            redo A;
711          } else {          } else {
712            $self->{t}->{type} = FUNCTION_TOKEN;            $self->{t}->{type} = FUNCTION_TOKEN;
713            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
714            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
715            return $self->{t};            return $self->{t};
716            #redo A;            #redo A;
717          }          }
# Line 611  sub get_next_token ($) { Line 729  sub get_next_token ($) {
729                  0x000A => 1, # \n                  0x000A => 1, # \n
730                  0x000C => 1, # \f                  0x000C => 1, # \f
731               }->{$self->{c}}) {               }->{$self->{c}}) {
732          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
733        }        }
734        if ($self->{c} == -1) {        if ($self->{c} == -1) {
735          $self->{t}->{type} = {          $self->{t}->{type} = {
# Line 621  sub get_next_token ($) { Line 739  sub get_next_token ($) {
739              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
740          }->{$self->{t}->{type}};                  }->{$self->{t}->{type}};        
741          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
742          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
743          return $self->{t};          return $self->{t};
744          #redo A;          #redo A;
745        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (        } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
# Line 633  sub get_next_token ($) { Line 751  sub get_next_token ($) {
751              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
752          }->{$self->{t}->{type}};          }->{$self->{t}->{type}};
753          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
754          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
755          redo A;          redo A;
756        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '        } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
757          $self->{state} = STRING_STATE; $q = $self->{c};          $self->{state} = STRING_STATE; $q = $self->{c};
758          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
759          redo A;          redo A;
760        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
761          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
762          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
763          return $self->{t};          return $self->{t};
764          #redo A;          #redo A;
765        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
766          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
767          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
768          redo A;          redo A;
769        } else {        } else {
770          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
771          $self->{state} = URI_UNQUOTED_STATE;          $self->{state} = URI_UNQUOTED_STATE;
772          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
773          redo A;          redo A;
774        }        }
775      } elsif ($self->{state} == URI_UNQUOTED_STATE) {      } elsif ($self->{state} == URI_UNQUOTED_STATE) {
# Line 663  sub get_next_token ($) { Line 781  sub get_next_token ($) {
781             0x000C => 1, # \f             0x000C => 1, # \f
782            }->{$self->{c}}) {            }->{$self->{c}}) {
783          $self->{state} = URI_AFTER_WSP_STATE;          $self->{state} = URI_AFTER_WSP_STATE;
784          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
785          redo A;          redo A;
786        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
787          $self->{t}->{type} = {          $self->{t}->{type} = {
# Line 673  sub get_next_token ($) { Line 791  sub get_next_token ($) {
791              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
792          }->{$self->{t}->{type}};                  }->{$self->{t}->{type}};        
793          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
794          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
795          return $self->{t};          return $self->{t};
796          #redo A;          #redo A;
797        } elsif ($self->{c} < 0x0020 or {        } elsif ($self->{c} < 0x0020 or {
# Line 689  sub get_next_token ($) { Line 807  sub get_next_token ($) {
807              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
808          }->{$self->{t}->{type}};          }->{$self->{t}->{type}};
809          # stay in the state.          # stay in the state.
810          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
811          redo A;          redo A;
812        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
813          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
814          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
815          return $self->{t};          return $self->{t};
816          #redo A;          #redo A;
817        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
818          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
819          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
820          redo A;          redo A;
821        } else {        } else {
822          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
823          # stay in the state.          # stay in the state.
824          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
825          redo A;          redo A;
826        }        }
827      } elsif ($self->{state} == URI_AFTER_WSP_STATE) {      } elsif ($self->{state} == URI_AFTER_WSP_STATE) {
# Line 715  sub get_next_token ($) { Line 833  sub get_next_token ($) {
833             0x000C => 1, # \f             0x000C => 1, # \f
834            }->{$self->{c}}) {            }->{$self->{c}}) {
835          # stay in the state.          # stay in the state.
836          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
837          redo A;          redo A;
838        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
839          $self->{t}->{type} = {          $self->{t}->{type} = {
# Line 725  sub get_next_token ($) { Line 843  sub get_next_token ($) {
843              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
844          }->{$self->{t}->{type}};                  }->{$self->{t}->{type}};        
845          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
846          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
847          return $self->{t};          return $self->{t};
848          #redo A;          #redo A;
849        } elsif ($self->{c} == 0x0029) { # )        } elsif ($self->{c} == 0x0029) { # )
850          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
851          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
852          return $self->{t};          return $self->{t};
853          #redo A;          #redo A;
854        } elsif ($self->{c} == 0x005C) { # \        } elsif ($self->{c} == 0x005C) { # \
855          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;          $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
856          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
857          redo A;          redo A;
858        } else {        } else {
859          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?          ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
# Line 746  sub get_next_token ($) { Line 864  sub get_next_token ($) {
864              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
865          }->{$self->{t}->{type}};          }->{$self->{t}->{type}};
866          # stay in the state.          # stay in the state.
867          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
868          redo A;          redo A;
869        }        }
870      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {      } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
# Line 755  sub get_next_token ($) { Line 873  sub get_next_token ($) {
873          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
874          $char = $self->{c} - 0x0030;          $char = $self->{c} - 0x0030;
875          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
876          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
877          redo A;          redo A;
878        } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F        } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
879          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
880          $char = $self->{c} - 0x0041 + 0xA;          $char = $self->{c} - 0x0041 + 0xA;
881          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
882          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
883          redo A;          redo A;
884        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
885          ## NOTE: second character of |unicode| in |escape|.          ## NOTE: second character of |unicode| in |escape|.
886          $char = $self->{c} - 0x0061 + 0xA;          $char = $self->{c} - 0x0061 + 0xA;
887          $self->{state} = ESCAPE_STATE; $i = 2;          $self->{state} = ESCAPE_STATE; $i = 2;
888          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
889          redo A;          redo A;
890        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
891                 $self->{c} == 0x000C) { # \f                 $self->{c} == 0x000C) { # \f
# Line 783  sub get_next_token ($) { Line 901  sub get_next_token ($) {
901            }->{$self->{t}->{type}};            }->{$self->{t}->{type}};
902            $self->{t}->{value} .= chr $self->{c};            $self->{t}->{value} .= chr $self->{c};
903            $self->{state} = URI_UNQUOTED_STATE;            $self->{state} = URI_UNQUOTED_STATE;
904            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
905            redo A;            redo A;
906          } else {          } else {
907            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
           $self->{t}->{value} .= chr $self->{c};  
908            $self->{state} = STRING_STATE;            $self->{state} = STRING_STATE;
909            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
910            redo A;            redo A;
911          }          }
912        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
# Line 803  sub get_next_token ($) { Line 920  sub get_next_token ($) {
920                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
921                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,                URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
922            }->{$self->{t}->{type}};            }->{$self->{t}->{type}};
923            $self->{t}->{value} .= "\x0D\x0A";            $self->{state} = ESCAPE_BEFORE_LF_STATE;
924            $self->{state} = URI_UNQUOTED_STATE;            $self->{c} = $self->{get_char}->($self);
           $self->{c} = $self->{get_char}->();  
925            redo A;            redo A;
926          } else {          } else {
927            ## Note: In |nl| in ... in |string| or |ident|.            ## Note: In |nl| in ... in |string| or |ident|.
           $self->{t}->{value} .= "\x0D\x0A";  
928            $self->{state} = ESCAPE_BEFORE_LF_STATE;            $self->{state} = ESCAPE_BEFORE_LF_STATE;
929            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
930            redo A;            redo A;
931          }          }
932        } elsif ($self->{c} == -1) {        } elsif ($self->{c} == -1) {
# Line 821  sub get_next_token ($) { Line 936  sub get_next_token ($) {
936          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
937          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
938              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
939          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
940          redo A;          redo A;
941        }        }
942    
943        if ($q == 0) {        if ($q == 0) {
944          $self->{state} = BEFORE_TOKEN_STATE;          if ($self->{t}->{type} == DIMENSION_TOKEN) {
945          # reprocess            if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
946          if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {              $self->{state} = BEFORE_TOKEN_STATE;
947            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};              # reprocess
948            return {type => DELIM_TOKEN, value => '-'};              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
949            #redo A;                                          line => $self->{line},
950          } elsif (length $self->{t}->{value}) {                                          column => $self->{column} - 2};
951            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};              unshift @{$self->{token}}, {type => MINUS_TOKEN,
952            return $self->{t};                                          line => $self->{line},
953            #redo A;                                          column => $self->{column} - 1};
954                ## BUG: line and column might be wrong if they are on the
955                ## line boundary.
956                $self->{t}->{type} = NUMBER_TOKEN;
957                $self->{t}->{value} = '';
958                return $self->{t};
959                #redo A;
960              } elsif (length $self->{t}->{value}) {
961                $self->{state} = BEFORE_TOKEN_STATE;
962                # reprocess
963                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
964                                            line => $self->{line},
965                                            column => $self->{column} - 1};
966                ## BUG: line and column might be wrong if they are on the
967                ## line boundary.
968                return $self->{t};
969                #redo A;
970              } else {
971                $self->{state} = BEFORE_TOKEN_STATE;
972                # reprocess
973                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
974                                            line => $self->{line},
975                                            column => $self->{column} - 1};
976                ## BUG: line and column might be wrong if they are on the
977                ## line boundary.
978                $self->{t}->{type} = NUMBER_TOKEN;
979                $self->{t}->{value} = '';
980                return $self->{t};
981                #redo A;
982              }
983          } else {          } else {
984            return {type => DELIM_TOKEN, value => '\\'};            if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
985            #redo A;              $self->{state} = BEFORE_TOKEN_STATE;
986                # reprocess
987                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
988                                            line => $self->{line},
989                                            column => $self->{column} - 2};
990                return {type => MINUS_TOKEN,
991                        line => $self->{line},
992                        column => $self->{column} - 1};
993                ## BUG: line and column might be wrong if they are on the
994                ## line boundary.
995                #redo A;
996              } elsif (length $self->{t}->{value}) {
997                $self->{state} = BEFORE_TOKEN_STATE;
998                # reprocess
999                unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
1000                                            line => $self->{line},
1001                                            column => $self->{column} - 1};
1002                ## BUG: line and column might be wrong if they are on the
1003                ## line boundary.
1004                return $self->{t};
1005                #redo A;
1006              } else {
1007                $self->{state} = BEFORE_TOKEN_STATE;
1008                # reprocess
1009                return {type => DELIM_TOKEN, value => '\\',
1010                        line => $self->{line},
1011                        column => $self->{column} - 1};
1012                ## BUG: line and column might be wrong if they are on the
1013                ## line boundary.
1014                #redo A;
1015              }
1016          }          }
1017        } else {        } elsif ($q == 1) {
1018          $self->{state} = $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;          $self->{state} = URI_UNQUOTED_STATE;
1019          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1020          redo A;          redo A;
1021          } else {
1022            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\',
1023                                        line => $self->{line},
1024                                        column => $self->{column} - 1};
1025            ## BUG: line and column might be wrong if they are on the
1026            ## line boundary.
1027            $self->{t}->{type} = {
1028              STRING_TOKEN, INVALID_TOKEN,
1029              URI_TOKEN, URI_INVALID_TOKEN,
1030              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1031            }->{$self->{t}->{type}} || $self->{t}->{type};
1032            $self->{state} = BEFORE_TOKEN_STATE;
1033            # reprocess
1034            return $self->{t};
1035            #redo A;
1036        }        }
1037      } elsif ($self->{state} == ESCAPE_STATE) {      } elsif ($self->{state} == ESCAPE_STATE) {
1038        ## NOTE: third..seventh character of |unicode| in |escape|.        ## NOTE: third..seventh character of |unicode| in |escape|.
1039        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
1040          $char = $char * 0x10 + $self->{c} - 0x0030;          $char = $char * 0x10 + $self->{c} - 0x0030;
1041          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
1042          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1043          redo A;          redo A;
1044        } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F        } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
1045          $char = $char * 0x10 + $self->{c} - 0x0041 + 0xA;          $char = $char * 0x10 + $self->{c} - 0x0041 + 0xA;
1046          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
1047          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1048          redo A;          redo A;
1049        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f        } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
1050          $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;          $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
1051          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;          $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
1052          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1053          redo A;          redo A;
1054        } elsif ($self->{c} == 0x0020 or # SP        } elsif ($self->{c} == 0x0020 or # SP
1055                 $self->{c} == 0x000A or # \n                 $self->{c} == 0x000A or # \n
# Line 869  sub get_next_token ($) { Line 1058  sub get_next_token ($) {
1058          $self->{t}->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
1059          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1060              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1061          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1062          redo A;          redo A;
1063        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
1064          $self->{state} = ESCAPE_BEFORE_LF_STATE;          $self->{state} = ESCAPE_BEFORE_LF_STATE;
1065          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1066          redo A;          redo A;
1067        } else {        } else {
1068          $self->{t}->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
# Line 891  sub get_next_token ($) { Line 1080  sub get_next_token ($) {
1080          $self->{t}->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
1081          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1082              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1083          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1084          redo A;          redo A;
1085        } elsif ($self->{c} == 0x000D) { # \r        } elsif ($self->{c} == 0x000D) { # \r
1086          $self->{state} = ESCAPE_BEFORE_NL_STATE;          $self->{state} = ESCAPE_BEFORE_NL_STATE;
1087          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1088          redo A;          redo A;
1089        } else {        } else {
1090          $self->{t}->{value} .= chr $char;          $self->{t}->{value} .= chr $char;
# Line 905  sub get_next_token ($) { Line 1094  sub get_next_token ($) {
1094          redo A;          redo A;
1095        }        }
1096      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {      } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
1097        ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.        ## NOTE: |\n| in |\r\n| in |nl| in |escape|.
1098        if ($self->{c} == 0x000A) { # \n        if ($self->{c} == 0x000A) { # \n
         $self->{t}->{value} .= chr $char;  
1099          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1100              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1101          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1102          redo A;          redo A;
1103        } else {        } else {
         $self->{t}->{value} .= chr $char;  
1104          $self->{state} = $q == 0 ? NAME_STATE :          $self->{state} = $q == 0 ? NAME_STATE :
1105              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;              $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
1106          # reconsume          # reprocess
1107          redo A;          redo A;
1108        }        }
1109      } elsif ($self->{state} == STRING_STATE) {      } elsif ($self->{state} == STRING_STATE) {
# Line 926  sub get_next_token ($) { Line 1113  sub get_next_token ($) {
1113        ## Or, in |URI|.        ## Or, in |URI|.
1114        if ($self->{c} == 0x005C) { # \        if ($self->{c} == 0x005C) { # \
1115          $self->{state} = ESCAPE_OPEN_STATE;          $self->{state} = ESCAPE_OPEN_STATE;
1116          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1117          redo A;          redo A;
1118        } elsif ($self->{c} == $q) { # " | '        } elsif ($self->{c} == $q) { # " | '
1119          if ($self->{t}->{type} == STRING_TOKEN) {          if ($self->{t}->{type} == STRING_TOKEN) {
1120            $self->{state} = BEFORE_TOKEN_STATE;            $self->{state} = BEFORE_TOKEN_STATE;
1121            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
1122            return $self->{t};            return $self->{t};
1123            #redo A;            #redo A;
1124          } else {          } else {
1125            $self->{state} = URI_AFTER_WSP_STATE;            $self->{state} = URI_AFTER_WSP_STATE;
1126            $self->{c} = $self->{get_char}->();            $self->{c} = $self->{get_char}->($self);
1127            redo A;            redo A;
1128          }          }
1129        } elsif ($self->{c} == 0x000A or # \n        } elsif ($self->{c} == 0x000A or # \n
1130                 $self->{c} == 0x000D or # \r                 $self->{c} == 0x000D or # \r
1131                 $self->{c} == 0x000C or # \f                 $self->{c} == 0x000C or # \f
1132                 $self->{c} == -1) {                 $self->{c} == -1) {
1133          $self->{t}->{type} = INVALID_TOKEN;          $self->{t}->{type} = {
1134              STRING_TOKEN, INVALID_TOKEN,
1135              INVALID_TOKEN, INVALID_TOKEN,
1136              URI_TOKEN, URI_INVALID_TOKEN,
1137              URI_INVALID_TOKEN, URI_INVALID_TOKEN,
1138              URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
1139              URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
1140            }->{$self->{t}->{type}};
1141          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1142          # reconsume          # reconsume
1143          return $self->{t};          return $self->{t};
# Line 951  sub get_next_token ($) { Line 1145  sub get_next_token ($) {
1145        } else {        } else {
1146          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1147          # stay in the state          # stay in the state
1148          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1149          redo A;          redo A;
1150        }        }
1151      } elsif ($self->{state} == NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_STATE) {
# Line 959  sub get_next_token ($) { Line 1153  sub get_next_token ($) {
1153        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1154          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1155          # stay in the state          # stay in the state
1156          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1157          redo A;          redo A;
1158        } elsif ($self->{c} == 0x002E) { # .        } elsif ($self->{c} == 0x002E) { # .
1159          $self->{state} = NUMBER_DOT_STATE;          $self->{state} = NUMBER_DOT_STATE;
1160          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1161          redo A;          redo A;
1162        } else {        } else {
1163          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = 0+$self->{t}->{value};
1164          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1165          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1166          # reprocess          # reprocess
# Line 977  sub get_next_token ($) { Line 1171  sub get_next_token ($) {
1171        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1172          $self->{t}->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
1173          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
1174          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1175          redo A;          redo A;
1176        } else {        } else {
1177          unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};          unshift @{$self->{token}}, {type => DOT_TOKEN};
1178          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = 0+$self->{t}->{value};
1179          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1180          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1181          # reprocess          # reprocess
# Line 993  sub get_next_token ($) { Line 1187  sub get_next_token ($) {
1187        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1188          $self->{t}->{value} .= '.' . chr $self->{c};          $self->{t}->{value} .= '.' . chr $self->{c};
1189          $self->{state} = NUMBER_DOT_NUMBER_STATE;          $self->{state} = NUMBER_DOT_NUMBER_STATE;
1190          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1191          redo A;          redo A;
1192        } else {        } else {
1193          $self->{state} = BEFORE_TOKEN_STATE;          $self->{state} = BEFORE_TOKEN_STATE;
1194          $self->{c} = $self->{get_char}->();          # reprocess
1195          return {type => DELIM_TOKEN, value => '.'};          return {type => DOT_TOKEN,
1196                    line => $self->{line}, column => $self->{column} - 1};
1197            ## BUG: line and column might be wrong if they are on the
1198            ## line boundary.
1199          #redo A;          #redo A;
1200        }        }
1201      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {      } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
# Line 1006  sub get_next_token ($) { Line 1203  sub get_next_token ($) {
1203        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {        if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1204          $self->{t}->{value} .= chr $self->{c};          $self->{t}->{value} .= chr $self->{c};
1205          # stay in the state          # stay in the state
1206          $self->{c} = $self->{get_char}->();          $self->{c} = $self->{get_char}->($self);
1207          redo A;          redo A;
1208        } else {        } else {
1209          $self->{t}->{number} = $self->{t}->{value};          $self->{t}->{number} = 0+$self->{t}->{value};
1210          $self->{t}->{value} = '';          $self->{t}->{value} = '';
1211          $self->{state} = AFTER_NUMBER_STATE;          $self->{state} = AFTER_NUMBER_STATE;
1212          # reprocess          # reprocess
# Line 1021  sub get_next_token ($) { Line 1218  sub get_next_token ($) {
1218    } # A    } # A
1219  } # get_next_token  } # get_next_token
1220    
## serialize_token ($invocant, $t)
##
## Returns a textual representation of the token |$t|, mainly useful
## for debugging and error reporting.
##
## Arguments:
##   - The invocant (class name or object); it is discarded.
##   - |$t|: a token hash reference.  |$t->{type}| is compared against
##     the |*_TOKEN| constants; |$t->{value}| and |$t->{number}| are
##     read for the token types that carry them.
##
## Returns: a string.  A token whose type is not recognized is
## represented as its type value surrounded by |{| and |}|.
##
## NOTE: This function is not intended for roundtrip-able serialization.
## Values are emitted as is (nothing is escaped), strings are always
## written with |"|, any run of white space becomes a single SPACE and
## the content of a comment is dropped.
sub serialize_token ($$) {
  shift;
  my $t = shift;

  if ($t->{type} == IDENT_TOKEN) {
    return $t->{value};
  } elsif ($t->{type} == ATKEYWORD_TOKEN) {
    return '@' . $t->{value};
  } elsif ($t->{type} == HASH_TOKEN) {
    return '#' . $t->{value};
  } elsif ($t->{type} == FUNCTION_TOKEN) {
    return $t->{value} . '(';
  } elsif ($t->{type} == URI_TOKEN) {
    return 'url(' . $t->{value} . ')';
  } elsif ($t->{type} == URI_INVALID_TOKEN) {
    ## The invalid variants are written without the closing delimiter.
    return 'url(' . $t->{value};
  } elsif ($t->{type} == URI_PREFIX_TOKEN) {
    return 'url-prefix(' . $t->{value} . ')';
  } elsif ($t->{type} == URI_PREFIX_INVALID_TOKEN) {
    return 'url-prefix(' . $t->{value};
  } elsif ($t->{type} == STRING_TOKEN) {
    return '"' . $t->{value} . '"';
  } elsif ($t->{type} == INVALID_TOKEN) {
    return '"' . $t->{value};
  } elsif ($t->{type} == NUMBER_TOKEN) {
    return $t->{number};
  } elsif ($t->{type} == DIMENSION_TOKEN) {
    ## For a dimension, |value| holds the unit.
    return $t->{number} . $t->{value};
  } elsif ($t->{type} == PERCENTAGE_TOKEN) {
    return $t->{number} . '%';
  } elsif ($t->{type} == UNICODE_RANGE_TOKEN) {
    return 'U+' . $t->{value};
  } elsif ($t->{type} == DELIM_TOKEN) {
    return $t->{value};
  } elsif ($t->{type} == PLUS_TOKEN) {
    return '+';
  } elsif ($t->{type} == GREATER_TOKEN) {
    return '>';
  } elsif ($t->{type} == COMMA_TOKEN) {
    return ',';
  } elsif ($t->{type} == TILDE_TOKEN) {
    return '~';
  } elsif ($t->{type} == DASHMATCH_TOKEN) {
    return '|=';
  } elsif ($t->{type} == PREFIXMATCH_TOKEN) {
    return '^=';
  } elsif ($t->{type} == SUFFIXMATCH_TOKEN) {
    return '$=';
  } elsif ($t->{type} == SUBSTRINGMATCH_TOKEN) {
    return '*=';
  } elsif ($t->{type} == INCLUDES_TOKEN) {
    return '~=';
  } elsif ($t->{type} == SEMICOLON_TOKEN) {
    return ';';
  } elsif ($t->{type} == LBRACE_TOKEN) {
    return '{';
  } elsif ($t->{type} == RBRACE_TOKEN) {
    return '}';
  } elsif ($t->{type} == LPAREN_TOKEN) {
    return '(';
  } elsif ($t->{type} == RPAREN_TOKEN) {
    return ')';
  } elsif ($t->{type} == LBRACKET_TOKEN) {
    return '[';
  } elsif ($t->{type} == RBRACKET_TOKEN) {
    return ']';
  } elsif ($t->{type} == S_TOKEN) {
    return ' ';
  } elsif ($t->{type} == CDO_TOKEN) {
    return '<!--';
  } elsif ($t->{type} == CDC_TOKEN) {
    return '-->';
  } elsif ($t->{type} == COMMENT_TOKEN) {
    return '/**/';
  } elsif ($t->{type} == COMMENT_INVALID_TOKEN) {
    return '/*';
  } elsif ($t->{type} == EOF_TOKEN) {
    return '{EOF}';
  } elsif ($t->{type} == MINUS_TOKEN) {
    return '-';
  } elsif ($t->{type} == STAR_TOKEN) {
    return '*';
  } elsif ($t->{type} == VBAR_TOKEN) {
    return '|';
  } elsif ($t->{type} == DOT_TOKEN) {
    ## The tokenizer emits |DOT_TOKEN| for a |.| that is not part of a
    ## number; without this branch it would be serialized by the
    ## fallback below as its numeric type.
    return '.';
  } elsif ($t->{type} == COLON_TOKEN) {
    return ':';
  } elsif ($t->{type} == MATCH_TOKEN) {
    return '=';
  } elsif ($t->{type} == EXCLAMATION_TOKEN) {
    return '!';
  } else {
    ## Unknown token type: expose the raw type value.
    return '{'.$t->{type}.'}';
  }
} # serialize_token
1317    
1318    =head1 LICENSE
1319    
1320    Copyright 2007 Wakaba <w@suika.fam.cx>
1321    
1322    This library is free software; you can redistribute it
1323    and/or modify it under the same terms as Perl itself.
1324    
1325    =cut
1326    
1327  1;  1;
1328  # $Date$  # $Date$

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.20

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24