/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download)
Sat Sep 8 03:25:05 2007 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.4: +166 -100 lines
++ whatpm/Whatpm/CSS/ChangeLog	8 Sep 2007 03:25:00 -0000
	* Tokenizer.pm: |UNICODE-RANGE| is implemented.

2007-09-08  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::CSS::Tokenizer;
2     use strict;
3    
## Tokenizer state numbers.  The current state is kept in
## |$self->{state}|; |init| sets it to |BEFORE_TOKEN_STATE| and
## |get_next_token| compares it against these values.  Each entry
## becomes an argument-less constant sub of the same name.
use constant {
  BEFORE_TOKEN_STATE      => 0,
  BEFORE_NMSTART_STATE    => 1,
  NAME_STATE              => 2,
  ESCAPE_OPEN_STATE       => 3,
  STRING_STATE            => 4,
  HASH_OPEN_STATE         => 5,
  NUMBER_STATE            => 6,
  NUMBER_FRACTION_STATE   => 7,
  AFTER_NUMBER_STATE      => 8,
  URI_BEFORE_WSP_STATE    => 9,
  ESCAPE_STATE            => 10,
  ESCAPE_BEFORE_LF_STATE  => 11,
  ESCAPE_BEFORE_NL_STATE  => 12,
  NUMBER_DOT_STATE        => 13,
  NUMBER_DOT_NUMBER_STATE => 14,
  DELIM_STATE             => 15,
  URI_UNQUOTED_STATE      => 16,
  URI_AFTER_WSP_STATE     => 17,
  AFTER_AT_STATE          => 18,
  AFTER_AT_HYPHEN_STATE   => 19,
};
24 wakaba 1.2
## Token type numbers, used as the |type| member of the token hashes
## returned by |get_next_token|.  The numbering starts at 1 and matches
## the positions of the names in |@TokenName|.  Each entry becomes an
## argument-less constant sub of the same name.
use constant {
  IDENT_TOKEN                 => 1,
  ATKEYWORD_TOKEN             => 2,
  HASH_TOKEN                  => 3,
  FUNCTION_TOKEN              => 4,
  URI_TOKEN                   => 5,
  URI_INVALID_TOKEN           => 6,
  URI_PREFIX_TOKEN            => 7,
  URI_PREFIX_INVALID_TOKEN    => 8,
  STRING_TOKEN                => 9,
  INVALID_TOKEN               => 10,
  NUMBER_TOKEN                => 11,
  DIMENSION_TOKEN             => 12,
  PERCENTAGE_TOKEN            => 13,
  UNICODE_RANGE_TOKEN         => 14,
  UNICODE_RANGE_INVALID_TOKEN => 15,
  DELIM_TOKEN                 => 16,
  PLUS_TOKEN                  => 17,
  GREATER_TOKEN               => 18,
  COMMA_TOKEN                 => 19,
  TILDE_TOKEN                 => 20,
  DASHMATCH_TOKEN             => 21,
  PREFIXMATCH_TOKEN           => 22,
  SUFFIXMATCH_TOKEN           => 23,
  SUBSTRINGMATCH_TOKEN        => 24,
  INCLUDES_TOKEN              => 25,
  SEMICOLON_TOKEN             => 26,
  LBRACE_TOKEN                => 27,
  RBRACE_TOKEN                => 28,
  LPAREN_TOKEN                => 29,
  RPAREN_TOKEN                => 30,
  LBRACKET_TOKEN              => 31,
  RBRACKET_TOKEN              => 32,
  S_TOKEN                     => 33,
  CDO_TOKEN                   => 34,
  CDC_TOKEN                   => 35,
  COMMENT_TOKEN               => 36,
  COMMENT_INVALID_TOKEN       => 37,
  EOF_TOKEN                   => 38,
};
63    
## Token names indexed by token type number (the |*_TOKEN| constants
## with the |_TOKEN| suffix removed).  Token types start at 1, so slot
## 0 only holds the placeholder string |0|.
our @TokenName = (
  '0',
  qw(IDENT ATKEYWORD HASH FUNCTION),
  qw(URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID),
  qw(STRING INVALID),
  qw(NUMBER DIMENSION PERCENTAGE),
  qw(UNICODE_RANGE UNICODE_RANGE_INVALID),
  qw(DELIM PLUS GREATER COMMA TILDE),
  qw(DASHMATCH PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES),
  qw(SEMICOLON LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET),
  qw(S CDO CDC COMMENT COMMENT_INVALID EOF),
);
72    
## Constructor.  Returns a new object blessed into the invocant, with
##   token    - an empty array; |get_next_token| hands out any tokens
##              queued here before reading more input,
##   get_char - default character source, which always returns -1
##              (the value |get_next_token| reports as |EOF_TOKEN|),
##   onerror  - default error callback, which does nothing.
## Callers are expected to replace |get_char| before calling |init|.
sub new ($) {
  my $class = shift;
  my %member = (
    token    => [],
    get_char => sub { -1 },
    onerror  => sub { },
  );
  return bless \%member, $class;
} # new
78    
## Prepares the tokenizer for reading: puts the state machine into
## |BEFORE_TOKEN_STATE| and then pulls the first input character from
## the |get_char| callback into |$self->{c}|.
##
## |$self->{t}| (the token under construction) is not set up here; once
## populated it has the shape
##   {type => token-type, value => value, number => number}.
sub init ($) {
  my ($self) = @_;
  $self->{state} = BEFORE_TOKEN_STATE;
  my $read_char = $self->{get_char};
  $self->{c} = $read_char->();
} # init
85    
86     sub get_next_token ($) {
87     my $self = shift;
88     if (@{$self->{token}}) {
89     return shift @{$self->{token}};
90     }
91    
92     my $char;
93     my $num; # |{num}|, if any.
94     my $i; # |$i + 1|th character in |unicode| in |escape|.
95 wakaba 1.3 my $q;
96     ## NOTE:
97     ## 0: in |ident|.
98     ## 1: in |URI| outside of |string|.
99     ## 0x0022: in |string1| or |invalid1|.
100     ## 0x0027: in |string2| or |invalid2|.
101 wakaba 1.1
102     A: {
103     if ($self->{state} == BEFORE_TOKEN_STATE) {
104     if ($self->{c} == 0x002D) { # -
105     ## NOTE: |-| in |ident| in |IDENT|
106 wakaba 1.5 $self->{t} = {type => IDENT_TOKEN, value => '-'};
107 wakaba 1.1 $self->{state} = BEFORE_NMSTART_STATE;
108     $self->{c} = $self->{get_char}->();
109     redo A;
110 wakaba 1.5 } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
111     $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
112     $self->{c} = $self->{get_char}->();
113     if ($self->{c} == 0x002B) { # +
114     $self->{c} = $self->{get_char}->();
115     if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
116     (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
117     (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
118     $self->{c} == 0x003F) { # ?
119     $self->{t}->{value} .= '+' . chr $self->{c};
120     $self->{t}->{type} = UNICODE_RANGE_TOKEN;
121     $self->{c} = $self->{get_char}->();
122     C: for (2..6) {
123     if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
124     (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
125     (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
126     $self->{c} == 0x003F) { # ?
127     $self->{t}->{value} .= chr $self->{c};
128     $self->{c} = $self->{get_char}->();
129     } else {
130     last C;
131     }
132     } # C
133    
134     if ($self->{c} == 0x002D) { # -
135     $self->{c} = $self->{get_char}->();
136     if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
137     (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
138     (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
139     $self->{t}->{value} .= '-' . chr $self->{c};
140     $self->{c} = $self->{get_char}->();
141     C: for (2..6) {
142     if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
143     (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
144     (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
145     $self->{t}->{value} .= chr $self->{c};
146     $self->{c} = $self->{get_char}->();
147     } else {
148     last C;
149     }
150     } # C
151    
152     #
153     } else {
154     my $token = $self->{t};
155     $self->{t} = {type => IDENT_TOKEN, value => '-'};
156     $self->{state} = BEFORE_NMSTART_STATE;
157     # reprocess
158     return $token;
159     #redo A;
160     }
161     }
162    
163     $self->{state} = BEFORE_TOKEN_STATE;
164     # reprocess
165     return $self->{t};
166     #redo A;
167     } else {
168     unshift @{$self->{token}}, {type => PLUS_TOKEN};
169     $self->{state} = BEFORE_TOKEN_STATE;
170     # reprocess
171     return $self->{t};
172     #redo A;
173     }
174     } else {
175     $self->{state} = NAME_STATE;
176     # reprocess
177     redo A;
178     }
179 wakaba 1.2 } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
180     (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
181 wakaba 1.1 $self->{c} == 0x005F or # _
182     $self->{c} > 0x007F) { # nonascii
183     ## NOTE: |nmstart| in |ident| in |IDENT|
184 wakaba 1.5 $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
185 wakaba 1.1 $self->{state} = NAME_STATE;
186     $self->{c} = $self->{get_char}->();
187     redo A;
188     } elsif ($self->{c} == 0x005C) { # \
189     ## NOTE: |nmstart| in |ident| in |IDENT|
190 wakaba 1.5 $self->{t} = {type => IDENT_TOKEN, value => ''};
191 wakaba 1.1 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
192     $self->{c} = $self->{get_char}->();
193     redo A;
194     } elsif ($self->{c} == 0x0040) { # @
195     ## NOTE: |@| in |ATKEYWORD|
196 wakaba 1.5 $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};
197 wakaba 1.3 $self->{state} = AFTER_AT_STATE;
198 wakaba 1.1 $self->{c} = $self->{get_char}->();
199     redo A;
200 wakaba 1.3 } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
201 wakaba 1.5 $self->{t} = {type => STRING_TOKEN, value => ''};
202 wakaba 1.3 $self->{state} = STRING_STATE; $q = $self->{c};
203 wakaba 1.1 $self->{c} = $self->{get_char}->();
204     redo A;
205     } elsif ($self->{c} == 0x0023) { # #
206     ## NOTE: |#| in |HASH|.
207 wakaba 1.5 $self->{t} = {type => HASH_TOKEN, value => ''};
208 wakaba 1.1 $self->{state} = HASH_OPEN_STATE;
209     $self->{c} = $self->{get_char}->();
210     redo A;
211     } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
212     ## NOTE: |num|.
213 wakaba 1.5 $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};
214 wakaba 1.1 $self->{state} = NUMBER_STATE;
215     $self->{c} = $self->{get_char}->();
216     redo A;
217     } elsif ($self->{c} == 0x002E) { # .
218     ## NOTE: |num|.
219 wakaba 1.5 $self->{t} = {type => NUMBER_TOKEN, value => '0'};
220 wakaba 1.1 $self->{state} = NUMBER_FRACTION_STATE;
221     $self->{c} = $self->{get_char}->();
222     redo A;
223 wakaba 1.4 } elsif ($self->{c} == 0x002F) { # /
224     $self->{c} = $self->{get_char}->();
225     if ($self->{c} == 0x002A) { # *
226     C: {
227     $self->{c} = $self->{get_char}->();
228     if ($self->{c} == 0x002A) { # *
229     D: {
230     $self->{c} = $self->{get_char}->();
231     if ($self->{c} == 0x002F) { # /
232     #
233     } elsif ($self->{c} == 0x002A) { # *
234     redo D;
235     } else {
236     redo C;
237     }
238     } # D
239     } elsif ($self->{c} == -1) {
240     # stay in the state
241     # reprocess
242     return {type => COMMENT_INVALID_TOKEN};
243     #redo A;
244     } else {
245     redo C;
246     }
247     } # C
248    
249     # stay in the state.
250     $self->{c} = $self->{get_char}->();
251     redo A;
252     } else {
253     # stay in the state.
254     # reprocess
255     return {type => DELIM_STATE, value => '/'};
256     #redo A;
257     }
258 wakaba 1.1 } elsif ($self->{c} == 0x003C) { # <
259     ## NOTE: |CDO|
260     $self->{c} = $self->{get_char}->();
261     if ($self->{c} == 0x0021) { # !
262     $self->{c} = $self->{get_char}->();
263     if ($self->{c} == 0x002C) { # -
264     $self->{c} = $self->{get_char}->();
265     if ($self->{c} == 0x002C) { # -
266     $self->{state} = BEFORE_TOKEN_STATE;
267     $self->{c} = $self->{get_char}->();
268     return {type => CDO_TOKEN};
269     #redo A;
270     } else {
271     unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
272     ## NOTE: |-| in |ident| in |IDENT|
273 wakaba 1.5 $self->{t} = {type => IDENT_TOKEN, value => '-'};
274 wakaba 1.1 $self->{state} = BEFORE_NMSTART_STATE;
275     #reprocess
276     return {type => DELIM_TOKEN, value => '<'};
277     #redo A;
278     }
279     } else {
280     unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
281     $self->{state} = BEFORE_TOKEN_STATE;
282     #reprocess
283     return {type => DELIM_TOKEN, value => '<'};
284     #redo A;
285     }
286     } else {
287     $self->{state} = BEFORE_TOKEN_STATE;
288     #reprocess
289     return {type => DELIM_TOKEN, value => '<'};
290     #redo A;
291     }
292 wakaba 1.2 } elsif (my $t = {
293     0x003B => SEMICOLON_TOKEN, # ;
294     0x007B => LBRACE_TOKEN, # {
295     0x007D => RBRACE_TOKEN, # }
296     0x0028 => LPAREN_TOKEN, # (
297     0x0029 => RPAREN_TOKEN, # )
298     0x005B => LBRACKET_TOKEN, # [
299     0x005D => RBRACKET_TOKEN, # ]
300 wakaba 1.1 }->{$self->{c}}) {
301     # stay in the state
302     $self->{c} = $self->{get_char}->();
303 wakaba 1.2 return {type => $t};
304 wakaba 1.1 # redo A;
305     } elsif ({
306     0x0020 => 1, # SP
307     0x0009 => 1, # \t
308     0x000D => 1, # \r
309     0x000A => 1, # \n
310     0x000C => 1, # \f
311     }->{$self->{c}}) {
312     W: {
313     $self->{c} = $self->{get_char}->();
314     if ({
315     0x0020 => 1, # SP
316     0x0009 => 1, # \t
317     0x000D => 1, # \r
318     0x000A => 1, # \n
319     0x000C => 1, # \f
320     }->{$self->{c}}) {
321     redo W;
322     } elsif (my $v = {
323     0x002B => PLUS_TOKEN, # +
324     0x003E => GREATER_TOKEN, # >
325     0x002C => COMMA_TOKEN, # ,
326     0x007E => TILDE_TOKEN, # ~
327     }->{$self->{c}}) {
328     # stay in the state
329     $self->{c} = $self->{get_char}->();
330     return {type => $v};
331     #redo A;
332     } else {
333     # stay in the state
334     # reprocess
335     return {type => S_TOKEN};
336     #redo A;
337     }
338     } # W
339     } elsif (my $v = {
340     0x007C => DASHMATCH_TOKEN, # |
341     0x005E => PREFIXMATCH_TOKEN, # ^
342     0x0024 => SUFFIXMATCH_TOKEN, # $
343     0x002A => SUBSTRINGMATCH_TOKEN, # *
344     }->{$self->{c}}) {
345 wakaba 1.2 my $c = $self->{c};
346 wakaba 1.1 $self->{c} = $self->{get_char}->();
347     if ($self->{c} == 0x003D) { # =
348     # stay in the state
349     $self->{c} = $self->{get_char}->();
350     return {type => $v};
351     #redo A;
352     } else {
353     # stay in the state
354     # reprocess
355 wakaba 1.2 return {type => DELIM_TOKEN, value => chr $c};
356 wakaba 1.1 #redo A;
357     }
358     } elsif ($self->{c} == 0x002B) { # +
359     # stay in the state
360     $self->{c} = $self->{get_char}->();
361     return {type => PLUS_TOKEN};
362     #redo A;
363     } elsif ($self->{c} == 0x003E) { # >
364     # stay in the state
365     $self->{c} = $self->{get_char}->();
366     return {type => GREATER_TOKEN};
367     #redo A;
368     } elsif ($self->{c} == 0x002C) { # ,
369     # stay in the state
370     $self->{c} = $self->{get_char}->();
371     return {type => COMMA_TOKEN};
372     #redo A;
373     } elsif ($self->{c} == 0x007E) { # ~
374     $self->{c} = $self->{get_char}->();
375     if ($self->{c} == 0x003D) { # =
376     # stay in the state
377     $self->{c} = $self->{get_char}->();
378     return {type => INCLUDES_TOKEN};
379     #redo A;
380     } else {
381     # stay in the state
382     # reprocess
383     return {type => TILDE_TOKEN};
384     #redo A;
385     }
386     } elsif ($self->{c} == -1) {
387     # stay in the state
388     $self->{c} = $self->{get_char}->();
389     return {type => EOF_TOKEN};
390     #redo A;
391     } else {
392     # stay in the state
393 wakaba 1.5 $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};
394 wakaba 1.1 $self->{c} = $self->{get_char}->();
395 wakaba 1.5 return $self->{t};
396 wakaba 1.1 #redo A;
397     }
398     } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
399 wakaba 1.3 ## NOTE: |nmstart| in |ident| in (|IDENT|, |DIMENSION|, or
400     ## |FUNCTION|)
401 wakaba 1.2 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
402     (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
403 wakaba 1.1 $self->{c} == 0x005F or # _
404     $self->{c} > 0x007F) { # nonascii
405 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
406     $self->{t}->{type} = DIMENSION_TOKEN
407     if $self->{t}->{type} == NUMBER_TOKEN;
408 wakaba 1.1 $self->{state} = NAME_STATE;
409     $self->{c} = $self->{get_char}->();
410     redo A;
411     } elsif ($self->{c} == 0x005C) { # \
412 wakaba 1.2 ## TODO: 12-\X, 12-\{nl}
413 wakaba 1.1 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
414     $self->{c} = $self->{get_char}->();
415     redo A;
416     } elsif ($self->{c} == 0x002D and # -
417 wakaba 1.5 $self->{t}->{type} == IDENT_TOKEN) {
418 wakaba 1.1 $self->{c} = $self->{get_char}->();
419     if ($self->{c} == 0x003E) { # >
420     $self->{state} = BEFORE_TOKEN_STATE;
421     $self->{c} = $self->{get_char}->();
422     return {type => CDC_TOKEN};
423     #redo A;
424     } else {
425     ## NOTE: |-|, |-|, $self->{c}
426 wakaba 1.5 #$self->{t} = {type => IDENT_TOKEN, value => '-'};
427 wakaba 1.1 # stay in the state
428     # reconsume
429     return {type => DELIM_TOKEN, value => '-'};
430     #redo A;
431     }
432     } else {
433 wakaba 1.5 if ($self->{t}->{type} == NUMBER_TOKEN) {
434 wakaba 1.2 ## NOTE: |-| after |NUMBER|.
435     unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
436     $self->{state} = BEFORE_TOKEN_STATE;
437     # reconsume
438 wakaba 1.5 $self->{t}->{value} = $self->{t}->{number};
439     delete $self->{t}->{number};
440     return $self->{t};
441 wakaba 1.1 } else {
442     ## NOTE: |-| not followed by |nmstart|.
443     $self->{state} = BEFORE_TOKEN_STATE;
444     $self->{c} = $self->{get_char}->();
445     return {type => DELIM_TOKEN, value => '-'};
446     }
447     }
448 wakaba 1.3 } elsif ($self->{state} == AFTER_AT_STATE) {
449     if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
450     (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
451     $self->{c} == 0x005F or # _
452     $self->{c} > 0x007F) { # nonascii
453 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
454 wakaba 1.3 $self->{state} = NAME_STATE;
455     $self->{c} = $self->{get_char}->();
456     redo A;
457     } elsif ($self->{c} == 0x002D) { # -
458 wakaba 1.5 $self->{t}->{value} .= '-';
459 wakaba 1.3 $self->{state} = AFTER_AT_HYPHEN_STATE;
460     $self->{c} = $self->{get_char}->();
461     redo A;
462     } elsif ($self->{c} == 0x005C) { # \
463     $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
464     $self->{c} = $self->{get_char}->();
465     redo A;
466     } else {
467     $self->{state} = BEFORE_TOKEN_STATE;
468     # reprocess
469     return {type => DELIM_TOKEN, value => '@'};
470     }
471     } elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) {
472     if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
473     (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
474     $self->{c} == 0x005F or # _
475     $self->{c} > 0x007F) { # nonascii
476 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
477 wakaba 1.3 $self->{state} = NAME_STATE;
478     $self->{c} = $self->{get_char}->();
479     redo A;
480     } elsif ($self->{c} == 0x002D) { # -
481     $self->{c} = $self->{get_char}->();
482     if ($self->{c} == 0x003E) { # >
483 wakaba 1.4 unshift @{$self->{token}}, {type => CDC_TOKEN};
484 wakaba 1.3 $self->{state} = BEFORE_TOKEN_STATE;
485     $self->{c} = $self->{get_char}->();
486 wakaba 1.4 return {type => DELIM_TOKEN, value => '@'};
487 wakaba 1.3 #redo A;
488     } else {
489     unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
490 wakaba 1.5 $self->{t} = {type => IDENT_TOKEN, value => '-'};
491 wakaba 1.3 $self->{state} = BEFORE_NMSTART_STATE;
492     # reprocess
493     return {type => DELIM_TOKEN, value => '@'};
494     #redo A;
495     }
496     } elsif ($self->{c} == 0x005C) { # \
497     ## TODO: @-\{nl}
498     $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
499     $self->{c} = $self->{get_char}->();
500     redo A;
501     } else {
502     unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
503     $self->{state} = BEFORE_TOKEN_STATE;
504     # reprocess
505     return {type => DELIM_TOKEN, value => '@'};
506     }
507 wakaba 1.1 } elsif ($self->{state} == AFTER_NUMBER_STATE) {
508     if ($self->{c} == 0x002D) { # -
509     ## NOTE: |-| in |ident|.
510 wakaba 1.5 $self->{t}->{value} = '-';
511 wakaba 1.1 $self->{state} = BEFORE_NMSTART_STATE;
512     $self->{c} = $self->{get_char}->();
513     redo A;
514 wakaba 1.2 } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
515     (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
516 wakaba 1.1 $self->{c} == 0x005F or # _
517     $self->{c} > 0x007F) { # nonascii
518     ## NOTE: |nmstart| in |ident|.
519 wakaba 1.5 $self->{t}->{value} = chr $self->{c};
520     $self->{t}->{type} = DIMENSION_TOKEN;
521 wakaba 1.1 $self->{state} = NAME_STATE;
522     $self->{c} = $self->{get_char}->();
523     redo A;
524     } elsif ($self->{c} == 0x005C) { # \
525     ## NOTE: |nmstart| in |ident| in |IDENT|
526 wakaba 1.5 $self->{t}->{value} = '';
527 wakaba 1.1 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
528     $self->{c} = $self->{get_char}->();
529     redo A;
530     } elsif ($self->{c} == 0x0025) { # %
531 wakaba 1.5 $self->{t}->{type} = PERCENTAGE_TOKEN;
532 wakaba 1.1 $self->{state} = BEFORE_TOKEN_STATE;
533     $self->{c} = $self->{get_char}->();
534 wakaba 1.5 return $self->{t};
535 wakaba 1.1 #redo A;
536     } else {
537     $self->{state} = BEFORE_TOKEN_STATE;
538     # reprocess
539 wakaba 1.5 return $self->{t};
540 wakaba 1.1 #redo A;
541     }
542     } elsif ($self->{state} == HASH_OPEN_STATE) {
543     ## NOTE: The first |nmchar| in |name| in |HASH|.
544 wakaba 1.2 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
545     (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
546     (0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
547 wakaba 1.1 $self->{c} == 0x002D or # -
548     $self->{c} == 0x005F or # _
549     $self->{c} > 0x007F) { # nonascii
550 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
551 wakaba 1.1 $self->{state} = NAME_STATE;
552     $self->{c} = $self->{get_char}->();
553     redo A;
554     } elsif ($self->{c} == 0x005C) { # \
555     $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
556     $self->{c} = $self->{get_char}->();
557     redo A;
558     } else {
559     $self->{state} = BEFORE_TOKEN_STATE;
560     $self->{c} = $self->{get_char}->();
561     return {type => DELIM_TOKEN, value => '#'};
562     #redo A;
563     }
564     } elsif ($self->{state} == NAME_STATE) {
565     ## NOTE: |nmchar| in (|ident| or |name|).
566 wakaba 1.2 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
567     (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
568     (0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
569 wakaba 1.1 $self->{c} == 0x005F or # _
570     $self->{c} == 0x002D or # -
571     $self->{c} > 0x007F) { # nonascii
572 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
573 wakaba 1.1 # stay in the state
574     $self->{c} = $self->{get_char}->();
575     redo A;
576     } elsif ($self->{c} == 0x005C) { # \
577 wakaba 1.3 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
578 wakaba 1.1 $self->{c} = $self->{get_char}->();
579     redo A;
580     } elsif ($self->{c} == 0x0028 and # (
581 wakaba 1.5 $self->{t}->{type} == IDENT_TOKEN) { # (
582     my $func_name = $self->{t}->{value};
583 wakaba 1.3 $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
584     if ($func_name eq 'url' or $func_name eq 'url-prefix') {
585 wakaba 1.5 if ($self->{t}->{has_escape}) {
586 wakaba 1.3 ## TODO: warn
587     }
588 wakaba 1.5 $self->{t}->{type}
589 wakaba 1.3 = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
590 wakaba 1.5 $self->{t}->{value} = '';
591 wakaba 1.1 $self->{state} = URI_BEFORE_WSP_STATE;
592     $self->{c} = $self->{get_char}->();
593     redo A;
594     } else {
595 wakaba 1.5 $self->{t}->{type} = FUNCTION_TOKEN;
596 wakaba 1.1 $self->{state} = BEFORE_TOKEN_STATE;
597     $self->{c} = $self->{get_char}->();
598 wakaba 1.5 return $self->{t};
599 wakaba 1.1 #redo A;
600     }
601     } else {
602     $self->{state} = BEFORE_TOKEN_STATE;
603     # reconsume
604 wakaba 1.5 return $self->{t};
605 wakaba 1.1 #redo A;
606     }
607 wakaba 1.3 } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
608     while ({
609     0x0020 => 1, # SP
610     0x0009 => 1, # \t
611     0x000D => 1, # \r
612     0x000A => 1, # \n
613     0x000C => 1, # \f
614     }->{$self->{c}}) {
615     $self->{c} = $self->{get_char}->();
616     }
617     if ($self->{c} == -1) {
618 wakaba 1.5 $self->{t}->{type} = {
619 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
620     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
621     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
622     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
623 wakaba 1.5 }->{$self->{t}->{type}};
624 wakaba 1.3 $self->{state} = BEFORE_TOKEN_STATE;
625     $self->{c} = $self->{get_char}->();
626 wakaba 1.5 return $self->{t};
627 wakaba 1.3 #redo A;
628     } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
629     ## TODO: Should we consider matches of "(" and ")"?
630 wakaba 1.5 $self->{t}->{type} = {
631 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
632     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
633     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
634     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
635 wakaba 1.5 }->{$self->{t}->{type}};
636 wakaba 1.3 $self->{state} = URI_UNQUOTED_STATE;
637     $self->{c} = $self->{get_char}->();
638     redo A;
639     } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
640     $self->{state} = STRING_STATE; $q = $self->{c};
641     $self->{c} = $self->{get_char}->();
642     redo A;
643     } elsif ($self->{c} == 0x0029) { # )
644     $self->{state} = BEFORE_TOKEN_STATE;
645     $self->{c} = $self->{get_char}->();
646 wakaba 1.5 return $self->{t};
647 wakaba 1.3 #redo A;
648     } elsif ($self->{c} == 0x005C) { # \
649     $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
650     $self->{c} = $self->{get_char}->();
651     redo A;
652     } else {
653 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
654 wakaba 1.3 $self->{state} = URI_UNQUOTED_STATE;
655     $self->{c} = $self->{get_char}->();
656     redo A;
657     }
658     } elsif ($self->{state} == URI_UNQUOTED_STATE) {
659     if ({
660     0x0020 => 1, # SP
661     0x0009 => 1, # \t
662     0x000D => 1, # \r
663     0x000A => 1, # \n
664     0x000C => 1, # \f
665     }->{$self->{c}}) {
666     $self->{state} = URI_AFTER_WSP_STATE;
667     $self->{c} = $self->{get_char}->();
668     redo A;
669     } elsif ($self->{c} == -1) {
670 wakaba 1.5 $self->{t}->{type} = {
671 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
672     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
673     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
674     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
675 wakaba 1.5 }->{$self->{t}->{type}};
676 wakaba 1.3 $self->{state} = BEFORE_TOKEN_STATE;
677     $self->{c} = $self->{get_char}->();
678 wakaba 1.5 return $self->{t};
679 wakaba 1.3 #redo A;
680     } elsif ($self->{c} < 0x0020 or {
681     0x0022 => 1, # "
682     0x0027 => 1, # '
683     0x0028 => 1, # (
684     }->{$self->{c}}) { # C0 or (
685     ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
686 wakaba 1.5 $self->{t}->{type} = {
687 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
688     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
689     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
690     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
691 wakaba 1.5 }->{$self->{t}->{type}};
692 wakaba 1.3 # stay in the state.
693     $self->{c} = $self->{get_char}->();
694     redo A;
695     } elsif ($self->{c} == 0x0029) { # )
696     $self->{state} = BEFORE_TOKEN_STATE;
697     $self->{c} = $self->{get_char}->();
698 wakaba 1.5 return $self->{t};
699 wakaba 1.3 #redo A;
700     } elsif ($self->{c} == 0x005C) { # \
701     $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
702     $self->{c} = $self->{get_char}->();
703     redo A;
704     } else {
705 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
706 wakaba 1.3 # stay in the state.
707     $self->{c} = $self->{get_char}->();
708     redo A;
709     }
710     } elsif ($self->{state} == URI_AFTER_WSP_STATE) {
711     if ({
712     0x0020 => 1, # SP
713     0x0009 => 1, # \t
714     0x000D => 1, # \r
715     0x000A => 1, # \n
716     0x000C => 1, # \f
717     }->{$self->{c}}) {
718     # stay in the state.
719     $self->{c} = $self->{get_char}->();
720     redo A;
721     } elsif ($self->{c} == -1) {
722 wakaba 1.5 $self->{t}->{type} = {
723 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
724     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
725     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
726     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
727 wakaba 1.5 }->{$self->{t}->{type}};
728 wakaba 1.3 $self->{state} = BEFORE_TOKEN_STATE;
729     $self->{c} = $self->{get_char}->();
730 wakaba 1.5 return $self->{t};
731 wakaba 1.3 #redo A;
732     } elsif ($self->{c} == 0x0029) { # )
733     $self->{state} = BEFORE_TOKEN_STATE;
734     $self->{c} = $self->{get_char}->();
735 wakaba 1.5 return $self->{t};
736 wakaba 1.3 #redo A;
737     } elsif ($self->{c} == 0x005C) { # \
738     $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
739     $self->{c} = $self->{get_char}->();
740     redo A;
741     } else {
742     ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
743 wakaba 1.5 $self->{t}->{type} = {
744 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
745     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
746     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
747     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
748 wakaba 1.5 }->{$self->{t}->{type}};
749 wakaba 1.3 # stay in the state.
750     $self->{c} = $self->{get_char}->();
751     redo A;
752     }
753 wakaba 1.1 } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
754 wakaba 1.5 $self->{t}->{has_escape} = 1;
755 wakaba 1.1 if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
756     ## NOTE: second character of |unicode| in |escape|.
757     $char = $self->{c} - 0x0030;
758     $self->{state} = ESCAPE_STATE; $i = 2;
759     $self->{c} = $self->{get_char}->();
760     redo A;
761     } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
762     ## NOTE: second character of |unicode| in |escape|.
763     $char = $self->{c} - 0x0041 + 0xA;
764     $self->{state} = ESCAPE_STATE; $i = 2;
765     $self->{c} = $self->{get_char}->();
766     redo A;
767 wakaba 1.2 } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
768 wakaba 1.1 ## NOTE: second character of |unicode| in |escape|.
769     $char = $self->{c} - 0x0061 - 0xA;
770     $self->{state} = ESCAPE_STATE; $i = 2;
771     $self->{c} = $self->{get_char}->();
772     redo A;
773     } elsif ($self->{c} == 0x000A or # \n
774     $self->{c} == 0x000C) { # \f
775     if ($q == 0) {
776     ## NOTE: In |escape| in ... in |ident|.
777     $self->{state} = BEFORE_TOKEN_STATE;
778     unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
779 wakaba 1.5 return $self->{t};
780 wakaba 1.1 # reconsume
781     #redo A;
782 wakaba 1.3 } elsif ($q == 1) {
783     ## NOTE: In |escape| in |URI|.
784 wakaba 1.5 $self->{t}->{type} = {
785 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
786     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
787     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
788     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
789 wakaba 1.5 }->{$self->{t}->{type}};
790     $self->{t}->{value} .= chr $self->{c};
791 wakaba 1.3 $self->{state} = URI_UNQUOTED_STATE;
792     $self->{c} = $self->{get_char}->();
793     redo A;
794 wakaba 1.1 } else {
795     ## Note: In |nl| in ... in |string| or |ident|.
796 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
797 wakaba 1.1 $self->{state} = STRING_STATE;
798     $self->{c} = $self->{get_char}->();
799     redo A;
800     }
801     } elsif ($self->{c} == 0x000D) { # \r
802     if ($q == 0) {
803     ## NOTE: In |escape| in ... in |ident|.
804     $self->{state} = BEFORE_TOKEN_STATE;
805     unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
806 wakaba 1.5 return $self->{t};
807 wakaba 1.1 # reconsume
808     #redo A;
809 wakaba 1.3 } elsif ($q == 1) {
810 wakaba 1.5 $self->{t}->{type} = {
811 wakaba 1.3 URI_TOKEN, URI_INVALID_TOKEN,
812     URI_INVALID_TOKEN, URI_INVALID_TOKEN,
813     URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
814     URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
815 wakaba 1.5 }->{$self->{t}->{type}};
816     $self->{t}->{value} .= "\x0D\x0A";
817 wakaba 1.3 $self->{state} = URI_UNQUOTED_STATE;
818     $self->{c} = $self->{get_char}->();
819     redo A;
820 wakaba 1.1 } else {
821     ## Note: In |nl| in ... in |string| or |ident|.
822 wakaba 1.5 $self->{t}->{value} .= "\x0D\x0A";
823 wakaba 1.1 $self->{state} = ESCAPE_BEFORE_LF_STATE;
824     $self->{c} = $self->{get_char}->();
825     redo A;
826     }
827     } else {
828     ## NOTE: second character of |escape|.
829 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
830 wakaba 1.3 $self->{state} = $q == 0 ? NAME_STATE :
831     $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
832 wakaba 1.1 $self->{c} = $self->{get_char}->();
833     redo A;
834     }
835     } elsif ($self->{state} == ESCAPE_STATE) {
836     ## NOTE: third..seventh character of |unicode| in |escape|.
837     if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
838     $char = $char * 0x10 + $self->{c} - 0x0030;
839     $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
840     $self->{c} = $self->{get_char}->();
841     redo A;
842     } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
843     $char = $char * 0x10 + $self->{c} - 0x0041 + 0xA;
844     $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
845     $self->{c} = $self->{get_char}->();
846     redo A;
847 wakaba 1.2 } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
848 wakaba 1.1 $char = $char * 0x10 + $self->{c} - 0x0061 - 0xA;
849     $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
850     $self->{c} = $self->{get_char}->();
851     redo A;
852     } elsif ($self->{c} == 0x0020 or # SP
853     $self->{c} == 0x000A or # \n
854     $self->{c} == 0x0009 or # \t
855     $self->{c} == 0x000C) { # \f
856 wakaba 1.5 $self->{t}->{value} .= chr $char;
857 wakaba 1.3 $self->{state} = $q == 0 ? NAME_STATE :
858     $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
859 wakaba 1.1 $self->{c} = $self->{get_char}->();
860     redo A;
861     } elsif ($self->{c} == 0x000D) { # \r
862     $self->{state} = ESCAPE_BEFORE_LF_STATE;
863     $self->{c} = $self->{get_char}->();
864     redo A;
865     } else {
866 wakaba 1.5 $self->{t}->{value} .= chr $char;
867 wakaba 1.3 $self->{state} = $q == 0 ? NAME_STATE :
868     $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
869 wakaba 1.1 # reconsume
870     redo A;
871     }
872     } elsif ($self->{state} == ESCAPE_BEFORE_NL_STATE) {
873     ## NOTE: eighth character of |unicode| in |escape|.
874     if ($self->{c} == 0x0020 or # SP
875     $self->{c} == 0x000A or # \n
876     $self->{c} == 0x0009 or # \t
877     $self->{c} == 0x000C) { # \f
878 wakaba 1.5 $self->{t}->{value} .= chr $char;
879 wakaba 1.3 $self->{state} = $q == 0 ? NAME_STATE :
880     $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
881 wakaba 1.1 $self->{c} = $self->{get_char}->();
882     redo A;
883     } elsif ($self->{c} == 0x000D) { # \r
884     $self->{state} = ESCAPE_BEFORE_NL_STATE;
885     $self->{c} = $self->{get_char}->();
886     redo A;
887     } else {
888 wakaba 1.5 $self->{t}->{value} .= chr $char;
889 wakaba 1.3 $self->{state} = $q == 0 ? NAME_STATE :
890     $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
891 wakaba 1.1 # reconsume
892     redo A;
893     }
894     } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
895     ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
896     if ($self->{c} == 0x000A) { # \n
897 wakaba 1.5 $self->{t}->{value} .= chr $char;
898 wakaba 1.3 $self->{state} = $q == 0 ? NAME_STATE :
899     $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
900 wakaba 1.1 $self->{c} = $self->{get_char}->();
901     redo A;
902     } else {
903 wakaba 1.5 $self->{t}->{value} .= chr $char;
904 wakaba 1.3 $self->{state} = $q == 0 ? NAME_STATE :
905     $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
906 wakaba 1.1 # reconsume
907     redo A;
908     }
909     } elsif ($self->{state} == STRING_STATE) {
910     ## NOTE: A character in |string$Q| in |string| in |STRING|, or
911     ## a character in |invalid$Q| in |invalid| in |INVALID|,
912     ## where |$Q = $q == 0x0022 ? 1 : 2|.
913 wakaba 1.3 ## Or, in |URI|.
914 wakaba 1.1 if ($self->{c} == 0x005C) { # \
915     $self->{state} = ESCAPE_OPEN_STATE;
916     $self->{c} = $self->{get_char}->();
917     redo A;
918     } elsif ($self->{c} == $q) { # " | '
919 wakaba 1.5 if ($self->{t}->{type} == STRING_TOKEN) {
920 wakaba 1.3 $self->{state} = BEFORE_TOKEN_STATE;
921     $self->{c} = $self->{get_char}->();
922 wakaba 1.5 return $self->{t};
923 wakaba 1.3 #redo A;
924     } else {
925     $self->{state} = URI_AFTER_WSP_STATE;
926     $self->{c} = $self->{get_char}->();
927     redo A;
928     }
929 wakaba 1.1 } elsif ($self->{c} == 0x000A or # \n
930     $self->{c} == 0x000D or # \r
931     $self->{c} == 0x000C or # \f
932     $self->{c} == -1) {
933 wakaba 1.5 $self->{t}->{type} = INVALID_TOKEN;
934 wakaba 1.1 $self->{state} = BEFORE_TOKEN_STATE;
935     # reconsume
936 wakaba 1.5 return $self->{t};
937 wakaba 1.1 #redo A;
938     } else {
939 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
940 wakaba 1.1 # stay in the state
941     $self->{c} = $self->{get_char}->();
942     redo A;
943     }
944     } elsif ($self->{state} == NUMBER_STATE) {
945     ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
946     if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
947 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
948 wakaba 1.1 # stay in the state
949     $self->{c} = $self->{get_char}->();
950     redo A;
951     } elsif ($self->{c} == 0x002E) { # .
952     $self->{state} = NUMBER_DOT_STATE;
953     $self->{c} = $self->{get_char}->();
954     redo A;
955     } else {
956 wakaba 1.5 $self->{t}->{number} = $self->{t}->{value};
957     $self->{t}->{value} = '';
958 wakaba 1.1 $self->{state} = AFTER_NUMBER_STATE;
959     # reprocess
960 wakaba 1.2 redo A;
961 wakaba 1.1 }
962     } elsif ($self->{state} == NUMBER_DOT_STATE) {
963     ## NOTE: The character immediately following |.| in |num|.
964     if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
965 wakaba 1.5 $self->{t}->{value} .= '.' . chr $self->{c};
966 wakaba 1.1 $self->{state} = NUMBER_DOT_NUMBER_STATE;
967     $self->{c} = $self->{get_char}->();
968     redo A;
969     } else {
970     unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'};
971 wakaba 1.5 $self->{t}->{number} = $self->{t}->{value};
972     $self->{t}->{value} = '';
973 wakaba 1.1 $self->{state} = BEFORE_TOKEN_STATE;
974     # reprocess
975 wakaba 1.5 return $self->{t};
976 wakaba 1.1 #redo A;
977     }
978     } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
979     ## NOTE: The character immediately following |.| at the beginning of |num|.
980     if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
981 wakaba 1.5 $self->{t}->{value} .= '.' . chr $self->{c};
982 wakaba 1.1 $self->{state} = NUMBER_DOT_NUMBER_STATE;
983     $self->{c} = $self->{get_char}->();
984     redo A;
985     } else {
986     $self->{state} = BEFORE_TOKEN_STATE;
987     $self->{c} = $self->{get_char}->();
988     return {type => DELIM_TOKEN, value => '.'};
989     #redo A;
990     }
991     } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
992     ## NOTE: |[0-9]| in |num| after |.|.
993     if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
994 wakaba 1.5 $self->{t}->{value} .= chr $self->{c};
995 wakaba 1.1 # stay in the state
996     $self->{c} = $self->{get_char}->();
997     redo A;
998     } else {
999 wakaba 1.5 $self->{t}->{number} = $self->{t}->{value};
1000     $self->{t}->{value} = '';
1001 wakaba 1.1 $self->{state} = AFTER_NUMBER_STATE;
1002     # reprocess
1003 wakaba 1.2 redo A;
1004 wakaba 1.1 }
1005     } else {
1006     die "$0: Unknown state |$self->{state}|";
1007     }
1008     } # A
1009     } # get_next_token
1010    
1011     1;
1012 wakaba 1.5 # $Date: 2007/09/08 02:58:24 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24