/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory | Revision Log


Revision 1.10 - (show annotations) (download)
Sat Sep 8 13:43:58 2007 UTC (18 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.9: +101 -41 lines
++ whatpm/t/ChangeLog	8 Sep 2007 13:43:45 -0000
	* css-token-1.test: |NUMBER| and |DIMENSION| test
	cases are added.

2007-09-08  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/CSS/ChangeLog	8 Sep 2007 13:43:27 -0000
	* Tokenizer.pm: |DIMENSION| and |NUMBER| related
	bugs are fixed.

2007-09-08  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::CSS::Tokenizer;
2 use strict;
3
## Tokenizer states.
##
## One of these values is kept in |$self->{state}| and compared against
## in |get_next_token| to decide how the current character is handled.
## They are declared with |use constant| so that all of them are
## compile-time constant subs with an empty prototype, exactly like the
## hand-written |sub NAME () { N }| form, but listed as a single table.
## The numeric values are only required to be distinct; they are kept
## unchanged from earlier revisions.
use constant {
  BEFORE_TOKEN_STATE      => 0,
  BEFORE_NMSTART_STATE    => 1,
  NAME_STATE              => 2,
  ESCAPE_OPEN_STATE       => 3,
  STRING_STATE            => 4,
  HASH_OPEN_STATE         => 5,
  NUMBER_STATE            => 6,
  NUMBER_FRACTION_STATE   => 7,
  AFTER_NUMBER_STATE      => 8,
  URI_BEFORE_WSP_STATE    => 9,
  ESCAPE_STATE            => 10,
  ESCAPE_BEFORE_LF_STATE  => 11,
  ESCAPE_BEFORE_NL_STATE  => 12,
  NUMBER_DOT_STATE        => 13,
  NUMBER_DOT_NUMBER_STATE => 14,
  DELIM_STATE             => 15,
  URI_UNQUOTED_STATE      => 16,
  URI_AFTER_WSP_STATE     => 17,
  AFTER_AT_STATE          => 18,
  AFTER_AT_HYPHEN_STATE   => 19,
};
24
## Token types.
##
## These values are stored in the |type| member of the token hashes
## returned by |get_next_token|.  They are declared with |use constant|,
## which yields the same compile-time constant subs (empty prototype) as
## the hand-written |sub NAME () { N }| form.
##
## NOTE: The value 15 is intentionally left unassigned; the values are
## kept exactly as in earlier revisions so that |@TokenName| stays in
## sync.
use constant {
  IDENT_TOKEN              => 1,
  ATKEYWORD_TOKEN          => 2,
  HASH_TOKEN               => 3,
  FUNCTION_TOKEN           => 4,
  URI_TOKEN                => 5,
  URI_INVALID_TOKEN        => 6,
  URI_PREFIX_TOKEN         => 7,
  URI_PREFIX_INVALID_TOKEN => 8,
  STRING_TOKEN             => 9,
  INVALID_TOKEN            => 10,
  NUMBER_TOKEN             => 11,
  DIMENSION_TOKEN          => 12,
  PERCENTAGE_TOKEN         => 13,
  UNICODE_RANGE_TOKEN      => 14,
  ## 15 is not used.
  DELIM_TOKEN              => 16,
  PLUS_TOKEN               => 17,
  GREATER_TOKEN            => 18,
  COMMA_TOKEN              => 19,
  TILDE_TOKEN              => 20,
  DASHMATCH_TOKEN          => 21,
  PREFIXMATCH_TOKEN        => 22,
  SUFFIXMATCH_TOKEN        => 23,
  SUBSTRINGMATCH_TOKEN     => 24,
  INCLUDES_TOKEN           => 25,
  SEMICOLON_TOKEN          => 26,
  LBRACE_TOKEN             => 27,
  RBRACE_TOKEN             => 28,
  LPAREN_TOKEN             => 29,
  RPAREN_TOKEN             => 30,
  LBRACKET_TOKEN           => 31,
  RBRACKET_TOKEN           => 32,
  S_TOKEN                  => 33,
  CDO_TOKEN                => 34,
  CDC_TOKEN                => 35,
  COMMENT_TOKEN            => 36,
  COMMENT_INVALID_TOKEN    => 37,
  EOF_TOKEN                => 38,
};
62
## Human-readable token type names.
##
## The element at index N is the name (without the |_TOKEN| suffix) of
## the token type constant whose value is N.  Indices 0 and 15 have no
## corresponding token type and hold the placeholder string |0|.
our @TokenName = (
  '0',                                           # 0: no such token type
  qw(IDENT ATKEYWORD HASH FUNCTION),             # 1..4
  qw(URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID), # 5..8
  qw(STRING INVALID),                            # 9..10
  qw(NUMBER DIMENSION PERCENTAGE UNICODE_RANGE), # 11..14
  '0',                                           # 15: no such token type
  qw(DELIM PLUS GREATER COMMA TILDE),            # 16..20
  qw(DASHMATCH PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES), # 21..25
  qw(SEMICOLON LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET),   # 26..32
  qw(S CDO CDC COMMENT COMMENT_INVALID EOF),     # 33..38
);
71
## Creates a new tokenizer object.
##
## The object starts with an empty queue of pending tokens (|token|), a
## |get_char| callback that always returns -1 (the value that
## |get_next_token| treats as the end of the input) and an |onerror|
## callback that does nothing.  Callers are expected to replace
## |get_char| with a real character source before calling |init|.
sub new ($) {
  my $class = shift;
  my %member = (
    token    => [],
    get_char => sub { -1 },
    onerror  => sub { },
  );
  return bless \%member, $class;
} # new
77
## Prepares the tokenizer for reading.
##
## Puts the tokenizer into the initial (before-token) state and reads
## the first input character into |$self->{c}| by invoking the
## |get_char| callback once.  The character read is also the return
## value.
##
## NOTE: |$self->{t}|, the token under construction, is a hash of the
## form |{type => token-type, value => value, number => number}|; it is
## not touched here.
sub init ($) {
  my ($tokenizer) = @_;
  my $read_char = $tokenizer->{get_char};
  $tokenizer->{state} = BEFORE_TOKEN_STATE;
  return $tokenizer->{c} = $read_char->();
} # init
84
85 sub get_next_token ($) {
86 my $self = shift;
87 if (@{$self->{token}}) {
88 return shift @{$self->{token}};
89 }
90
91 my $char;
92 my $num; # |{num}|, if any.
93 my $i; # |$i + 1|th character in |unicode| in |escape|.
94 my $q;
95 ## NOTE:
96 ## 0: in |ident|.
97 ## 1: in |URI| outside of |string|.
98 ## 0x0022: in |string1| or |invalid1|.
99 ## 0x0027: in |string2| or |invalid2|.
100
101 A: {
102 if ($self->{state} == BEFORE_TOKEN_STATE) {
103 if ($self->{c} == 0x002D) { # -
104 ## NOTE: |-| in |ident| in |IDENT|
105 $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
106 $self->{state} = BEFORE_NMSTART_STATE;
107 $self->{c} = $self->{get_char}->();
108 redo A;
109 } elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u
110 $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
111 $self->{c} = $self->{get_char}->();
112 if ($self->{c} == 0x002B) { # +
113 $self->{c} = $self->{get_char}->();
114 if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
115 (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
116 (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
117 $self->{c} == 0x003F) { # ?
118 $self->{t}->{value} .= '+' . chr $self->{c};
119 $self->{t}->{type} = UNICODE_RANGE_TOKEN;
120 $self->{c} = $self->{get_char}->();
121 C: for (2..6) {
122 if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
123 (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
124 (0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f
125 $self->{c} == 0x003F) { # ?
126 $self->{t}->{value} .= chr $self->{c};
127 $self->{c} = $self->{get_char}->();
128 } else {
129 last C;
130 }
131 } # C
132
133 if ($self->{c} == 0x002D) { # -
134 $self->{c} = $self->{get_char}->();
135 if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
136 (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
137 (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
138 $self->{t}->{value} .= '-' . chr $self->{c};
139 $self->{c} = $self->{get_char}->();
140 C: for (2..6) {
141 if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
142 (0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F
143 (0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f
144 $self->{t}->{value} .= chr $self->{c};
145 $self->{c} = $self->{get_char}->();
146 } else {
147 last C;
148 }
149 } # C
150
151 #
152 } else {
153 my $token = $self->{t};
154 $self->{t} = {type => IDENT_TOKEN, value => '-'};
155 $self->{state} = BEFORE_NMSTART_STATE;
156 # reprocess
157 return $token;
158 #redo A;
159 }
160 }
161
162 $self->{state} = BEFORE_TOKEN_STATE;
163 # reprocess
164 return $self->{t};
165 #redo A;
166 } else {
167 unshift @{$self->{token}}, {type => PLUS_TOKEN};
168 $self->{state} = BEFORE_TOKEN_STATE;
169 # reprocess
170 return $self->{t};
171 #redo A;
172 }
173 } else {
174 $self->{state} = NAME_STATE;
175 # reprocess
176 redo A;
177 }
178 } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
179 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
180 $self->{c} == 0x005F or # _
181 $self->{c} > 0x007F) { # nonascii
182 ## NOTE: |nmstart| in |ident| in |IDENT|
183 $self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}};
184 $self->{state} = NAME_STATE;
185 $self->{c} = $self->{get_char}->();
186 redo A;
187 } elsif ($self->{c} == 0x005C) { # \
188 ## NOTE: |nmstart| in |ident| in |IDENT|
189 $self->{t} = {type => IDENT_TOKEN, value => ''};
190 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
191 $self->{c} = $self->{get_char}->();
192 redo A;
193 } elsif ($self->{c} == 0x0040) { # @
194 ## NOTE: |@| in |ATKEYWORD|
195 $self->{t} = {type => ATKEYWORD_TOKEN, value => ''};
196 $self->{state} = AFTER_AT_STATE;
197 $self->{c} = $self->{get_char}->();
198 redo A;
199 } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
200 $self->{t} = {type => STRING_TOKEN, value => ''};
201 $self->{state} = STRING_STATE; $q = $self->{c};
202 $self->{c} = $self->{get_char}->();
203 redo A;
204 } elsif ($self->{c} == 0x0023) { # #
205 ## NOTE: |#| in |HASH|.
206 $self->{t} = {type => HASH_TOKEN, value => ''};
207 $self->{state} = HASH_OPEN_STATE;
208 $self->{c} = $self->{get_char}->();
209 redo A;
210 } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
211 ## NOTE: |num|.
212 $self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}};
213 $self->{state} = NUMBER_STATE;
214 $self->{c} = $self->{get_char}->();
215 redo A;
216 } elsif ($self->{c} == 0x002E) { # .
217 ## NOTE: |num|.
218 $self->{t} = {type => NUMBER_TOKEN, value => '0'};
219 $self->{state} = NUMBER_FRACTION_STATE;
220 $self->{c} = $self->{get_char}->();
221 redo A;
222 } elsif ($self->{c} == 0x002F) { # /
223 $self->{c} = $self->{get_char}->();
224 if ($self->{c} == 0x002A) { # *
225 C: {
226 $self->{c} = $self->{get_char}->();
227 if ($self->{c} == 0x002A) { # *
228 D: {
229 $self->{c} = $self->{get_char}->();
230 if ($self->{c} == 0x002F) { # /
231 #
232 } elsif ($self->{c} == 0x002A) { # *
233 redo D;
234 } else {
235 redo C;
236 }
237 } # D
238 } elsif ($self->{c} == -1) {
239 # stay in the state
240 # reprocess
241 return {type => COMMENT_INVALID_TOKEN};
242 #redo A;
243 } else {
244 redo C;
245 }
246 } # C
247
248 # stay in the state.
249 $self->{c} = $self->{get_char}->();
250 redo A;
251 } else {
252 # stay in the state.
253 # reprocess
254 return {type => DELIM_TOKEN, value => '/'};
255 #redo A;
256 }
257 } elsif ($self->{c} == 0x003C) { # <
258 ## NOTE: |CDO|
259 $self->{c} = $self->{get_char}->();
260 if ($self->{c} == 0x0021) { # !
261 $self->{c} = $self->{get_char}->();
262 if ($self->{c} == 0x002D) { # -
263 $self->{c} = $self->{get_char}->();
264 if ($self->{c} == 0x002D) { # -
265 $self->{state} = BEFORE_TOKEN_STATE;
266 $self->{c} = $self->{get_char}->();
267 return {type => CDO_TOKEN};
268 #redo A;
269 } else {
270 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
271 ## NOTE: |-| in |ident| in |IDENT|
272 $self->{t} = {type => IDENT_TOKEN, value => '-'};
273 $self->{state} = BEFORE_NMSTART_STATE;
274 #reprocess
275 return {type => DELIM_TOKEN, value => '<'};
276 #redo A;
277 }
278 } else {
279 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
280 $self->{state} = BEFORE_TOKEN_STATE;
281 #reprocess
282 return {type => DELIM_TOKEN, value => '<'};
283 #redo A;
284 }
285 } else {
286 $self->{state} = BEFORE_TOKEN_STATE;
287 #reprocess
288 return {type => DELIM_TOKEN, value => '<'};
289 #redo A;
290 }
291 } elsif (my $t = {
292 0x003B => SEMICOLON_TOKEN, # ;
293 0x007B => LBRACE_TOKEN, # {
294 0x007D => RBRACE_TOKEN, # }
295 0x0028 => LPAREN_TOKEN, # (
296 0x0029 => RPAREN_TOKEN, # )
297 0x005B => LBRACKET_TOKEN, # [
298 0x005D => RBRACKET_TOKEN, # ]
299 }->{$self->{c}}) {
300 # stay in the state
301 $self->{c} = $self->{get_char}->();
302 return {type => $t};
303 # redo A;
304 } elsif ({
305 0x0020 => 1, # SP
306 0x0009 => 1, # \t
307 0x000D => 1, # \r
308 0x000A => 1, # \n
309 0x000C => 1, # \f
310 }->{$self->{c}}) {
311 W: {
312 $self->{c} = $self->{get_char}->();
313 if ({
314 0x0020 => 1, # SP
315 0x0009 => 1, # \t
316 0x000D => 1, # \r
317 0x000A => 1, # \n
318 0x000C => 1, # \f
319 }->{$self->{c}}) {
320 redo W;
321 } elsif (my $v = {
322 0x002B => PLUS_TOKEN, # +
323 0x003E => GREATER_TOKEN, # >
324 0x002C => COMMA_TOKEN, # ,
325 0x007E => TILDE_TOKEN, # ~
326 }->{$self->{c}}) {
327 # stay in the state
328 $self->{c} = $self->{get_char}->();
329 return {type => $v};
330 #redo A;
331 } else {
332 # stay in the state
333 # reprocess
334 return {type => S_TOKEN};
335 #redo A;
336 }
337 } # W
338 } elsif (my $v = {
339 0x007C => DASHMATCH_TOKEN, # |
340 0x005E => PREFIXMATCH_TOKEN, # ^
341 0x0024 => SUFFIXMATCH_TOKEN, # $
342 0x002A => SUBSTRINGMATCH_TOKEN, # *
343 }->{$self->{c}}) {
344 my $c = $self->{c};
345 $self->{c} = $self->{get_char}->();
346 if ($self->{c} == 0x003D) { # =
347 # stay in the state
348 $self->{c} = $self->{get_char}->();
349 return {type => $v};
350 #redo A;
351 } else {
352 # stay in the state
353 # reprocess
354 return {type => DELIM_TOKEN, value => chr $c};
355 #redo A;
356 }
357 } elsif ($self->{c} == 0x002B) { # +
358 # stay in the state
359 $self->{c} = $self->{get_char}->();
360 return {type => PLUS_TOKEN};
361 #redo A;
362 } elsif ($self->{c} == 0x003E) { # >
363 # stay in the state
364 $self->{c} = $self->{get_char}->();
365 return {type => GREATER_TOKEN};
366 #redo A;
367 } elsif ($self->{c} == 0x002C) { # ,
368 # stay in the state
369 $self->{c} = $self->{get_char}->();
370 return {type => COMMA_TOKEN};
371 #redo A;
372 } elsif ($self->{c} == 0x007E) { # ~
373 $self->{c} = $self->{get_char}->();
374 if ($self->{c} == 0x003D) { # =
375 # stay in the state
376 $self->{c} = $self->{get_char}->();
377 return {type => INCLUDES_TOKEN};
378 #redo A;
379 } else {
380 # stay in the state
381 # reprocess
382 return {type => TILDE_TOKEN};
383 #redo A;
384 }
385 } elsif ($self->{c} == -1) {
386 # stay in the state
387 $self->{c} = $self->{get_char}->();
388 return {type => EOF_TOKEN};
389 #redo A;
390 } else {
391 # stay in the state
392 $self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}};
393 $self->{c} = $self->{get_char}->();
394 return $self->{t};
395 #redo A;
396 }
397 } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
398 ## NOTE: |nmstart| in |ident| in (|IDENT|, |DIMENSION|, or
399 ## |FUNCTION|)
400 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
401 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
402 $self->{c} == 0x005F or # _
403 $self->{c} > 0x007F) { # nonascii
404 $self->{t}->{value} .= chr $self->{c};
405 $self->{t}->{type} = DIMENSION_TOKEN
406 if $self->{t}->{type} == NUMBER_TOKEN;
407 $self->{state} = NAME_STATE;
408 $self->{c} = $self->{get_char}->();
409 redo A;
410 } elsif ($self->{c} == 0x005C) { # \
411 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
412 $self->{c} = $self->{get_char}->();
413 redo A;
414 } elsif ($self->{c} == 0x002D) { # -
415 if ($self->{t}->{type} == IDENT_TOKEN) {
416 $self->{c} = $self->{get_char}->();
417 if ($self->{c} == 0x003E) { # >
418 $self->{state} = BEFORE_TOKEN_STATE;
419 $self->{c} = $self->{get_char}->();
420 return {type => CDC_TOKEN};
421 #redo A;
422 } else {
423 ## NOTE: |-|, |-|, $self->{c}
424 #$self->{t} = {type => IDENT_TOKEN, value => '-'};
425 # stay in the state
426 # reconsume
427 return {type => DELIM_TOKEN, value => '-'};
428 #redo A;
429 }
430 } elsif ($self->{t}->{type} == DIMENSION_TOKEN) {
431 $self->{c} = $self->{get_char}->();
432 if ($self->{c} == 0x003E) { # >
433 unshift @{$self->{token}}, {type => CDC_TOKEN};
434 $self->{t}->{type} = NUMBER_TOKEN;
435 $self->{t}->{value} = '';
436 $self->{state} = BEFORE_TOKEN_STATE;
437 $self->{c} = $self->{get_char}->();
438 return $self->{t};
439 #redo A;
440 } else {
441 ## NOTE: |-|, |-|, $self->{c}
442 my $t = $self->{t};
443 $t->{type} = NUMBER_TOKEN;
444 $t->{value} = '';
445 $self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1};
446 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
447 # stay in the state
448 # reconsume
449 return $t;
450 #redo A;
451 }
452 } else {
453 #
454 }
455 } else {
456 #
457 }
458
459 if ($self->{t}->{type} == DIMENSION_TOKEN) {
460 ## NOTE: |-| after |NUMBER|.
461 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
462 $self->{state} = BEFORE_TOKEN_STATE;
463 # reprocess
464 $self->{t}->{type} = NUMBER_TOKEN;
465 $self->{t}->{value} = '';
466 return $self->{t};
467 } else {
468 ## NOTE: |-| not followed by |nmstart|.
469 $self->{state} = BEFORE_TOKEN_STATE;
470 # reprocess
471 return {type => DELIM_TOKEN, value => '-'};
472 }
473 } elsif ($self->{state} == AFTER_AT_STATE) {
474 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
475 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
476 $self->{c} == 0x005F or # _
477 $self->{c} > 0x007F) { # nonascii
478 $self->{t}->{value} .= chr $self->{c};
479 $self->{state} = NAME_STATE;
480 $self->{c} = $self->{get_char}->();
481 redo A;
482 } elsif ($self->{c} == 0x002D) { # -
483 $self->{t}->{value} .= '-';
484 $self->{state} = AFTER_AT_HYPHEN_STATE;
485 $self->{c} = $self->{get_char}->();
486 redo A;
487 } elsif ($self->{c} == 0x005C) { # \
488 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
489 $self->{c} = $self->{get_char}->();
490 redo A;
491 } else {
492 $self->{state} = BEFORE_TOKEN_STATE;
493 # reprocess
494 return {type => DELIM_TOKEN, value => '@'};
495 }
496 } elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) {
497 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
498 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
499 $self->{c} == 0x005F or # _
500 $self->{c} > 0x007F) { # nonascii
501 $self->{t}->{value} .= chr $self->{c};
502 $self->{state} = NAME_STATE;
503 $self->{c} = $self->{get_char}->();
504 redo A;
505 } elsif ($self->{c} == 0x002D) { # -
506 $self->{c} = $self->{get_char}->();
507 if ($self->{c} == 0x003E) { # >
508 unshift @{$self->{token}}, {type => CDC_TOKEN};
509 $self->{state} = BEFORE_TOKEN_STATE;
510 $self->{c} = $self->{get_char}->();
511 return {type => DELIM_TOKEN, value => '@'};
512 #redo A;
513 } else {
514 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
515 $self->{t} = {type => IDENT_TOKEN, value => '-'};
516 $self->{state} = BEFORE_NMSTART_STATE;
517 # reprocess
518 return {type => DELIM_TOKEN, value => '@'};
519 #redo A;
520 }
521 } elsif ($self->{c} == 0x005C) { # \
522 ## TODO: @-\{nl}
523 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
524 $self->{c} = $self->{get_char}->();
525 redo A;
526 } else {
527 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
528 $self->{state} = BEFORE_TOKEN_STATE;
529 # reprocess
530 return {type => DELIM_TOKEN, value => '@'};
531 }
532 } elsif ($self->{state} == AFTER_NUMBER_STATE) {
533 if ($self->{c} == 0x002D) { # -
534 ## NOTE: |-| in |ident|.
535 $self->{t}->{hyphen} = 1;
536 $self->{t}->{value} = '-';
537 $self->{t}->{type} = DIMENSION_TOKEN;
538 $self->{state} = BEFORE_NMSTART_STATE;
539 $self->{c} = $self->{get_char}->();
540 redo A;
541 } elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
542 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
543 $self->{c} == 0x005F or # _
544 $self->{c} > 0x007F) { # nonascii
545 ## NOTE: |nmstart| in |ident|.
546 $self->{t}->{value} = chr $self->{c};
547 $self->{t}->{type} = DIMENSION_TOKEN;
548 $self->{state} = NAME_STATE;
549 $self->{c} = $self->{get_char}->();
550 redo A;
551 } elsif ($self->{c} == 0x005C) { # \
552 ## NOTE: |nmstart| in |ident| in |IDENT|
553 $self->{t}->{value} = '';
554 $self->{t}->{type} = DIMENSION_TOKEN;
555 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
556 $self->{c} = $self->{get_char}->();
557 redo A;
558 } elsif ($self->{c} == 0x0025) { # %
559 $self->{t}->{type} = PERCENTAGE_TOKEN;
560 $self->{state} = BEFORE_TOKEN_STATE;
561 $self->{c} = $self->{get_char}->();
562 return $self->{t};
563 #redo A;
564 } else {
565 $self->{state} = BEFORE_TOKEN_STATE;
566 # reprocess
567 return $self->{t};
568 #redo A;
569 }
570 } elsif ($self->{state} == HASH_OPEN_STATE) {
571 ## NOTE: The first |nmchar| in |name| in |HASH|.
572 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
573 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
574 (0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
575 $self->{c} == 0x002D or # -
576 $self->{c} == 0x005F or # _
577 $self->{c} > 0x007F) { # nonascii
578 $self->{t}->{value} .= chr $self->{c};
579 $self->{state} = NAME_STATE;
580 $self->{c} = $self->{get_char}->();
581 redo A;
582 } elsif ($self->{c} == 0x005C) { # \
583 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
584 $self->{c} = $self->{get_char}->();
585 redo A;
586 } else {
587 $self->{state} = BEFORE_TOKEN_STATE;
588 # reprocess
589 return {type => DELIM_TOKEN, value => '#'};
590 #redo A;
591 }
592 } elsif ($self->{state} == NAME_STATE) {
593 ## NOTE: |nmchar| in (|ident| or |name|).
594 if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z
595 (0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z
596 (0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9
597 $self->{c} == 0x005F or # _
598 $self->{c} == 0x002D or # -
599 $self->{c} > 0x007F) { # nonascii
600 $self->{t}->{value} .= chr $self->{c};
601 # stay in the state
602 $self->{c} = $self->{get_char}->();
603 redo A;
604 } elsif ($self->{c} == 0x005C) { # \
605 $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
606 $self->{c} = $self->{get_char}->();
607 redo A;
608 } elsif ($self->{c} == 0x0028 and # (
609 $self->{t}->{type} == IDENT_TOKEN) { # (
610 my $func_name = $self->{t}->{value};
611 $func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive?
612 if ($func_name eq 'url' or $func_name eq 'url-prefix') {
613 if ($self->{t}->{has_escape}) {
614 ## TODO: warn
615 }
616 $self->{t}->{type}
617 = $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN;
618 $self->{t}->{value} = '';
619 $self->{state} = URI_BEFORE_WSP_STATE;
620 $self->{c} = $self->{get_char}->();
621 redo A;
622 } else {
623 $self->{t}->{type} = FUNCTION_TOKEN;
624 $self->{state} = BEFORE_TOKEN_STATE;
625 $self->{c} = $self->{get_char}->();
626 return $self->{t};
627 #redo A;
628 }
629 } else {
630 $self->{state} = BEFORE_TOKEN_STATE;
631 # reconsume
632 return $self->{t};
633 #redo A;
634 }
635 } elsif ($self->{state} == URI_BEFORE_WSP_STATE) {
636 while ({
637 0x0020 => 1, # SP
638 0x0009 => 1, # \t
639 0x000D => 1, # \r
640 0x000A => 1, # \n
641 0x000C => 1, # \f
642 }->{$self->{c}}) {
643 $self->{c} = $self->{get_char}->();
644 }
645 if ($self->{c} == -1) {
646 $self->{t}->{type} = {
647 URI_TOKEN, URI_INVALID_TOKEN,
648 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
649 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
650 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
651 }->{$self->{t}->{type}};
652 $self->{state} = BEFORE_TOKEN_STATE;
653 $self->{c} = $self->{get_char}->();
654 return $self->{t};
655 #redo A;
656 } elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or (
657 ## TODO: Should we consider matches of "(" and ")"?
658 $self->{t}->{type} = {
659 URI_TOKEN, URI_INVALID_TOKEN,
660 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
661 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
662 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
663 }->{$self->{t}->{type}};
664 $self->{state} = URI_UNQUOTED_STATE;
665 $self->{c} = $self->{get_char}->();
666 redo A;
667 } elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or '
668 $self->{state} = STRING_STATE; $q = $self->{c};
669 $self->{c} = $self->{get_char}->();
670 redo A;
671 } elsif ($self->{c} == 0x0029) { # )
672 $self->{state} = BEFORE_TOKEN_STATE;
673 $self->{c} = $self->{get_char}->();
674 return $self->{t};
675 #redo A;
676 } elsif ($self->{c} == 0x005C) { # \
677 $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
678 $self->{c} = $self->{get_char}->();
679 redo A;
680 } else {
681 $self->{t}->{value} .= chr $self->{c};
682 $self->{state} = URI_UNQUOTED_STATE;
683 $self->{c} = $self->{get_char}->();
684 redo A;
685 }
686 } elsif ($self->{state} == URI_UNQUOTED_STATE) {
687 if ({
688 0x0020 => 1, # SP
689 0x0009 => 1, # \t
690 0x000D => 1, # \r
691 0x000A => 1, # \n
692 0x000C => 1, # \f
693 }->{$self->{c}}) {
694 $self->{state} = URI_AFTER_WSP_STATE;
695 $self->{c} = $self->{get_char}->();
696 redo A;
697 } elsif ($self->{c} == -1) {
698 $self->{t}->{type} = {
699 URI_TOKEN, URI_INVALID_TOKEN,
700 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
701 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
702 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
703 }->{$self->{t}->{type}};
704 $self->{state} = BEFORE_TOKEN_STATE;
705 $self->{c} = $self->{get_char}->();
706 return $self->{t};
707 #redo A;
708 } elsif ($self->{c} < 0x0020 or {
709 0x0022 => 1, # "
710 0x0027 => 1, # '
711 0x0028 => 1, # (
712 }->{$self->{c}}) { # C0 or (
713 ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
714 $self->{t}->{type} = {
715 URI_TOKEN, URI_INVALID_TOKEN,
716 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
717 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
718 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
719 }->{$self->{t}->{type}};
720 # stay in the state.
721 $self->{c} = $self->{get_char}->();
722 redo A;
723 } elsif ($self->{c} == 0x0029) { # )
724 $self->{state} = BEFORE_TOKEN_STATE;
725 $self->{c} = $self->{get_char}->();
726 return $self->{t};
727 #redo A;
728 } elsif ($self->{c} == 0x005C) { # \
729 $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
730 $self->{c} = $self->{get_char}->();
731 redo A;
732 } else {
733 $self->{t}->{value} .= chr $self->{c};
734 # stay in the state.
735 $self->{c} = $self->{get_char}->();
736 redo A;
737 }
738 } elsif ($self->{state} == URI_AFTER_WSP_STATE) {
739 if ({
740 0x0020 => 1, # SP
741 0x0009 => 1, # \t
742 0x000D => 1, # \r
743 0x000A => 1, # \n
744 0x000C => 1, # \f
745 }->{$self->{c}}) {
746 # stay in the state.
747 $self->{c} = $self->{get_char}->();
748 redo A;
749 } elsif ($self->{c} == -1) {
750 $self->{t}->{type} = {
751 URI_TOKEN, URI_INVALID_TOKEN,
752 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
753 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
754 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
755 }->{$self->{t}->{type}};
756 $self->{state} = BEFORE_TOKEN_STATE;
757 $self->{c} = $self->{get_char}->();
758 return $self->{t};
759 #redo A;
760 } elsif ($self->{c} == 0x0029) { # )
761 $self->{state} = BEFORE_TOKEN_STATE;
762 $self->{c} = $self->{get_char}->();
763 return $self->{t};
764 #redo A;
765 } elsif ($self->{c} == 0x005C) { # \
766 $self->{state} = ESCAPE_OPEN_STATE; $q = 1;
767 $self->{c} = $self->{get_char}->();
768 redo A;
769 } else {
770 ## TODO: Should we consider matches of "(" and ")", '"', or "'"?
771 $self->{t}->{type} = {
772 URI_TOKEN, URI_INVALID_TOKEN,
773 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
774 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
775 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
776 }->{$self->{t}->{type}};
777 # stay in the state.
778 $self->{c} = $self->{get_char}->();
779 redo A;
780 }
781 } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
782 $self->{t}->{has_escape} = 1;
783 if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
784 ## NOTE: second character of |unicode| in |escape|.
785 $char = $self->{c} - 0x0030;
786 $self->{state} = ESCAPE_STATE; $i = 2;
787 $self->{c} = $self->{get_char}->();
788 redo A;
789 } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
790 ## NOTE: second character of |unicode| in |escape|.
791 $char = $self->{c} - 0x0041 + 0xA;
792 $self->{state} = ESCAPE_STATE; $i = 2;
793 $self->{c} = $self->{get_char}->();
794 redo A;
795 } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
796 ## NOTE: second character of |unicode| in |escape|.
797 $char = $self->{c} - 0x0061 + 0xA;
798 $self->{state} = ESCAPE_STATE; $i = 2;
799 $self->{c} = $self->{get_char}->();
800 redo A;
801 } elsif ($self->{c} == 0x000A or # \n
802 $self->{c} == 0x000C) { # \f
803 if ($q == 0) {
804 #
805 } elsif ($q == 1) {
806 ## NOTE: In |escape| in |URI|.
807 $self->{t}->{type} = {
808 URI_TOKEN, URI_INVALID_TOKEN,
809 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
810 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
811 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
812 }->{$self->{t}->{type}};
813 $self->{t}->{value} .= chr $self->{c};
814 $self->{state} = URI_UNQUOTED_STATE;
815 $self->{c} = $self->{get_char}->();
816 redo A;
817 } else {
818 ## Note: In |nl| in ... in |string| or |ident|.
819 $self->{t}->{value} .= chr $self->{c};
820 $self->{state} = STRING_STATE;
821 $self->{c} = $self->{get_char}->();
822 redo A;
823 }
824 } elsif ($self->{c} == 0x000D) { # \r
825 if ($q == 0) {
826 #
827 } elsif ($q == 1) {
828 ## NOTE: In |escape| in |URI|.
829 $self->{t}->{type} = {
830 URI_TOKEN, URI_INVALID_TOKEN,
831 URI_INVALID_TOKEN, URI_INVALID_TOKEN,
832 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
833 URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN,
834 }->{$self->{t}->{type}};
835 $self->{t}->{value} .= "\x0D";
836 $self->{state} = ESCAPE_BEFORE_LF_STATE;
837 $self->{c} = $self->{get_char}->();
838 redo A;
839 } else {
840 ## Note: In |nl| in ... in |string| or |ident|.
841 $self->{t}->{value} .= "\x0D";
842 $self->{state} = ESCAPE_BEFORE_LF_STATE;
843 $self->{c} = $self->{get_char}->();
844 redo A;
845 }
846 } elsif ($self->{c} == -1) {
847 #
848 } else {
849 ## NOTE: second character of |escape|.
850 $self->{t}->{value} .= chr $self->{c};
851 $self->{state} = $q == 0 ? NAME_STATE :
852 $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
853 $self->{c} = $self->{get_char}->();
854 redo A;
855 }
856
857 if ($q == 0) {
858 if ($self->{t}->{type} == DIMENSION_TOKEN) {
859 if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
860 $self->{state} = BEFORE_TOKEN_STATE;
861 # reprocess
862 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
863 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
864 $self->{t}->{type} = NUMBER_TOKEN;
865 $self->{t}->{value} = '';
866 return $self->{t};
867 #redo A;
868 } elsif (length $self->{t}->{value}) {
869 $self->{state} = BEFORE_TOKEN_STATE;
870 # reprocess
871 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
872 return $self->{t};
873 #redo A;
874 } else {
875 $self->{state} = BEFORE_TOKEN_STATE;
876 # reprocess
877 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
878 $self->{t}->{type} = NUMBER_TOKEN;
879 $self->{t}->{value} = '';
880 return $self->{t};
881 #redo A;
882 }
883 } else {
884 if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') {
885 $self->{state} = BEFORE_TOKEN_STATE;
886 # reprocess
887 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
888 return {type => DELIM_TOKEN, value => '-'};
889 #redo A;
890 } elsif (length $self->{t}->{value}) {
891 $self->{state} = BEFORE_TOKEN_STATE;
892 # reprocess
893 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
894 return $self->{t};
895 #redo A;
896 } else {
897 $self->{state} = BEFORE_TOKEN_STATE;
898 # reprocess
899 return {type => DELIM_TOKEN, value => '\\'};
900 #redo A;
901 }
902 }
903 } elsif ($q == 1) {
904 $self->{state} = URI_UNQUOTED_STATE;
905 $self->{c} = $self->{get_char}->();
906 redo A;
907 } else {
908 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
909 $self->{t}->{type} = {
910 STRING_TOKEN, INVALID_TOKEN,
911 URI_TOKEN, URI_INVALID_TOKEN,
912 URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN,
913 }->{$self->{t}->{type}} || $self->{t}->{type};
914 $self->{state} = BEFORE_TOKEN_STATE;
915 # reprocess
916 return $self->{t};
917 #redo A;
918 }
919 } elsif ($self->{state} == ESCAPE_STATE) {
920 ## NOTE: third..seventh character of |unicode| in |escape|.
921 if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
922 $char = $char * 0x10 + $self->{c} - 0x0030;
923 $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
924 $self->{c} = $self->{get_char}->();
925 redo A;
926 } elsif (0x0041 <= $self->{c} and $self->{c} <= 0x0046) { # A..F
927 $char = $char * 0x10 + $self->{c} - 0x0041 + 0xA;
928 $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
929 $self->{c} = $self->{get_char}->();
930 redo A;
931 } elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f
932 $char = $char * 0x10 + $self->{c} - 0x0061 + 0xA;
933 $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
934 $self->{c} = $self->{get_char}->();
935 redo A;
936 } elsif ($self->{c} == 0x0020 or # SP
937 $self->{c} == 0x000A or # \n
938 $self->{c} == 0x0009 or # \t
939 $self->{c} == 0x000C) { # \f
940 $self->{t}->{value} .= chr $char;
941 $self->{state} = $q == 0 ? NAME_STATE :
942 $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
943 $self->{c} = $self->{get_char}->();
944 redo A;
945 } elsif ($self->{c} == 0x000D) { # \r
946 $self->{state} = ESCAPE_BEFORE_LF_STATE;
947 $self->{c} = $self->{get_char}->();
948 redo A;
949 } else {
950 $self->{t}->{value} .= chr $char;
951 $self->{state} = $q == 0 ? NAME_STATE :
952 $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
953 # reconsume
954 redo A;
955 }
956 } elsif ($self->{state} == ESCAPE_BEFORE_NL_STATE) {
957 ## NOTE: eightth character of |unicode| in |escape|.
958 if ($self->{c} == 0x0020 or # SP
959 $self->{c} == 0x000A or # \n
960 $self->{c} == 0x0009 or # \t
961 $self->{c} == 0x000C) { # \f
962 $self->{t}->{value} .= chr $char;
963 $self->{state} = $q == 0 ? NAME_STATE :
964 $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
965 $self->{c} = $self->{get_char}->();
966 redo A;
967 } elsif ($self->{c} == 0x000D) { # \r
968 $self->{state} = ESCAPE_BEFORE_NL_STATE;
969 $self->{c} = $self->{get_char}->();
970 redo A;
971 } else {
972 $self->{t}->{value} .= chr $char;
973 $self->{state} = $q == 0 ? NAME_STATE :
974 $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
975 # reconsume
976 redo A;
977 }
978 } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
979 ## NOTE: |\n| in |\r\n| in |unicode| in |escape|.
980 if ($self->{c} == 0x000A) { # \n
981 $self->{t}->{value} .= chr $self->{c};
982 $self->{state} = $q == 0 ? NAME_STATE :
983 $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
984 $self->{c} = $self->{get_char}->();
985 redo A;
986 } else {
987 $self->{state} = $q == 0 ? NAME_STATE :
988 $q == 1 ? URI_UNQUOTED_STATE : STRING_STATE;
989 # reprocess
990 redo A;
991 }
992 } elsif ($self->{state} == STRING_STATE) {
993 ## NOTE: A character in |string$Q| in |string| in |STRING|, or
994 ## a character in |invalid$Q| in |invalid| in |INVALID|,
995 ## where |$Q = $q == 0x0022 ? 1 : 2|.
996 ## Or, in |URI|.
997 if ($self->{c} == 0x005C) { # \
998 $self->{state} = ESCAPE_OPEN_STATE;
999 $self->{c} = $self->{get_char}->();
1000 redo A;
1001 } elsif ($self->{c} == $q) { # " | '
1002 if ($self->{t}->{type} == STRING_TOKEN) {
1003 $self->{state} = BEFORE_TOKEN_STATE;
1004 $self->{c} = $self->{get_char}->();
1005 return $self->{t};
1006 #redo A;
1007 } else {
1008 $self->{state} = URI_AFTER_WSP_STATE;
1009 $self->{c} = $self->{get_char}->();
1010 redo A;
1011 }
1012 } elsif ($self->{c} == 0x000A or # \n
1013 $self->{c} == 0x000D or # \r
1014 $self->{c} == 0x000C or # \f
1015 $self->{c} == -1) {
1016 $self->{t}->{type} = INVALID_TOKEN;
1017 $self->{state} = BEFORE_TOKEN_STATE;
1018 # reconsume
1019 return $self->{t};
1020 #redo A;
1021 } else {
1022 $self->{t}->{value} .= chr $self->{c};
1023 # stay in the state
1024 $self->{c} = $self->{get_char}->();
1025 redo A;
1026 }
1027 } elsif ($self->{state} == NUMBER_STATE) {
1028 ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
1029 if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1030 $self->{t}->{value} .= chr $self->{c};
1031 # stay in the state
1032 $self->{c} = $self->{get_char}->();
1033 redo A;
1034 } elsif ($self->{c} == 0x002E) { # .
1035 $self->{state} = NUMBER_DOT_STATE;
1036 $self->{c} = $self->{get_char}->();
1037 redo A;
1038 } else {
1039 $self->{t}->{number} = $self->{t}->{value};
1040 $self->{t}->{value} = '';
1041 $self->{state} = AFTER_NUMBER_STATE;
1042 # reprocess
1043 redo A;
1044 }
1045 } elsif ($self->{state} == NUMBER_DOT_STATE) {
1046 ## NOTE: The character immediately following |.| in |num|.
1047 if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1048 $self->{t}->{value} .= '.' . chr $self->{c};
1049 $self->{state} = NUMBER_DOT_NUMBER_STATE;
1050 $self->{c} = $self->{get_char}->();
1051 redo A;
1052 } else {
1053 unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'};
1054 $self->{t}->{number} = $self->{t}->{value};
1055 $self->{t}->{value} = '';
1056 $self->{state} = BEFORE_TOKEN_STATE;
1057 # reprocess
1058 return $self->{t};
1059 #redo A;
1060 }
1061 } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
1062 ## NOTE: The character immediately following |.| at the beginning of |num|.
1063 if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1064 $self->{t}->{value} .= '.' . chr $self->{c};
1065 $self->{state} = NUMBER_DOT_NUMBER_STATE;
1066 $self->{c} = $self->{get_char}->();
1067 redo A;
1068 } else {
1069 $self->{state} = BEFORE_TOKEN_STATE;
1070 # reprocess
1071 return {type => DELIM_TOKEN, value => '.'};
1072 #redo A;
1073 }
1074 } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
1075 ## NOTE: |[0-9]| in |num| after |.|.
1076 if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
1077 $self->{t}->{value} .= chr $self->{c};
1078 # stay in the state
1079 $self->{c} = $self->{get_char}->();
1080 redo A;
1081 } else {
1082 $self->{t}->{number} = $self->{t}->{value};
1083 $self->{t}->{value} = '';
1084 $self->{state} = AFTER_NUMBER_STATE;
1085 # reprocess
1086 redo A;
1087 }
1088 } else {
1089 die "$0: Unknown state |$self->{state}|";
1090 }
1091 } # A
1092 } # get_next_token
1093
1094 1;
1095 # $Date: 2007/09/08 11:44:32 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24