/[suikacvs]/markup/html/whatpm/Whatpm/CSS/Tokenizer.pm
Suika

Contents of /markup/html/whatpm/Whatpm/CSS/Tokenizer.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Fri Aug 17 11:53:52 2007 UTC (18 years, 7 months ago) by wakaba
Branch: MAIN
++ whatpm/t/ChangeLog	17 Aug 2007 07:08:23 -0000
	* content-model-2.dat: New tests for |base|
	following URI or hyperlink are added.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	17 Aug 2007 07:44:01 -0000
	* CSS/: New directory.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/CSS/ChangeLog	17 Aug 2007 11:53:38 -0000
2007-08-17  Wakaba  <wakaba@suika.fam.cx>

	* Tokenizer.pm: New module.

	* ChangeLog: New file.

++ whatpm/Whatpm/ContentChecker/ChangeLog	17 Aug 2007 07:08:56 -0000
	* HTML.pm: Raise new errors if |base| is following
	URI attributes or hyperlink attributes.

2007-08-17  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::CSS::Tokenizer;
2 use strict;
3
## Constructor.  Creates a tokenizer object blessed into the invoking
## class.  The object starts with an empty |token| array, which
## |get_next_token| uses as a queue of tokens that have already been
## recognized but not yet returned.
sub new ($) {
  my $class = shift;
  return bless {token => []}, $class;
} # new
8
## Resets the tokenizer to the "before token" state and primes the
## one-character lookahead |$self->{c}| by calling the |get_char|
## callback once.  The caller must have stored a code reference in
## |$self->{get_char}| beforehand.
sub init ($) {
  my ($self) = @_;
  my $read_char = $self->{get_char};
  $self->{state} = BEFORE_TOKEN_STATE;
  $self->{c} = $read_char->();
} # init
14
## Returns true if the code point |$c| can start a name without an
## escape: |[_A-Za-z]| or |nonascii|.
sub _is_nmstart {
  my $c = shift;
  return ((0x0041 <= $c and $c <= 0x005A) or # A..Z
          (0x0061 <= $c and $c <= 0x007A) or # a..z
          $c == 0x005F or # _
          $c > 0x007F); # nonascii
} # _is_nmstart

## Returns true if the code point |$c| can continue a name without an
## escape: |nmstart|, |[0-9]|, or |-|.
sub _is_nmchar {
  my $c = shift;
  return (_is_nmstart ($c) or
          (0x0030 <= $c and $c <= 0x0039) or # 0..9
          $c == 0x002D); # -
} # _is_nmchar

## Returns the numeric value (0..15) of the hexadecimal digit whose
## code point is |$c|, or nothing if |$c| is not a hexadecimal digit.
sub _hex_digit_value {
  my $c = shift;
  return $c - 0x0030 if 0x0030 <= $c and $c <= 0x0039; # 0..9
  return $c - 0x0041 + 0xA if 0x0041 <= $c and $c <= 0x0046; # A..F
  return $c - 0x0061 + 0xA if 0x0061 <= $c and $c <= 0x0066; # a..f
  return;
} # _hex_digit_value

## Returns the next token as a hash reference.
##
## Every token has a |type|.  Name-like tokens, strings and delimiters
## carry the text in |value|; numeric tokens carry the digits in
## |number| and any unit identifier in |value|; tokens that contained
## an escape have |has_escape| set.  At the end of input an
## |EOF_TOKEN| is returned.
##
## Object fields used:
##   |get_char| - callback returning the next code point, or -1 at EOF.
##   |c|        - one code point of lookahead.
##   |state|    - tokenizer state that persists between calls.
##   |token|    - queue of tokens already recognized but not returned.
##   |t|        - token under construction when a call has to return
##                in the middle of it (only after |<!-| and |--|).
##
## NOTE: The |*_STATE| and |*_TOKEN| constants are not defined in the
## visible part of this module; they are assumed to be numeric
## constants provided elsewhere.
sub get_next_token ($) {
  my $self = shift;
  if (@{$self->{token}}) {
    return shift @{$self->{token}};
  }

  ## Pick up a token left unfinished by the previous call, if any.
  my $current_token = delete $self->{t};
  my $char; # Code point accumulated from |unicode| in |escape|.
  my $i; # |$i + 1|th character in |unicode| in |escape|.
  my $q = 0; # 0 in |ident| or |name|; in |string| or |invalid|, the
             # code point of the quotation mark (0x0022 or 0x0027).

  A: {
    if ($self->{state} == BEFORE_TOKEN_STATE) {
      if ($self->{c} == 0x002D) { # -
        ## NOTE: |-| in |ident| in |IDENT|
        $current_token = {type => IDENT_TOKEN, value => '-'};
        $self->{state} = BEFORE_NMSTART_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif (_is_nmstart ($self->{c})) {
        ## NOTE: |nmstart| in |ident| in |IDENT|
        $current_token = {type => IDENT_TOKEN, value => chr $self->{c}};
        $self->{state} = NAME_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x005C) { # \
        ## NOTE: |nmstart| in |ident| in |IDENT|
        $current_token = {type => IDENT_TOKEN, value => ''};
        $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x0040) { # @
        ## NOTE: |@| in |ATKEYWORD|
        $current_token = {type => ATKEYWORD_TOKEN, value => ''};
        $self->{state} = BEFORE_NMSTART_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x0022) { # "
        ## NOTE: |"| in |string1| in |string| in |STRING|, or
        ## |"| in |invalid1| in |invalid| in |INVALID|.
        $current_token = {type => STRING_TOKEN, value => ''};
        $self->{state} = STRING_STATE; $q = 0x0022;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x0027) { # '
        ## NOTE: |'| in |string2| in |string| in |STRING|, or
        ## |'| in |invalid2| in |invalid| in |INVALID|.
        $current_token = {type => STRING_TOKEN, value => ''};
        $self->{state} = STRING_STATE; $q = 0x0027;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x0023) { # #
        ## NOTE: |#| in |HASH|.
        $current_token = {type => HASH_TOKEN, value => ''};
        $self->{state} = HASH_OPEN_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9
        ## NOTE: |num|.
        $current_token = {type => NUMBER_TOKEN, value => chr $self->{c}};
        $self->{state} = NUMBER_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x002E) { # .
        ## NOTE: |num|.
        $current_token = {type => NUMBER_TOKEN, value => '.'};
        $self->{state} = NUMBER_FRACTION_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x003C) { # <
        ## NOTE: |CDO|
        $self->{c} = $self->{get_char}->();
        if ($self->{c} == 0x0021) { # !
          $self->{c} = $self->{get_char}->();
          if ($self->{c} == 0x002D) { # -
            $self->{c} = $self->{get_char}->();
            if ($self->{c} == 0x002D) { # -
              $self->{state} = BEFORE_TOKEN_STATE;
              $self->{c} = $self->{get_char}->();
              return {type => CDO_TOKEN};
              #redo A;
            } else {
              ## NOTE: |<!-| not followed by |-|.  The |<| and |!| are
              ## delimiters; the |-| might still begin an |IDENT|, so
              ## the token in progress is saved for a later call.
              unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
              ## NOTE: |-| in |ident| in |IDENT|
              $self->{t} = {type => IDENT_TOKEN, value => '-'};
              $self->{state} = BEFORE_NMSTART_STATE;
              #reprocess
              return {type => DELIM_TOKEN, value => '<'};
              #redo A;
            }
          } else {
            unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'};
            $self->{state} = BEFORE_TOKEN_STATE;
            #reprocess
            return {type => DELIM_TOKEN, value => '<'};
            #redo A;
          }
        } else {
          $self->{state} = BEFORE_TOKEN_STATE;
          #reprocess
          return {type => DELIM_TOKEN, value => '<'};
          #redo A;
        }
      } elsif ({
                0x003B => 1, # ;
                0x007B => 1, # {
                0x007D => 1, # }
                0x0028 => 1, # (
                0x0029 => 1, # )
                0x005B => 1, # [
                0x005D => 1, # ]
               }->{$self->{c}}) {
        ## NOTE: The token type is the punctuation character itself,
        ## so it has to be captured before the lookahead is advanced.
        my $punctuation = chr $self->{c};
        # stay in the state
        $self->{c} = $self->{get_char}->();
        return {type => $punctuation};
        # redo A;
      } elsif ({
                0x0020 => 1, # SP
                0x0009 => 1, # \t
                0x000D => 1, # \r
                0x000A => 1, # \n
                0x000C => 1, # \f
               }->{$self->{c}}) {
        W: {
          $self->{c} = $self->{get_char}->();
          if ({
               0x0020 => 1, # SP
               0x0009 => 1, # \t
               0x000D => 1, # \r
               0x000A => 1, # \n
               0x000C => 1, # \f
              }->{$self->{c}}) {
            redo W;
          } elsif (my $combinator = {
                                     0x002B => PLUS_TOKEN, # +
                                     0x003E => GREATER_TOKEN, # >
                                     0x002C => COMMA_TOKEN, # ,
                                     0x007E => TILDE_TOKEN, # ~
                                    }->{$self->{c}}) {
            ## NOTE: White space before these characters is dropped;
            ## only the character's own token is returned.
            # stay in the state
            $self->{c} = $self->{get_char}->();
            return {type => $combinator};
            #redo A;
          } else {
            # stay in the state
            # reprocess
            return {type => S_TOKEN};
            #redo A;
          }
        } # W
      } elsif (my $match_type = {
                                 0x007C => DASHMATCH_TOKEN, # |
                                 0x005E => PREFIXMATCH_TOKEN, # ^
                                 0x0024 => SUFFIXMATCH_TOKEN, # $
                                 0x002A => SUBSTRINGMATCH_TOKEN, # *
                                }->{$self->{c}}) {
        ## NOTE: Remember the character in case it turns out to be a
        ## plain delimiter rather than the start of a match operator.
        my $delim = chr $self->{c};
        $self->{c} = $self->{get_char}->();
        if ($self->{c} == 0x003D) { # =
          # stay in the state
          $self->{c} = $self->{get_char}->();
          return {type => $match_type};
          #redo A;
        } else {
          # stay in the state
          # reprocess
          return {type => DELIM_TOKEN, value => $delim};
          #redo A;
        }
      } elsif ($self->{c} == 0x002B) { # +
        # stay in the state
        $self->{c} = $self->{get_char}->();
        return {type => PLUS_TOKEN};
        #redo A;
      } elsif ($self->{c} == 0x003E) { # >
        # stay in the state
        $self->{c} = $self->{get_char}->();
        return {type => GREATER_TOKEN};
        #redo A;
      } elsif ($self->{c} == 0x002C) { # ,
        # stay in the state
        $self->{c} = $self->{get_char}->();
        return {type => COMMA_TOKEN};
        #redo A;
      } elsif ($self->{c} == 0x007E) { # ~
        $self->{c} = $self->{get_char}->();
        if ($self->{c} == 0x003D) { # =
          # stay in the state
          $self->{c} = $self->{get_char}->();
          return {type => INCLUDES_TOKEN};
          #redo A;
        } else {
          # stay in the state
          # reprocess
          return {type => TILDE_TOKEN};
          #redo A;
        }
      } elsif ($self->{c} == -1) {
        # stay in the state
        $self->{c} = $self->{get_char}->();
        return {type => EOF_TOKEN};
        #redo A;
      } else {
        # stay in the state
        $current_token = {type => DELIM_TOKEN, value => chr $self->{c}};
        $self->{c} = $self->{get_char}->();
        return $current_token;
        #redo A;
      }
    } elsif ($self->{state} == BEFORE_NMSTART_STATE) {
      ## NOTE: |nmstart| in |ident| in (|IDENT| or |ATKEYWORD|), or in
      ## the unit identifier following |num|.
      if (_is_nmstart ($self->{c})) {
        $current_token->{value} .= chr $self->{c};
        $self->{state} = NAME_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x005C) { # \
        $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x002D and # -
               $current_token->{type} == IDENT_TOKEN) {
        $self->{c} = $self->{get_char}->();
        if ($self->{c} == 0x003E) { # >
          $self->{state} = BEFORE_TOKEN_STATE;
          $self->{c} = $self->{get_char}->();
          return {type => CDC_TOKEN};
          #redo A;
        } else {
          ## NOTE: |-|, |-|, $self->{c}.  The first |-| is a delimiter;
          ## the second one might still begin an |IDENT|, so the token
          ## in progress is saved for the next call.
          $self->{t} = $current_token;
          # stay in the state
          # reconsume
          return {type => DELIM_TOKEN, value => '-'};
          #redo A;
        }
      } elsif ($self->{c} == 0x002D and # -
               $current_token->{type} == ATKEYWORD_TOKEN and
               $current_token->{value} eq '') {
        ## NOTE: Optional leading |-| in |ident| in |ATKEYWORD|.
        $current_token->{value} = '-';
        # stay in the state
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        ## NOTE: No name follows.  The current character belongs to
        ## the next token, so it is reprocessed, not consumed.
        $self->{state} = BEFORE_TOKEN_STATE;
        if ($current_token->{type} == NUMBER_TOKEN) {
          ## NOTE: |-| after |num| that does not begin a unit.
          $current_token->{value} = '';
          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'};
          return $current_token;
        } elsif ($current_token->{type} == ATKEYWORD_TOKEN) {
          ## NOTE: |@|, optionally followed by |-|, not followed by
          ## |nmstart|.
          unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}
              if $current_token->{value} eq '-';
          return {type => DELIM_TOKEN, value => '@'};
        } else {
          ## NOTE: |-| not followed by |nmstart|.
          return {type => DELIM_TOKEN, value => '-'};
        }
      }
    } elsif ($self->{state} == AFTER_NUMBER_STATE) {
      ## NOTE: |$current_token| is a |NUMBER_TOKEN| whose digits are
      ## in |number| and whose |value| is empty.
      if ($self->{c} == 0x002D) { # -
        ## NOTE: |-| in |ident|.
        $current_token->{value} = '-';
        $self->{state} = BEFORE_NMSTART_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif (_is_nmstart ($self->{c})) {
        ## NOTE: |nmstart| in |ident|.
        $current_token->{value} = chr $self->{c};
        $self->{state} = NAME_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x005C) { # \
        ## NOTE: |nmstart| in |ident|.
        $current_token->{value} = '';
        $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x0025) { # %
        $current_token->{type} = PERCENTAGE_TOKEN;
        $self->{state} = BEFORE_TOKEN_STATE;
        $self->{c} = $self->{get_char}->();
        return $current_token;
        #redo A;
      } else {
        $self->{state} = BEFORE_TOKEN_STATE;
        # reprocess
        return $current_token;
        #redo A;
      }
    } elsif ($self->{state} == HASH_OPEN_STATE) {
      ## NOTE: The first |nmchar| in |name| in |HASH|.
      if (_is_nmchar ($self->{c})) {
        $current_token->{value} .= chr $self->{c};
        $self->{state} = NAME_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x005C) { # \
        $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        ## NOTE: |#| not followed by a name.
        $self->{state} = BEFORE_TOKEN_STATE;
        # reprocess
        return {type => DELIM_TOKEN, value => '#'};
        #redo A;
      }
    } elsif ($self->{state} == NAME_STATE) {
      ## NOTE: |nmchar| in (|ident| or |name|).
      if (_is_nmchar ($self->{c})) {
        $current_token->{value} .= chr $self->{c};
        # stay in the state
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x005C) { # \
        $self->{state} = ESCAPE_OPEN_STATE; $q = 0;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x0028 and # (
               $current_token->{type} == IDENT_TOKEN) {
        ## NOTE: This version of the tokenizer does not support the |URI|
        ## token type, so |url(| is tokenized as a |FUNCTION| like any
        ## other name followed by |(|.  Note that browsers disagree in
        ## how to tokenize |url| function.
        $current_token->{type} = FUNCTION_TOKEN;
        $self->{state} = BEFORE_TOKEN_STATE;
        $self->{c} = $self->{get_char}->();
        return $current_token;
        #redo A;
      } else {
        $self->{state} = BEFORE_TOKEN_STATE;
        # reconsume
        return $current_token;
        #redo A;
      }
    } elsif ($self->{state} == ESCAPE_OPEN_STATE) {
      ## NOTE: The character immediately following |\|.
      $current_token->{has_escape} = 1;
      if (defined (my $digit = _hex_digit_value ($self->{c}))) {
        ## NOTE: second character of |unicode| in |escape|.
        $char = $digit;
        $self->{state} = ESCAPE_STATE; $i = 2;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($q == 0 and
               ($self->{c} == 0x000A or # \n
                $self->{c} == 0x000C or # \f
                $self->{c} == 0x000D or # \r
                $self->{c} == -1)) {
        ## NOTE: In |ident| or |name|, |\| cannot be followed by a
        ## newline or by the end of input: the name ends here and the
        ## |\| becomes a delimiter.
        ## NOTE(review): If the |\| was the first character of the
        ## name, the token returned here has an empty (or prefix-only)
        ## |value|; callers may want to treat that case specially.
        $self->{state} = BEFORE_TOKEN_STATE;
        unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'};
        # reconsume
        return $current_token;
        #redo A;
      } elsif ($self->{c} == 0x000A or # \n
               $self->{c} == 0x000C) { # \f
        ## NOTE: In |nl| in ... in |string|.
        $current_token->{value} .= chr $self->{c};
        $self->{state} = STRING_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x000D) { # \r
        ## NOTE: In |nl| in ... in |string|.  A following |\n|, if any,
        ## is swallowed in |ESCAPE_BEFORE_LF_STATE|; |$char| is cleared
        ## so that no escaped character is appended there.
        $current_token->{value} .= "\x0D\x0A";
        $char = undef;
        $self->{state} = ESCAPE_BEFORE_LF_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == -1) {
        ## NOTE: End of input after |\| in |string|; |STRING_STATE|
        ## turns the token into |INVALID|.
        $self->{state} = STRING_STATE;
        # reconsume
        redo A;
      } else {
        ## NOTE: second character of |escape|.
        $current_token->{value} .= chr $self->{c};
        $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      }
    } elsif ($self->{state} == ESCAPE_STATE) {
      ## NOTE: third..seventh character of |unicode| in |escape|.
      if (defined (my $digit = _hex_digit_value ($self->{c}))) {
        $char = $char * 0x10 + $digit;
        ## After the sixth hexadecimal digit only the optional
        ## trailing white space can follow.
        $self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x0020 or # SP
               $self->{c} == 0x000A or # \n
               $self->{c} == 0x0009 or # \t
               $self->{c} == 0x000C) { # \f
        $current_token->{value} .= chr $char;
        $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x000D) { # \r
        $self->{state} = ESCAPE_BEFORE_LF_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        $current_token->{value} .= chr $char;
        $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;
        # reconsume
        redo A;
      }
    } elsif ($self->{state} == ESCAPE_BEFORE_NL_STATE) {
      ## NOTE: eighth character of |unicode| in |escape|.
      if ($self->{c} == 0x0020 or # SP
          $self->{c} == 0x000A or # \n
          $self->{c} == 0x0009 or # \t
          $self->{c} == 0x000C) { # \f
        $current_token->{value} .= chr $char;
        $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x000D) { # \r
        $self->{state} = ESCAPE_BEFORE_LF_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        $current_token->{value} .= chr $char;
        $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;
        # reconsume
        redo A;
      }
    } elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) {
      ## NOTE: |\n| in |\r\n| after |unicode| in |escape|, or in a
      ## line continuation in |string| (in which case |$char| is
      ## undefined and there is no escaped character to append).
      $current_token->{value} .= chr $char if defined $char;
      $self->{state} = $q == 0 ? NAME_STATE : STRING_STATE;
      if ($self->{c} == 0x000A) { # \n
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        # reconsume
        redo A;
      }
    } elsif ($self->{state} == STRING_STATE) {
      ## NOTE: A character in |string$Q| in |string| in |STRING|, or
      ## a character in |invalid$Q| in |invalid| in |INVALID|,
      ## where |$Q = $q == 0x0022 ? 1 : 2|.
      if ($self->{c} == 0x005C) { # \
        $self->{state} = ESCAPE_OPEN_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == $q) { # " | '
        $self->{state} = BEFORE_TOKEN_STATE;
        $self->{c} = $self->{get_char}->();
        return $current_token;
        #redo A;
      } elsif ($self->{c} == 0x000A or # \n
               $self->{c} == 0x000D or # \r
               $self->{c} == 0x000C or # \f
               $self->{c} == -1) {
        ## NOTE: Unterminated string.
        $current_token->{type} = INVALID_TOKEN;
        $self->{state} = BEFORE_TOKEN_STATE;
        # reconsume
        return $current_token;
        #redo A;
      } else {
        $current_token->{value} .= chr $self->{c};
        # stay in the state
        $self->{c} = $self->{get_char}->();
        redo A;
      }
    } elsif ($self->{state} == NUMBER_STATE) {
      ## NOTE: 2nd, 3rd, or ... character in |num| before |.|.
      if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
        $current_token->{value} .= chr $self->{c};
        # stay in the state
        $self->{c} = $self->{get_char}->();
        redo A;
      } elsif ($self->{c} == 0x002E) { # .
        $self->{state} = NUMBER_DOT_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        ## The digits move to |number|; |value| is reused for the
        ## unit identifier, if any.
        $current_token->{number} = $current_token->{value};
        $current_token->{value} = '';
        $self->{state} = AFTER_NUMBER_STATE;
        # reprocess
        redo A;
      }
    } elsif ($self->{state} == NUMBER_DOT_STATE) {
      ## NOTE: The character immediately following |.| in |num|.
      if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
        ## The |.| was consumed but not yet appended.
        $current_token->{value} .= '.' . chr $self->{c};
        $self->{state} = NUMBER_DOT_NUMBER_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        ## NOTE: |.| not followed by a digit is a delimiter after the
        ## integer.
        unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'};
        $current_token->{number} = $current_token->{value};
        $current_token->{value} = '';
        $self->{state} = BEFORE_TOKEN_STATE;
        # reprocess
        return $current_token;
        #redo A;
      }
    } elsif ($self->{state} == NUMBER_FRACTION_STATE) {
      ## NOTE: The character immediately following |.| at the beginning of |num|.
      if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
        $current_token->{value} .= chr $self->{c};
        $self->{state} = NUMBER_DOT_NUMBER_STATE;
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        ## NOTE: |.| that does not begin a number.
        $self->{state} = BEFORE_TOKEN_STATE;
        # reprocess
        return {type => DELIM_TOKEN, value => '.'};
        #redo A;
      }
    } elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) {
      ## NOTE: |[0-9]| in |num| after |.|.
      if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) {
        $current_token->{value} .= chr $self->{c};
        # stay in the state
        $self->{c} = $self->{get_char}->();
        redo A;
      } else {
        $current_token->{number} = $current_token->{value};
        $current_token->{value} = '';
        $self->{state} = AFTER_NUMBER_STATE;
        # reprocess
        redo A;
      }
    } else {
      die "$0: Unknown state |$self->{state}|";
    }
  } # A

  ## TODO: |URI|, |UNICODE-RANGE|, |COMMENT|

} # get_next_token
606
607 1;
608 # $Date:$

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24