127 |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
128 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
129 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
130 |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1, |
131 |
|
line => $self->{line}, column => $self->{column}}; |
132 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
133 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
134 |
redo A; |
redo A; |
135 |
} elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u |
} elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u |
136 |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}, |
137 |
|
line => $self->{line}, column => $self->{column}}; |
138 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
139 |
if ($self->{c} == 0x002B) { # + |
if ($self->{c} == 0x002B) { # + |
140 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
141 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
142 |
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
143 |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
179 |
# |
# |
180 |
} else { |
} else { |
181 |
my $token = $self->{t}; |
my $token = $self->{t}; |
182 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', |
183 |
|
line => $self->{line}, |
184 |
|
column => $self->{column}}; |
185 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
186 |
# reprocess |
# reprocess |
187 |
return $token; |
return $token; |
194 |
return $self->{t}; |
return $self->{t}; |
195 |
#redo A; |
#redo A; |
196 |
} else { |
} else { |
197 |
unshift @{$self->{token}}, {type => PLUS_TOKEN}; |
unshift @{$self->{token}}, |
198 |
|
{type => PLUS_TOKEN, line => $l, column => $c}; |
199 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
200 |
# reprocess |
# reprocess |
201 |
return $self->{t}; |
return $self->{t}; |
211 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
212 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
213 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
214 |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}, |
215 |
|
line => $self->{line}, column => $self->{column}}; |
216 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
217 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
218 |
redo A; |
redo A; |
219 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
220 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
221 |
$self->{t} = {type => IDENT_TOKEN, value => ''}; |
$self->{t} = {type => IDENT_TOKEN, value => '', |
222 |
|
line => $self->{line}, column => $self->{column}}; |
223 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
224 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
225 |
redo A; |
redo A; |
226 |
} elsif ($self->{c} == 0x0040) { # @ |
} elsif ($self->{c} == 0x0040) { # @ |
227 |
## NOTE: |@| in |ATKEYWORD| |
## NOTE: |@| in |ATKEYWORD| |
228 |
$self->{t} = {type => ATKEYWORD_TOKEN, value => ''}; |
$self->{t} = {type => ATKEYWORD_TOKEN, value => '', |
229 |
|
line => $self->{line}, column => $self->{column}}; |
230 |
$self->{state} = AFTER_AT_STATE; |
$self->{state} = AFTER_AT_STATE; |
231 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
232 |
redo A; |
redo A; |
233 |
} elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or ' |
} elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or ' |
234 |
$self->{t} = {type => STRING_TOKEN, value => ''}; |
$self->{t} = {type => STRING_TOKEN, value => '', |
235 |
|
line => $self->{line}, column => $self->{column}}; |
236 |
$self->{state} = STRING_STATE; $q = $self->{c}; |
$self->{state} = STRING_STATE; $q = $self->{c}; |
237 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
238 |
redo A; |
redo A; |
239 |
} elsif ($self->{c} == 0x0023) { # # |
} elsif ($self->{c} == 0x0023) { # # |
240 |
## NOTE: |#| in |HASH|. |
## NOTE: |#| in |HASH|. |
241 |
$self->{t} = {type => HASH_TOKEN, value => ''}; |
$self->{t} = {type => HASH_TOKEN, value => '', |
242 |
|
line => $self->{line}, column => $self->{column}}; |
243 |
$self->{state} = HASH_OPEN_STATE; |
$self->{state} = HASH_OPEN_STATE; |
244 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
245 |
redo A; |
redo A; |
246 |
} elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
} elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
247 |
## NOTE: |num|. |
## NOTE: |num|. |
248 |
$self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}, |
249 |
|
line => $self->{line}, column => $self->{column}}; |
250 |
$self->{state} = NUMBER_STATE; |
$self->{state} = NUMBER_STATE; |
251 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
252 |
redo A; |
redo A; |
253 |
} elsif ($self->{c} == 0x002E) { # . |
} elsif ($self->{c} == 0x002E) { # . |
254 |
## NOTE: |num|. |
## NOTE: |num|. |
255 |
$self->{t} = {type => NUMBER_TOKEN, value => '0'}; |
$self->{t} = {type => NUMBER_TOKEN, value => '0', |
256 |
|
line => $self->{line}, column => $self->{column}}; |
257 |
$self->{state} = NUMBER_FRACTION_STATE; |
$self->{state} = NUMBER_FRACTION_STATE; |
258 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
259 |
redo A; |
redo A; |
260 |
} elsif ($self->{c} == 0x002F) { # / |
} elsif ($self->{c} == 0x002F) { # / |
261 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
262 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
263 |
if ($self->{c} == 0x002A) { # * |
if ($self->{c} == 0x002A) { # * |
264 |
C: { |
C: { |
290 |
} else { |
} else { |
291 |
# stay in the state. |
# stay in the state. |
292 |
# reprocess |
# reprocess |
293 |
return {type => DELIM_TOKEN, value => '/'}; |
return {type => DELIM_TOKEN, value => '/', line => $l, column => $c}; |
294 |
#redo A; |
#redo A; |
295 |
} |
} |
296 |
} elsif ($self->{c} == 0x003C) { # < |
} elsif ($self->{c} == 0x003C) { # < |
297 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
298 |
## NOTE: |CDO| |
## NOTE: |CDO| |
299 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
300 |
if ($self->{c} == 0x0021) { # ! |
if ($self->{c} == 0x0021) { # ! |
304 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
305 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
306 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
307 |
return {type => CDO_TOKEN}; |
return {type => CDO_TOKEN, line => $l, column => $c}; |
308 |
#redo A; |
#redo A; |
309 |
} else { |
} else { |
310 |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
unshift @{$self->{token}}, |
311 |
|
{type => EXCLAMATION_TOKEN, line => $l, column => $c + 1}; |
312 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
313 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', |
314 |
|
line => $l, column => $c + 2}; |
315 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
316 |
#reprocess |
#reprocess |
317 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<', |
318 |
|
line => $l, column => $c}; |
319 |
#redo A; |
#redo A; |
320 |
} |
} |
321 |
} else { |
} else { |
322 |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN, |
323 |
|
line => $l, column => $c + 1}; |
324 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
325 |
#reprocess |
#reprocess |
326 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<', |
327 |
|
line => $l, column => $c}; |
328 |
#redo A; |
#redo A; |
329 |
} |
} |
330 |
} else { |
} else { |
331 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
332 |
#reprocess |
#reprocess |
333 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<', |
334 |
|
line => $l, column => $c}; |
335 |
#redo A; |
#redo A; |
336 |
} |
} |
337 |
} elsif (my $t = { |
} elsif (my $t = { |
360 |
0x000A => 1, # \n |
0x000A => 1, # \n |
361 |
0x000C => 1, # \f |
0x000C => 1, # \f |
362 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
363 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
364 |
W: { |
W: { |
365 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
366 |
if ({ |
if ({ |
377 |
0x002C => COMMA_TOKEN, # , |
0x002C => COMMA_TOKEN, # , |
378 |
0x007E => TILDE_TOKEN, # ~ |
0x007E => TILDE_TOKEN, # ~ |
379 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
380 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
381 |
# stay in the state |
# stay in the state |
382 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
383 |
return {type => $v}; |
return {type => $v, line => $l, column => $c}; |
384 |
#redo A; |
#redo A; |
385 |
} else { |
} else { |
386 |
# stay in the state |
# stay in the state |
387 |
# reprocess |
# reprocess |
388 |
return {type => S_TOKEN}; |
return {type => S_TOKEN, line => $l, column => $c}; |
389 |
#redo A; |
#redo A; |
390 |
} |
} |
391 |
} # W |
} # W |
395 |
0x0024 => SUFFIXMATCH_TOKEN, # $ |
0x0024 => SUFFIXMATCH_TOKEN, # $ |
396 |
0x002A => SUBSTRINGMATCH_TOKEN, # * |
0x002A => SUBSTRINGMATCH_TOKEN, # * |
397 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
398 |
|
my ($line, $column) = ($self->{line}, $self->{column}); |
399 |
my $c = $self->{c}; |
my $c = $self->{c}; |
400 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
401 |
if ($self->{c} == 0x003D) { # = |
if ($self->{c} == 0x003D) { # = |
402 |
# stay in the state |
# stay in the state |
403 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
404 |
return {type => $v}; |
return {type => $v, line => $line, column => $column}; |
405 |
#redo A; |
#redo A; |
406 |
} elsif ($v = { |
} elsif ($v = { |
407 |
0x002A => STAR_TOKEN, # * |
0x002A => STAR_TOKEN, # * |
409 |
}->{$c}) { |
}->{$c}) { |
410 |
# stay in the state. |
# stay in the state. |
411 |
# reprocess |
# reprocess |
412 |
return {type => $v}; |
return {type => $v, line => $line, column => $column}; |
413 |
#redo A; |
#redo A; |
414 |
} else { |
} else { |
415 |
# stay in the state |
# stay in the state |
416 |
# reprocess |
# reprocess |
417 |
return {type => DELIM_TOKEN, value => chr $c}; |
return {type => DELIM_TOKEN, value => chr $c, |
418 |
|
line => $line, column => $column}; |
419 |
#redo A; |
#redo A; |
420 |
} |
} |
421 |
} elsif ($self->{c} == 0x002B) { # + |
} elsif ($self->{c} == 0x002B) { # + |
422 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
423 |
# stay in the state |
# stay in the state |
424 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
425 |
return {type => PLUS_TOKEN}; |
return {type => PLUS_TOKEN, line => $l, column => $c}; |
426 |
#redo A; |
#redo A; |
427 |
} elsif ($self->{c} == 0x003E) { # > |
} elsif ($self->{c} == 0x003E) { # > |
428 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
429 |
# stay in the state |
# stay in the state |
430 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
431 |
return {type => GREATER_TOKEN}; |
return {type => GREATER_TOKEN, line => $l, column => $c}; |
432 |
#redo A; |
#redo A; |
433 |
} elsif ($self->{c} == 0x002C) { # , |
} elsif ($self->{c} == 0x002C) { # , |
434 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
435 |
# stay in the state |
# stay in the state |
436 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
437 |
return {type => COMMA_TOKEN}; |
return {type => COMMA_TOKEN, line => $l, column => $c}; |
438 |
#redo A; |
#redo A; |
439 |
} elsif ($self->{c} == 0x007E) { # ~ |
} elsif ($self->{c} == 0x007E) { # ~ |
440 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
441 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
442 |
if ($self->{c} == 0x003D) { # = |
if ($self->{c} == 0x003D) { # = |
443 |
# stay in the state |
# stay in the state |
444 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
445 |
return {type => INCLUDES_TOKEN}; |
return {type => INCLUDES_TOKEN, line => $l, column => $c}; |
446 |
#redo A; |
#redo A; |
447 |
} else { |
} else { |
448 |
# stay in the state |
# stay in the state |
449 |
# reprocess |
# reprocess |
450 |
return {type => TILDE_TOKEN}; |
return {type => TILDE_TOKEN, line => $l, column => $c}; |
451 |
#redo A; |
#redo A; |
452 |
} |
} |
453 |
} elsif ($self->{c} == -1) { |
} elsif ($self->{c} == -1) { |
454 |
# stay in the state |
# stay in the state |
455 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
456 |
return {type => EOF_TOKEN}; |
return {type => EOF_TOKEN, |
457 |
|
line => $self->{line}, column => $self->{column}}; |
458 |
#redo A; |
#redo A; |
459 |
} else { |
} else { |
460 |
# stay in the state |
# stay in the state |
461 |
$self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}, |
462 |
|
line => $self->{line}, column => $self->{column}}; |
463 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
464 |
return $self->{t}; |
return $self->{t}; |
465 |
#redo A; |
#redo A; |
487 |
if ($self->{c} == 0x003E) { # > |
if ($self->{c} == 0x003E) { # > |
488 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
489 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
490 |
return {type => CDC_TOKEN}; |
return {type => CDC_TOKEN, |
491 |
|
line => $self->{t}->{line}, |
492 |
|
column => $self->{t}->{column}}; |
493 |
#redo A; |
#redo A; |
494 |
} else { |
} else { |
495 |
## NOTE: |-|, |-|, $self->{c} |
## NOTE: |-|, |-|, $self->{c} |
496 |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
497 |
|
$self->{t}->{column}++; |
498 |
# stay in the state |
# stay in the state |
499 |
# reconsume |
# reconsume |
500 |
return {type => MINUS_TOKEN}; |
return {type => MINUS_TOKEN, |
501 |
|
line => $self->{t}->{line}, |
502 |
|
column => $self->{t}->{column} - 1}; |
503 |
#redo A; |
#redo A; |
504 |
} |
} |
505 |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
506 |
|
my ($l, $c) = ($self->{line}, $self->{column}); # second '-' |
507 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
508 |
if ($self->{c} == 0x003E) { # > |
if ($self->{c} == 0x003E) { # > |
509 |
unshift @{$self->{token}}, {type => CDC_TOKEN}; |
unshift @{$self->{token}}, {type => CDC_TOKEN}; |
514 |
return $self->{t}; |
return $self->{t}; |
515 |
#redo A; |
#redo A; |
516 |
} else { |
} else { |
517 |
## NOTE: |-|, |-|, $self->{c} |
## NOTE: NUMBER, |-|, |-|, $self->{c} |
518 |
my $t = $self->{t}; |
my $t = $self->{t}; |
519 |
$t->{type} = NUMBER_TOKEN; |
$t->{type} = NUMBER_TOKEN; |
520 |
$t->{value} = ''; |
$t->{value} = ''; |
521 |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1, |
522 |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
line => $l, column => $c}; |
523 |
|
unshift @{$self->{token}}, {type => MINUS_TOKEN, |
524 |
|
line => $l, column => $c - 1}; |
525 |
# stay in the state |
# stay in the state |
526 |
# reconsume |
# reconsume |
527 |
return $t; |
return $t; |
536 |
|
|
537 |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
538 |
## NOTE: |-| after |NUMBER|. |
## NOTE: |-| after |NUMBER|. |
539 |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN, |
540 |
|
line => $self->{line}, |
541 |
|
column => $self->{column} - 1}; |
542 |
|
## BUG: column might be wrong if on the line boundary. |
543 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
544 |
# reprocess |
# reprocess |
545 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
549 |
## NOTE: |-| not followed by |nmstart|. |
## NOTE: |-| not followed by |nmstart|. |
550 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
551 |
# reprocess |
# reprocess |
552 |
return {type => MINUS_TOKEN}; |
return {type => MINUS_TOKEN, |
553 |
|
line => $self->{line}, column => $self->{column} - 1}; |
554 |
|
## BUG: column might be wrong if on the line boundary. |
555 |
} |
} |
556 |
} elsif ($self->{state} == AFTER_AT_STATE) { |
} elsif ($self->{state} == AFTER_AT_STATE) { |
557 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
574 |
} else { |
} else { |
575 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
576 |
# reprocess |
# reprocess |
577 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@', |
578 |
|
line => $self->{t}->{line}, |
579 |
|
column => $self->{t}->{column}}; |
580 |
} |
} |
581 |
} elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) { |
} elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) { |
582 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
671 |
} else { |
} else { |
672 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
673 |
# reprocess |
# reprocess |
674 |
return {type => DELIM_TOKEN, value => '#'}; |
return {type => DELIM_TOKEN, value => '#', |
675 |
|
line => $self->{t}->{line}, |
676 |
|
column => $self->{t}->{column}}; |
677 |
#redo A; |
#redo A; |
678 |
} |
} |
679 |
} elsif ($self->{state} == NAME_STATE) { |
} elsif ($self->{state} == NAME_STATE) { |
943 |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
944 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
945 |
# reprocess |
# reprocess |
946 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
947 |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
line => $self->{line}, |
948 |
|
column => $self->{column} - 2}; |
949 |
|
unshift @{$self->{token}}, {type => MINUS_TOKEN, |
950 |
|
line => $self->{line}, |
951 |
|
column => $self->{column} - 1}; |
952 |
|
## BUG: line and column might be wrong if they are on the |
953 |
|
## line boundary. |
954 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
955 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
956 |
return $self->{t}; |
return $self->{t}; |
958 |
} elsif (length $self->{t}->{value}) { |
} elsif (length $self->{t}->{value}) { |
959 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
960 |
# reprocess |
# reprocess |
961 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
962 |
|
line => $self->{line}, |
963 |
|
column => $self->{column} - 1}; |
964 |
|
## BUG: line and column might be wrong if they are on the |
965 |
|
## line boundary. |
966 |
return $self->{t}; |
return $self->{t}; |
967 |
#redo A; |
#redo A; |
968 |
} else { |
} else { |
969 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
970 |
# reprocess |
# reprocess |
971 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
972 |
|
line => $self->{line}, |
973 |
|
column => $self->{column} - 1}; |
974 |
|
## BUG: line and column might be wrong if they are on the |
975 |
|
## line boundary. |
976 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
977 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
978 |
return $self->{t}; |
return $self->{t}; |
982 |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
983 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
984 |
# reprocess |
# reprocess |
985 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
986 |
return {type => MINUS_TOKEN}; |
line => $self->{line}, |
987 |
|
column => $self->{column} - 2}; |
988 |
|
return {type => MINUS_TOKEN, |
989 |
|
line => $self->{line}, |
990 |
|
column => $self->{column} - 1}; |
991 |
|
## BUG: line and column might be wrong if they are on the |
992 |
|
## line boundary. |
993 |
#redo A; |
#redo A; |
994 |
} elsif (length $self->{t}->{value}) { |
} elsif (length $self->{t}->{value}) { |
995 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
996 |
# reprocess |
# reprocess |
997 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
998 |
|
line => $self->{line}, |
999 |
|
column => $self->{column} - 1}; |
1000 |
|
## BUG: line and column might be wrong if they are on the |
1001 |
|
## line boundary. |
1002 |
return $self->{t}; |
return $self->{t}; |
1003 |
#redo A; |
#redo A; |
1004 |
} else { |
} else { |
1005 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
1006 |
# reprocess |
# reprocess |
1007 |
return {type => DELIM_TOKEN, value => '\\'}; |
return {type => DELIM_TOKEN, value => '\\', |
1008 |
|
line => $self->{line}, |
1009 |
|
column => $self->{column} - 1}; |
1010 |
|
## BUG: line and column might be wrong if they are on the |
1011 |
|
## line boundary. |
1012 |
#redo A; |
#redo A; |
1013 |
} |
} |
1014 |
} |
} |
1017 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
1018 |
redo A; |
redo A; |
1019 |
} else { |
} else { |
1020 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
1021 |
|
line => $self->{line}, |
1022 |
|
column => $self->{column} - 1}; |
1023 |
|
## BUG: line and column might be wrong if they are on the |
1024 |
|
## line boundary. |
1025 |
$self->{t}->{type} = { |
$self->{t}->{type} = { |
1026 |
STRING_TOKEN, INVALID_TOKEN, |
STRING_TOKEN, INVALID_TOKEN, |
1027 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
1190 |
} else { |
} else { |
1191 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
1192 |
# reprocess |
# reprocess |
1193 |
return {type => DOT_TOKEN}; |
return {type => DOT_TOKEN, |
1194 |
|
line => $self->{line}, column => $self->{column} - 1}; |
1195 |
|
## BUG: line and column might be wrong if they are on the |
1196 |
|
## line boundary. |
1197 |
#redo A; |
#redo A; |
1198 |
} |
} |
1199 |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |