| 36 |
sub DIMENSION_TOKEN () { 12 } |
sub DIMENSION_TOKEN () { 12 } |
| 37 |
sub PERCENTAGE_TOKEN () { 13 } |
sub PERCENTAGE_TOKEN () { 13 } |
| 38 |
sub UNICODE_RANGE_TOKEN () { 14 } |
sub UNICODE_RANGE_TOKEN () { 14 } |
|
sub UNICODE_RANGE_INVALID_TOKEN () { 15 } |
|
| 39 |
sub DELIM_TOKEN () { 16 } |
sub DELIM_TOKEN () { 16 } |
| 40 |
sub PLUS_TOKEN () { 17 } |
sub PLUS_TOKEN () { 17 } |
| 41 |
sub GREATER_TOKEN () { 18 } |
sub GREATER_TOKEN () { 18 } |
| 59 |
sub COMMENT_TOKEN () { 36 } |
sub COMMENT_TOKEN () { 36 } |
| 60 |
sub COMMENT_INVALID_TOKEN () { 37 } |
sub COMMENT_INVALID_TOKEN () { 37 } |
| 61 |
sub EOF_TOKEN () { 38 } |
sub EOF_TOKEN () { 38 } |
| 62 |
|
sub MINUS_TOKEN () { 39 } |
| 63 |
|
sub STAR_TOKEN () { 40 } |
| 64 |
|
sub VBAR_TOKEN () { 41 } |
| 65 |
|
sub DOT_TOKEN () { 42 } |
| 66 |
|
sub COLON_TOKEN () { 43 } |
| 67 |
|
sub MATCH_TOKEN () { 44 } |
| 68 |
|
sub EXCLAMATION_TOKEN () { 45 } |
| 69 |
|
|
| 70 |
our @TokenName = qw( |
our @TokenName = qw( |
| 71 |
0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID |
0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID |
| 72 |
STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE |
STRING INVALID NUMBER DIMENSION PERCENTAGE UNICODE_RANGE |
| 73 |
UNICODE_RANGE_INVALID DELIM PLUS GREATER COMMA TILDE DASHMATCH |
0 DELIM PLUS GREATER COMMA TILDE DASHMATCH |
| 74 |
PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON |
PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON |
| 75 |
LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT |
LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT |
| 76 |
COMMENT_INVALID EOF |
COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION |
| 77 |
); |
); |
| 78 |
|
|
| 79 |
sub new ($) { |
sub new ($) { |
| 109 |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
| 110 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
| 111 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
| 112 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
| 113 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 114 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 115 |
redo A; |
redo A; |
| 122 |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
| 123 |
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
| 124 |
$self->{c} == 0x003F) { # ? |
$self->{c} == 0x003F) { # ? |
| 125 |
$self->{t}->{value} .= '+' . chr $self->{c}; |
$self->{t}->{value} = chr $self->{c}; |
| 126 |
$self->{t}->{type} = UNICODE_RANGE_TOKEN; |
$self->{t}->{type} = UNICODE_RANGE_TOKEN; |
| 127 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 128 |
C: for (2..6) { |
C: for (2..6) { |
| 258 |
} else { |
} else { |
| 259 |
# stay in the state. |
# stay in the state. |
| 260 |
# reprocess |
# reprocess |
| 261 |
return {type => DELIM_STATE, value => '/'}; |
return {type => DELIM_TOKEN, value => '/'}; |
| 262 |
#redo A; |
#redo A; |
| 263 |
} |
} |
| 264 |
} elsif ($self->{c} == 0x003C) { # < |
} elsif ($self->{c} == 0x003C) { # < |
| 266 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 267 |
if ($self->{c} == 0x0021) { # ! |
if ($self->{c} == 0x0021) { # ! |
| 268 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 269 |
if ($self->{c} == 0x002C) { # - |
if ($self->{c} == 0x002D) { # - |
| 270 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 271 |
if ($self->{c} == 0x002C) { # - |
if ($self->{c} == 0x002D) { # - |
| 272 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 273 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 274 |
return {type => CDO_TOKEN}; |
return {type => CDO_TOKEN}; |
| 275 |
#redo A; |
#redo A; |
| 276 |
} else { |
} else { |
| 277 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
| 278 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
| 279 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 280 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 283 |
#redo A; |
#redo A; |
| 284 |
} |
} |
| 285 |
} else { |
} else { |
| 286 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
| 287 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 288 |
#reprocess |
#reprocess |
| 289 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<'}; |
| 296 |
#redo A; |
#redo A; |
| 297 |
} |
} |
| 298 |
} elsif (my $t = { |
} elsif (my $t = { |
| 299 |
0x003B => SEMICOLON_TOKEN, # ; |
0x0021 => EXCLAMATION_TOKEN, # ! |
| 300 |
0x007B => LBRACE_TOKEN, # { |
0x002D => MINUS_TOKEN, # - |
| 301 |
0x007D => RBRACE_TOKEN, # } |
0x002E => DOT_TOKEN, # . |
| 302 |
0x0028 => LPAREN_TOKEN, # ( |
0x003A => COLON_TOKEN, # : |
| 303 |
0x0029 => RPAREN_TOKEN, # ) |
0x003B => SEMICOLON_TOKEN, # ; |
| 304 |
0x005B => LBRACKET_TOKEN, # [ |
0x003D => MATCH_TOKEN, # = |
| 305 |
0x005D => RBRACKET_TOKEN, # ] |
0x007B => LBRACE_TOKEN, # { |
| 306 |
|
0x007D => RBRACE_TOKEN, # } |
| 307 |
|
0x0028 => LPAREN_TOKEN, # ( |
| 308 |
|
0x0029 => RPAREN_TOKEN, # ) |
| 309 |
|
0x005B => LBRACKET_TOKEN, # [ |
| 310 |
|
0x005D => RBRACKET_TOKEN, # ] |
| 311 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
| 312 |
# stay in the state |
# stay in the state |
| 313 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 360 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 361 |
return {type => $v}; |
return {type => $v}; |
| 362 |
#redo A; |
#redo A; |
| 363 |
|
} elsif ($v = { |
| 364 |
|
0x002A => STAR_TOKEN, # * |
| 365 |
|
0x007C => VBAR_TOKEN, # | |
| 366 |
|
}->{$c}) { |
| 367 |
|
# stay in the state. |
| 368 |
|
# reprocess |
| 369 |
|
return {type => $v}; |
| 370 |
|
#redo A; |
| 371 |
} else { |
} else { |
| 372 |
# stay in the state |
# stay in the state |
| 373 |
# reprocess |
# reprocess |
| 428 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 429 |
redo A; |
redo A; |
| 430 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
|
## TODO: 12-\X, 12-\{nl} |
|
| 431 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
| 432 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 433 |
redo A; |
redo A; |
| 434 |
} elsif ($self->{c} == 0x002D and # - |
} elsif ($self->{c} == 0x002D) { # - |
| 435 |
$self->{t}->{type} == IDENT_TOKEN) { |
if ($self->{t}->{type} == IDENT_TOKEN) { |
|
$self->{c} = $self->{get_char}->(); |
|
|
if ($self->{c} == 0x003E) { # > |
|
|
$self->{state} = BEFORE_TOKEN_STATE; |
|
| 436 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 437 |
return {type => CDC_TOKEN}; |
if ($self->{c} == 0x003E) { # > |
| 438 |
#redo A; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 439 |
|
$self->{c} = $self->{get_char}->(); |
| 440 |
|
return {type => CDC_TOKEN}; |
| 441 |
|
#redo A; |
| 442 |
|
} else { |
| 443 |
|
## NOTE: |-|, |-|, $self->{c} |
| 444 |
|
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 445 |
|
# stay in the state |
| 446 |
|
# reconsume |
| 447 |
|
return {type => MINUS_TOKEN}; |
| 448 |
|
#redo A; |
| 449 |
|
} |
| 450 |
|
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 451 |
|
$self->{c} = $self->{get_char}->(); |
| 452 |
|
if ($self->{c} == 0x003E) { # > |
| 453 |
|
unshift @{$self->{token}}, {type => CDC_TOKEN}; |
| 454 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 455 |
|
$self->{t}->{value} = ''; |
| 456 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 457 |
|
$self->{c} = $self->{get_char}->(); |
| 458 |
|
return $self->{t}; |
| 459 |
|
#redo A; |
| 460 |
|
} else { |
| 461 |
|
## NOTE: |-|, |-|, $self->{c} |
| 462 |
|
my $t = $self->{t}; |
| 463 |
|
$t->{type} = NUMBER_TOKEN; |
| 464 |
|
$t->{value} = ''; |
| 465 |
|
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
| 466 |
|
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 467 |
|
# stay in the state |
| 468 |
|
# reconsume |
| 469 |
|
return $t; |
| 470 |
|
#redo A; |
| 471 |
|
} |
| 472 |
} else { |
} else { |
| 473 |
## NOTE: |-|, |-|, $self->{c} |
# |
|
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
|
|
# stay in the state |
|
|
# reconsume |
|
|
return {type => DELIM_TOKEN, value => '-'}; |
|
|
#redo A; |
|
| 474 |
} |
} |
| 475 |
} else { |
} else { |
| 476 |
if ($self->{t}->{type} == NUMBER_TOKEN) { |
# |
| 477 |
## NOTE: |-| after |NUMBER|. |
} |
| 478 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
|
| 479 |
$self->{state} = BEFORE_TOKEN_STATE; |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 480 |
# reconsume |
## NOTE: |-| after |NUMBER|. |
| 481 |
$self->{t}->{value} = $self->{t}->{number}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 482 |
delete $self->{t}->{number}; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 483 |
return $self->{t}; |
# reprocess |
| 484 |
} else { |
$self->{t}->{type} = NUMBER_TOKEN; |
| 485 |
## NOTE: |-| not followed by |nmstart|. |
$self->{t}->{value} = ''; |
| 486 |
$self->{state} = BEFORE_TOKEN_STATE; |
return $self->{t}; |
| 487 |
$self->{c} = $self->{get_char}->(); |
} else { |
| 488 |
return {type => DELIM_TOKEN, value => '-'}; |
## NOTE: |-| not followed by |nmstart|. |
| 489 |
} |
$self->{state} = BEFORE_TOKEN_STATE; |
| 490 |
|
# reprocess |
| 491 |
|
return {type => MINUS_TOKEN}; |
| 492 |
} |
} |
| 493 |
} elsif ($self->{state} == AFTER_AT_STATE) { |
} elsif ($self->{state} == AFTER_AT_STATE) { |
| 494 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
| 531 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@'}; |
| 532 |
#redo A; |
#redo A; |
| 533 |
} else { |
} else { |
| 534 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 535 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 536 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 537 |
# reprocess |
# reprocess |
| 544 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 545 |
redo A; |
redo A; |
| 546 |
} else { |
} else { |
| 547 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 548 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 549 |
# reprocess |
# reprocess |
| 550 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@'}; |
| 552 |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
| 553 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
| 554 |
## NOTE: |-| in |ident|. |
## NOTE: |-| in |ident|. |
| 555 |
|
$self->{t}->{hyphen} = 1; |
| 556 |
$self->{t}->{value} = '-'; |
$self->{t}->{value} = '-'; |
| 557 |
|
$self->{t}->{type} = DIMENSION_TOKEN; |
| 558 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 559 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 560 |
redo A; |
redo A; |
| 571 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
| 572 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
| 573 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 574 |
|
$self->{t}->{type} = DIMENSION_TOKEN; |
| 575 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
| 576 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 577 |
redo A; |
redo A; |
| 605 |
redo A; |
redo A; |
| 606 |
} else { |
} else { |
| 607 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 608 |
$self->{c} = $self->{get_char}->(); |
# reprocess |
| 609 |
return {type => DELIM_TOKEN, value => '#'}; |
return {type => DELIM_TOKEN, value => '#'}; |
| 610 |
#redo A; |
#redo A; |
| 611 |
} |
} |
| 814 |
redo A; |
redo A; |
| 815 |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
| 816 |
## NOTE: second character of |unicode| in |escape|. |
## NOTE: second character of |unicode| in |escape|. |
| 817 |
$char = $self->{c} - 0x0061 - 0xA; |
$char = $self->{c} - 0x0061 + 0xA; |
| 818 |
$self->{state} = ESCAPE_STATE; $i = 2; |
$self->{state} = ESCAPE_STATE; $i = 2; |
| 819 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 820 |
redo A; |
redo A; |
| 821 |
} elsif ($self->{c} == 0x000A or # \n |
} elsif ($self->{c} == 0x000A or # \n |
| 822 |
$self->{c} == 0x000C) { # \f |
$self->{c} == 0x000C) { # \f |
| 823 |
if ($q == 0) { |
if ($q == 0) { |
| 824 |
## NOTE: In |escape| in ... in |ident|. |
# |
|
$self->{state} = BEFORE_TOKEN_STATE; |
|
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
|
|
return $self->{t}; |
|
|
# reconsume |
|
|
#redo A; |
|
| 825 |
} elsif ($q == 1) { |
} elsif ($q == 1) { |
| 826 |
## NOTE: In |escape| in |URI|. |
## NOTE: In |escape| in |URI|. |
| 827 |
$self->{t}->{type} = { |
$self->{t}->{type} = { |
| 843 |
} |
} |
| 844 |
} elsif ($self->{c} == 0x000D) { # \r |
} elsif ($self->{c} == 0x000D) { # \r |
| 845 |
if ($q == 0) { |
if ($q == 0) { |
| 846 |
## NOTE: In |escape| in ... in |ident|. |
# |
|
$self->{state} = BEFORE_TOKEN_STATE; |
|
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
|
|
return $self->{t}; |
|
|
# reconsume |
|
|
#redo A; |
|
| 847 |
} elsif ($q == 1) { |
} elsif ($q == 1) { |
| 848 |
|
## NOTE: In |escape| in |URI|. |
| 849 |
$self->{t}->{type} = { |
$self->{t}->{type} = { |
| 850 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
| 851 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
| 852 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 853 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 854 |
}->{$self->{t}->{type}}; |
}->{$self->{t}->{type}}; |
| 855 |
$self->{t}->{value} .= "\x0D\x0A"; |
$self->{t}->{value} .= "\x0D"; |
| 856 |
$self->{state} = URI_UNQUOTED_STATE; |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
| 857 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 858 |
redo A; |
redo A; |
| 859 |
} else { |
} else { |
| 860 |
## Note: In |nl| in ... in |string| or |ident|. |
## Note: In |nl| in ... in |string| or |ident|. |
| 861 |
$self->{t}->{value} .= "\x0D\x0A"; |
$self->{t}->{value} .= "\x0D"; |
| 862 |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
| 863 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 864 |
redo A; |
redo A; |
| 865 |
} |
} |
| 866 |
|
} elsif ($self->{c} == -1) { |
| 867 |
|
# |
| 868 |
} else { |
} else { |
| 869 |
## NOTE: second character of |escape|. |
## NOTE: second character of |escape|. |
| 870 |
$self->{t}->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
| 873 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 874 |
redo A; |
redo A; |
| 875 |
} |
} |
| 876 |
|
|
| 877 |
|
if ($q == 0) { |
| 878 |
|
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 879 |
|
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 880 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 881 |
|
# reprocess |
| 882 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 883 |
|
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 884 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 885 |
|
$self->{t}->{value} = ''; |
| 886 |
|
return $self->{t}; |
| 887 |
|
#redo A; |
| 888 |
|
} elsif (length $self->{t}->{value}) { |
| 889 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 890 |
|
# reprocess |
| 891 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 892 |
|
return $self->{t}; |
| 893 |
|
#redo A; |
| 894 |
|
} else { |
| 895 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 896 |
|
# reprocess |
| 897 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 898 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 899 |
|
$self->{t}->{value} = ''; |
| 900 |
|
return $self->{t}; |
| 901 |
|
#redo A; |
| 902 |
|
} |
| 903 |
|
} else { |
| 904 |
|
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 905 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 906 |
|
# reprocess |
| 907 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 908 |
|
return {type => MINUS_TOKEN}; |
| 909 |
|
#redo A; |
| 910 |
|
} elsif (length $self->{t}->{value}) { |
| 911 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 912 |
|
# reprocess |
| 913 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 914 |
|
return $self->{t}; |
| 915 |
|
#redo A; |
| 916 |
|
} else { |
| 917 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 918 |
|
# reprocess |
| 919 |
|
return {type => DELIM_TOKEN, value => '\\'}; |
| 920 |
|
#redo A; |
| 921 |
|
} |
| 922 |
|
} |
| 923 |
|
} elsif ($q == 1) { |
| 924 |
|
$self->{state} = URI_UNQUOTED_STATE; |
| 925 |
|
$self->{c} = $self->{get_char}->(); |
| 926 |
|
redo A; |
| 927 |
|
} else { |
| 928 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 929 |
|
$self->{t}->{type} = { |
| 930 |
|
STRING_TOKEN, INVALID_TOKEN, |
| 931 |
|
URI_TOKEN, URI_INVALID_TOKEN, |
| 932 |
|
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 933 |
|
}->{$self->{t}->{type}} || $self->{t}->{type}; |
| 934 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 935 |
|
# reprocess |
| 936 |
|
return $self->{t}; |
| 937 |
|
#redo A; |
| 938 |
|
} |
| 939 |
} elsif ($self->{state} == ESCAPE_STATE) { |
} elsif ($self->{state} == ESCAPE_STATE) { |
| 940 |
## NOTE: third..seventh character of |unicode| in |escape|. |
## NOTE: third..seventh character of |unicode| in |escape|. |
| 941 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
| 949 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 950 |
redo A; |
redo A; |
| 951 |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
| 952 |
$char = $char * 0x10 + $self->{c} - 0x0061 - 0xA; |
$char = $char * 0x10 + $self->{c} - 0x0061 + 0xA; |
| 953 |
$self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE; |
$self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE; |
| 954 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 955 |
redo A; |
redo A; |
| 998 |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
| 999 |
## NOTE: |\n| in |\r\n| in |unicode| in |escape|. |
## NOTE: |\n| in |\r\n| in |unicode| in |escape|. |
| 1000 |
if ($self->{c} == 0x000A) { # \n |
if ($self->{c} == 0x000A) { # \n |
| 1001 |
$self->{t}->{value} .= chr $char; |
$self->{t}->{value} .= chr $self->{c}; |
| 1002 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
| 1003 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
| 1004 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 1005 |
redo A; |
redo A; |
| 1006 |
} else { |
} else { |
|
$self->{t}->{value} .= chr $char; |
|
| 1007 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
| 1008 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
| 1009 |
# reconsume |
# reprocess |
| 1010 |
redo A; |
redo A; |
| 1011 |
} |
} |
| 1012 |
} elsif ($self->{state} == STRING_STATE) { |
} elsif ($self->{state} == STRING_STATE) { |
| 1033 |
$self->{c} == 0x000D or # \r |
$self->{c} == 0x000D or # \r |
| 1034 |
$self->{c} == 0x000C or # \f |
$self->{c} == 0x000C or # \f |
| 1035 |
$self->{c} == -1) { |
$self->{c} == -1) { |
| 1036 |
$self->{t}->{type} = INVALID_TOKEN; |
$self->{t}->{type} = { |
| 1037 |
|
STRING_TOKEN, INVALID_TOKEN, |
| 1038 |
|
INVALID_TOKEN, INVALID_TOKEN, |
| 1039 |
|
URI_TOKEN, URI_INVALID_TOKEN, |
| 1040 |
|
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
| 1041 |
|
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1042 |
|
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1043 |
|
}->{$self->{t}->{type}}; |
| 1044 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1045 |
# reconsume |
# reconsume |
| 1046 |
return $self->{t}; |
return $self->{t}; |
| 1077 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 1078 |
redo A; |
redo A; |
| 1079 |
} else { |
} else { |
| 1080 |
unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'}; |
unshift @{$self->{token}}, {type => DOT_TOKEN}; |
| 1081 |
$self->{t}->{number} = $self->{t}->{value}; |
$self->{t}->{number} = $self->{t}->{value}; |
| 1082 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 1083 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1094 |
redo A; |
redo A; |
| 1095 |
} else { |
} else { |
| 1096 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1097 |
$self->{c} = $self->{get_char}->(); |
# reprocess |
| 1098 |
return {type => DELIM_TOKEN, value => '.'}; |
return {type => DOT_TOKEN}; |
| 1099 |
#redo A; |
#redo A; |
| 1100 |
} |
} |
| 1101 |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |