| 59 |
sub COMMENT_TOKEN () { 36 } |
sub COMMENT_TOKEN () { 36 } |
| 60 |
sub COMMENT_INVALID_TOKEN () { 37 } |
sub COMMENT_INVALID_TOKEN () { 37 } |
| 61 |
sub EOF_TOKEN () { 38 } |
sub EOF_TOKEN () { 38 } |
| 62 |
|
sub MINUS_TOKEN () { 39 } |
| 63 |
|
sub STAR_TOKEN () { 40 } |
| 64 |
|
sub VBAR_TOKEN () { 41 } |
| 65 |
|
sub DOT_TOKEN () { 42 } |
| 66 |
|
sub COLON_TOKEN () { 43 } |
| 67 |
|
sub MATCH_TOKEN () { 44 } |
| 68 |
|
sub EXCLAMATION_TOKEN () { 45 } |
| 69 |
|
|
| 70 |
our @TokenName = qw( |
our @TokenName = qw( |
| 71 |
0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID |
0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID |
| 73 |
0 DELIM PLUS GREATER COMMA TILDE DASHMATCH |
0 DELIM PLUS GREATER COMMA TILDE DASHMATCH |
| 74 |
PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON |
PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON |
| 75 |
LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT |
LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT |
| 76 |
COMMENT_INVALID EOF |
COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION |
| 77 |
); |
); |
| 78 |
|
|
| 79 |
sub new ($) { |
sub new ($) { |
| 122 |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
| 123 |
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
| 124 |
$self->{c} == 0x003F) { # ? |
$self->{c} == 0x003F) { # ? |
| 125 |
$self->{t}->{value} .= '+' . chr $self->{c}; |
$self->{t}->{value} = chr $self->{c}; |
| 126 |
$self->{t}->{type} = UNICODE_RANGE_TOKEN; |
$self->{t}->{type} = UNICODE_RANGE_TOKEN; |
| 127 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 128 |
C: for (2..6) { |
C: for (2..6) { |
| 274 |
return {type => CDO_TOKEN}; |
return {type => CDO_TOKEN}; |
| 275 |
#redo A; |
#redo A; |
| 276 |
} else { |
} else { |
| 277 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
| 278 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
| 279 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 280 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 283 |
#redo A; |
#redo A; |
| 284 |
} |
} |
| 285 |
} else { |
} else { |
| 286 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
| 287 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 288 |
#reprocess |
#reprocess |
| 289 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<'}; |
| 296 |
#redo A; |
#redo A; |
| 297 |
} |
} |
| 298 |
} elsif (my $t = { |
} elsif (my $t = { |
| 299 |
0x003B => SEMICOLON_TOKEN, # ; |
0x0021 => EXCLAMATION_TOKEN, # ! |
| 300 |
0x007B => LBRACE_TOKEN, # { |
0x002D => MINUS_TOKEN, # - |
| 301 |
0x007D => RBRACE_TOKEN, # } |
0x002E => DOT_TOKEN, # . |
| 302 |
0x0028 => LPAREN_TOKEN, # ( |
0x003A => COLON_TOKEN, # : |
| 303 |
0x0029 => RPAREN_TOKEN, # ) |
0x003B => SEMICOLON_TOKEN, # ; |
| 304 |
0x005B => LBRACKET_TOKEN, # [ |
0x003D => MATCH_TOKEN, # = |
| 305 |
0x005D => RBRACKET_TOKEN, # ] |
0x007B => LBRACE_TOKEN, # { |
| 306 |
|
0x007D => RBRACE_TOKEN, # } |
| 307 |
|
0x0028 => LPAREN_TOKEN, # ( |
| 308 |
|
0x0029 => RPAREN_TOKEN, # ) |
| 309 |
|
0x005B => LBRACKET_TOKEN, # [ |
| 310 |
|
0x005D => RBRACKET_TOKEN, # ] |
| 311 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
| 312 |
# stay in the state |
# stay in the state |
| 313 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 360 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 361 |
return {type => $v}; |
return {type => $v}; |
| 362 |
#redo A; |
#redo A; |
| 363 |
|
} elsif ($v = { |
| 364 |
|
0x002A => STAR_TOKEN, # * |
| 365 |
|
0x007C => VBAR_TOKEN, # | |
| 366 |
|
}->{$c}) { |
| 367 |
|
# stay in the state. |
| 368 |
|
# reprocess |
| 369 |
|
return {type => $v}; |
| 370 |
|
#redo A; |
| 371 |
} else { |
} else { |
| 372 |
# stay in the state |
# stay in the state |
| 373 |
# reprocess |
# reprocess |
| 444 |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 445 |
# stay in the state |
# stay in the state |
| 446 |
# reconsume |
# reconsume |
| 447 |
return {type => DELIM_TOKEN, value => '-'}; |
return {type => MINUS_TOKEN}; |
| 448 |
#redo A; |
#redo A; |
| 449 |
} |
} |
| 450 |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 463 |
$t->{type} = NUMBER_TOKEN; |
$t->{type} = NUMBER_TOKEN; |
| 464 |
$t->{value} = ''; |
$t->{value} = ''; |
| 465 |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
| 466 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 467 |
# stay in the state |
# stay in the state |
| 468 |
# reconsume |
# reconsume |
| 469 |
return $t; |
return $t; |
| 478 |
|
|
| 479 |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 480 |
## NOTE: |-| after |NUMBER|. |
## NOTE: |-| after |NUMBER|. |
| 481 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 482 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 483 |
# reprocess |
# reprocess |
| 484 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
| 488 |
## NOTE: |-| not followed by |nmstart|. |
## NOTE: |-| not followed by |nmstart|. |
| 489 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 490 |
# reprocess |
# reprocess |
| 491 |
return {type => DELIM_TOKEN, value => '-'}; |
return {type => MINUS_TOKEN}; |
| 492 |
} |
} |
| 493 |
} elsif ($self->{state} == AFTER_AT_STATE) { |
} elsif ($self->{state} == AFTER_AT_STATE) { |
| 494 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
| 531 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@'}; |
| 532 |
#redo A; |
#redo A; |
| 533 |
} else { |
} else { |
| 534 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 535 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 536 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 537 |
# reprocess |
# reprocess |
| 544 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 545 |
redo A; |
redo A; |
| 546 |
} else { |
} else { |
| 547 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 548 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 549 |
# reprocess |
# reprocess |
| 550 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@'}; |
| 880 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 881 |
# reprocess |
# reprocess |
| 882 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 883 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 884 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
| 885 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 886 |
return $self->{t}; |
return $self->{t}; |
| 905 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 906 |
# reprocess |
# reprocess |
| 907 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 908 |
return {type => DELIM_TOKEN, value => '-'}; |
return {type => MINUS_TOKEN}; |
| 909 |
#redo A; |
#redo A; |
| 910 |
} elsif (length $self->{t}->{value}) { |
} elsif (length $self->{t}->{value}) { |
| 911 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1033 |
$self->{c} == 0x000D or # \r |
$self->{c} == 0x000D or # \r |
| 1034 |
$self->{c} == 0x000C or # \f |
$self->{c} == 0x000C or # \f |
| 1035 |
$self->{c} == -1) { |
$self->{c} == -1) { |
| 1036 |
$self->{t}->{type} = INVALID_TOKEN; |
$self->{t}->{type} = { |
| 1037 |
|
STRING_TOKEN, INVALID_TOKEN, |
| 1038 |
|
INVALID_TOKEN, INVALID_TOKEN, |
| 1039 |
|
URI_TOKEN, URI_INVALID_TOKEN, |
| 1040 |
|
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
| 1041 |
|
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1042 |
|
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1043 |
|
}->{$self->{t}->{type}}; |
| 1044 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1045 |
# reconsume |
# reconsume |
| 1046 |
return $self->{t}; |
return $self->{t}; |
| 1077 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 1078 |
redo A; |
redo A; |
| 1079 |
} else { |
} else { |
| 1080 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'}; |
unshift @{$self->{token}}, {type => DOT_TOKEN}; |
| 1081 |
$self->{t}->{number} = $self->{t}->{value}; |
$self->{t}->{number} = $self->{t}->{value}; |
| 1082 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 1083 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1095 |
} else { |
} else { |
| 1096 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1097 |
# reprocess |
# reprocess |
| 1098 |
return {type => DELIM_TOKEN, value => '.'}; |
return {type => DOT_TOKEN}; |
| 1099 |
#redo A; |
#redo A; |
| 1100 |
} |
} |
| 1101 |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |