| 1 |
package Whatpm::CSS::Tokenizer; |
package Whatpm::CSS::Tokenizer; |
| 2 |
use strict; |
use strict; |
| 3 |
|
|
| 4 |
|
require Exporter; |
| 5 |
|
push our @ISA, 'Exporter'; |
| 6 |
|
|
| 7 |
sub BEFORE_TOKEN_STATE () { 0 } |
sub BEFORE_TOKEN_STATE () { 0 } |
| 8 |
sub BEFORE_NMSTART_STATE () { 1 } |
sub BEFORE_NMSTART_STATE () { 1 } |
| 9 |
sub NAME_STATE () { 2 } |
sub NAME_STATE () { 2 } |
| 62 |
sub COMMENT_TOKEN () { 36 } |
sub COMMENT_TOKEN () { 36 } |
| 63 |
sub COMMENT_INVALID_TOKEN () { 37 } |
sub COMMENT_INVALID_TOKEN () { 37 } |
| 64 |
sub EOF_TOKEN () { 38 } |
sub EOF_TOKEN () { 38 } |
| 65 |
|
sub MINUS_TOKEN () { 39 } |
| 66 |
|
sub STAR_TOKEN () { 40 } |
| 67 |
|
sub VBAR_TOKEN () { 41 } |
| 68 |
|
sub DOT_TOKEN () { 42 } |
| 69 |
|
sub COLON_TOKEN () { 43 } |
| 70 |
|
sub MATCH_TOKEN () { 44 } |
| 71 |
|
sub EXCLAMATION_TOKEN () { 45 } |
| 72 |
|
|
| 73 |
our @TokenName = qw( |
our @TokenName = qw( |
| 74 |
0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID |
0 IDENT ATKEYWORD HASH FUNCTION URI URI_INVALID URI_PREFIX URI_PREFIX_INVALID |
| 76 |
0 DELIM PLUS GREATER COMMA TILDE DASHMATCH |
0 DELIM PLUS GREATER COMMA TILDE DASHMATCH |
| 77 |
PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON |
PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH INCLUDES SEMICOLON |
| 78 |
LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT |
LBRACE RBRACE LPAREN RPAREN LBRACKET RBRACKET S CDO CDC COMMENT |
| 79 |
COMMENT_INVALID EOF |
COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION |
| 80 |
|
); |
| 81 |
|
|
| 82 |
|
our @EXPORT_OK = qw( |
| 83 |
|
IDENT_TOKEN ATKEYWORD_TOKEN HASH_TOKEN FUNCTION_TOKEN URI_TOKEN |
| 84 |
|
URI_INVALID_TOKEN URI_PREFIX_TOKEN URI_PREFIX_INVALID_TOKEN |
| 85 |
|
STRING_TOKEN INVALID_TOKEN NUMBER_TOKEN DIMENSION_TOKEN PERCENTAGE_TOKEN |
| 86 |
|
UNICODE_RANGE_TOKEN DELIM_TOKEN PLUS_TOKEN GREATER_TOKEN COMMA_TOKEN |
| 87 |
|
TILDE_TOKEN DASHMATCH_TOKEN PREFIXMATCH_TOKEN SUFFIXMATCH_TOKEN |
| 88 |
|
SUBSTRINGMATCH_TOKEN INCLUDES_TOKEN SEMICOLON_TOKEN LBRACE_TOKEN |
| 89 |
|
RBRACE_TOKEN LPAREN_TOKEN RPAREN_TOKEN LBRACKET_TOKEN RBRACKET_TOKEN |
| 90 |
|
S_TOKEN CDO_TOKEN CDC_TOKEN COMMENT_TOKEN COMMENT_INVALID_TOKEN EOF_TOKEN |
| 91 |
|
MINUS_TOKEN STAR_TOKEN VBAR_TOKEN DOT_TOKEN COLON_TOKEN MATCH_TOKEN |
| 92 |
|
EXCLAMATION_TOKEN |
| 93 |
); |
); |
| 94 |
|
|
| 95 |
|
our %EXPORT_TAGS = ('token' => [@EXPORT_OK]); |
| 96 |
|
|
| 97 |
sub new ($) { |
sub new ($) { |
| 98 |
my $self = bless {token => [], get_char => sub { -1 }, |
my $self = bless {token => [], get_char => sub { -1 }, |
| 99 |
onerror => sub { }}, shift; |
onerror => sub { }}, shift; |
| 292 |
return {type => CDO_TOKEN}; |
return {type => CDO_TOKEN}; |
| 293 |
#redo A; |
#redo A; |
| 294 |
} else { |
} else { |
| 295 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
| 296 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
| 297 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 298 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 301 |
#redo A; |
#redo A; |
| 302 |
} |
} |
| 303 |
} else { |
} else { |
| 304 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
| 305 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 306 |
#reprocess |
#reprocess |
| 307 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<'}; |
| 314 |
#redo A; |
#redo A; |
| 315 |
} |
} |
| 316 |
} elsif (my $t = { |
} elsif (my $t = { |
| 317 |
0x003B => SEMICOLON_TOKEN, # ; |
0x0021 => EXCLAMATION_TOKEN, # ! |
| 318 |
0x007B => LBRACE_TOKEN, # { |
0x002D => MINUS_TOKEN, # - |
| 319 |
0x007D => RBRACE_TOKEN, # } |
0x002E => DOT_TOKEN, # . |
| 320 |
0x0028 => LPAREN_TOKEN, # ( |
0x003A => COLON_TOKEN, # : |
| 321 |
0x0029 => RPAREN_TOKEN, # ) |
0x003B => SEMICOLON_TOKEN, # ; |
| 322 |
0x005B => LBRACKET_TOKEN, # [ |
0x003D => MATCH_TOKEN, # = |
| 323 |
0x005D => RBRACKET_TOKEN, # ] |
0x007B => LBRACE_TOKEN, # { |
| 324 |
|
0x007D => RBRACE_TOKEN, # } |
| 325 |
|
0x0028 => LPAREN_TOKEN, # ( |
| 326 |
|
0x0029 => RPAREN_TOKEN, # ) |
| 327 |
|
0x005B => LBRACKET_TOKEN, # [ |
| 328 |
|
0x005D => RBRACKET_TOKEN, # ] |
| 329 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
| 330 |
# stay in the state |
# stay in the state |
| 331 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 378 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 379 |
return {type => $v}; |
return {type => $v}; |
| 380 |
#redo A; |
#redo A; |
| 381 |
|
} elsif ($v = { |
| 382 |
|
0x002A => STAR_TOKEN, # * |
| 383 |
|
0x007C => VBAR_TOKEN, # | |
| 384 |
|
}->{$c}) { |
| 385 |
|
# stay in the state. |
| 386 |
|
# reprocess |
| 387 |
|
return {type => $v}; |
| 388 |
|
#redo A; |
| 389 |
} else { |
} else { |
| 390 |
# stay in the state |
# stay in the state |
| 391 |
# reprocess |
# reprocess |
| 462 |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 463 |
# stay in the state |
# stay in the state |
| 464 |
# reconsume |
# reconsume |
| 465 |
return {type => DELIM_TOKEN, value => '-'}; |
return {type => MINUS_TOKEN}; |
| 466 |
#redo A; |
#redo A; |
| 467 |
} |
} |
| 468 |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 481 |
$t->{type} = NUMBER_TOKEN; |
$t->{type} = NUMBER_TOKEN; |
| 482 |
$t->{value} = ''; |
$t->{value} = ''; |
| 483 |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
| 484 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 485 |
# stay in the state |
# stay in the state |
| 486 |
# reconsume |
# reconsume |
| 487 |
return $t; |
return $t; |
| 496 |
|
|
| 497 |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 498 |
## NOTE: |-| after |NUMBER|. |
## NOTE: |-| after |NUMBER|. |
| 499 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 500 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 501 |
# reprocess |
# reprocess |
| 502 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
| 506 |
## NOTE: |-| not followed by |nmstart|. |
## NOTE: |-| not followed by |nmstart|. |
| 507 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 508 |
# reprocess |
# reprocess |
| 509 |
return {type => DELIM_TOKEN, value => '-'}; |
return {type => MINUS_TOKEN}; |
| 510 |
} |
} |
| 511 |
} elsif ($self->{state} == AFTER_AT_STATE) { |
} elsif ($self->{state} == AFTER_AT_STATE) { |
| 512 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
| 549 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@'}; |
| 550 |
#redo A; |
#redo A; |
| 551 |
} else { |
} else { |
| 552 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 553 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 554 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 555 |
# reprocess |
# reprocess |
| 562 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 563 |
redo A; |
redo A; |
| 564 |
} else { |
} else { |
| 565 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 566 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 567 |
# reprocess |
# reprocess |
| 568 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@'}; |
| 898 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 899 |
# reprocess |
# reprocess |
| 900 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 901 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
| 902 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
| 903 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 904 |
return $self->{t}; |
return $self->{t}; |
| 923 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 924 |
# reprocess |
# reprocess |
| 925 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 926 |
return {type => DELIM_TOKEN, value => '-'}; |
return {type => MINUS_TOKEN}; |
| 927 |
#redo A; |
#redo A; |
| 928 |
} elsif (length $self->{t}->{value}) { |
} elsif (length $self->{t}->{value}) { |
| 929 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1095 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 1096 |
redo A; |
redo A; |
| 1097 |
} else { |
} else { |
| 1098 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'}; |
unshift @{$self->{token}}, {type => DOT_TOKEN}; |
| 1099 |
$self->{t}->{number} = $self->{t}->{value}; |
$self->{t}->{number} = $self->{t}->{value}; |
| 1100 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 1101 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1113 |
} else { |
} else { |
| 1114 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1115 |
# reprocess |
# reprocess |
| 1116 |
return {type => DELIM_TOKEN, value => '.'}; |
return {type => DOT_TOKEN}; |
| 1117 |
#redo A; |
#redo A; |
| 1118 |
} |
} |
| 1119 |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |