| 1 |
package Whatpm::CSS::Tokenizer; |
package Whatpm::CSS::Tokenizer; |
| 2 |
use strict; |
use strict; |
| 3 |
|
our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
| 4 |
|
|
| 5 |
|
require Exporter; |
| 6 |
|
push our @ISA, 'Exporter'; |
| 7 |
|
|
| 8 |
## Tokenizer state identifiers.  Each state is a constant subroutine
## (empty prototype, literal body) so that Perl can inline the value
## wherever the name is used in a numeric comparison.
##
## Each constant must be defined exactly once: a second definition of
## the same name triggers a "Constant subroutine redefined" warning.
sub BEFORE_TOKEN_STATE () { 0 }
sub BEFORE_NMSTART_STATE () { 1 }
| 80 |
COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION |
COMMENT_INVALID EOF MINUS STAR VBAR DOT COLON MATCH EXCLAMATION |
| 81 |
); |
); |
| 82 |
|
|
| 83 |
|
## Names of the token type constants that may be imported on request.
## Only bare identifiers may appear inside |qw()|: every
## whitespace-separated word in the list becomes an exportable name.
our @EXPORT_OK = qw(
  IDENT_TOKEN ATKEYWORD_TOKEN HASH_TOKEN FUNCTION_TOKEN URI_TOKEN
  URI_INVALID_TOKEN URI_PREFIX_TOKEN URI_PREFIX_INVALID_TOKEN
  STRING_TOKEN INVALID_TOKEN NUMBER_TOKEN DIMENSION_TOKEN PERCENTAGE_TOKEN
  UNICODE_RANGE_TOKEN DELIM_TOKEN PLUS_TOKEN GREATER_TOKEN COMMA_TOKEN
  TILDE_TOKEN DASHMATCH_TOKEN PREFIXMATCH_TOKEN SUFFIXMATCH_TOKEN
  SUBSTRINGMATCH_TOKEN INCLUDES_TOKEN SEMICOLON_TOKEN LBRACE_TOKEN
  RBRACE_TOKEN LPAREN_TOKEN RPAREN_TOKEN LBRACKET_TOKEN RBRACKET_TOKEN
  S_TOKEN CDO_TOKEN CDC_TOKEN COMMENT_TOKEN COMMENT_INVALID_TOKEN EOF_TOKEN
  MINUS_TOKEN STAR_TOKEN VBAR_TOKEN DOT_TOKEN COLON_TOKEN MATCH_TOKEN
  EXCLAMATION_TOKEN
);

## The |:token| tag imports every token type constant at once.
our %EXPORT_TAGS = ('token' => [@EXPORT_OK]);
| 97 |
|
|
| 98 |
## Constructor.  Invoked as a class method; the invocant is used as the
## package into which the new object is blessed.
##
## The returned object is a hash reference with these defaults:
##   token    - an empty array reference.
##   get_char - a code reference returning -1; callers are expected to
##              replace it with their own character source.
##   onerror  - a code reference that does nothing; callers may replace
##              it with their own handler.
sub new ($) {
  my $self = bless {token => [], get_char => sub { -1 },
                    onerror => sub { }}, shift;
  return $self;
} # new
| 102 |
|
|
| 327 |
0x005B => LBRACKET_TOKEN, # [ |
0x005B => LBRACKET_TOKEN, # [ |
| 328 |
0x005D => RBRACKET_TOKEN, # ] |
0x005D => RBRACKET_TOKEN, # ] |
| 329 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
| 330 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 331 |
# stay in the state |
# stay in the state |
| 332 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->($self); |
| 333 |
return {type => $t}; |
return {type => $t, line => $l, column => $c}; |
| 334 |
# redo A; |
# redo A; |
| 335 |
} elsif ({ |
} elsif ({ |
| 336 |
0x0020 => 1, # SP |
0x0020 => 1, # SP |
| 855 |
redo A; |
redo A; |
| 856 |
} else { |
} else { |
| 857 |
## Note: In |nl| in ... in |string| or |ident|. |
## Note: In |nl| in ... in |string| or |ident|. |
|
$self->{t}->{value} .= chr $self->{c}; |
|
| 858 |
$self->{state} = STRING_STATE; |
$self->{state} = STRING_STATE; |
| 859 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 860 |
redo A; |
redo A; |
| 870 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 871 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 872 |
}->{$self->{t}->{type}}; |
}->{$self->{t}->{type}}; |
|
$self->{t}->{value} .= "\x0D"; |
|
| 873 |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
| 874 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 875 |
redo A; |
redo A; |
| 876 |
} else { |
} else { |
| 877 |
## Note: In |nl| in ... in |string| or |ident|. |
## Note: In |nl| in ... in |string| or |ident|. |
|
$self->{t}->{value} .= "\x0D"; |
|
| 878 |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
| 879 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 880 |
redo A; |
redo A; |
| 1012 |
redo A; |
redo A; |
| 1013 |
} |
} |
| 1014 |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
| 1015 |
## NOTE: |\n| in |\r\n| in |unicode| in |escape|. |
## NOTE: |\n| in |\r\n| in |nl| in |escape|. |
| 1016 |
if ($self->{c} == 0x000A) { # \n |
if ($self->{c} == 0x000A) { # \n |
|
$self->{t}->{value} .= chr $self->{c}; |
|
| 1017 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
| 1018 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
| 1019 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 1133 |
} # A |
} # A |
| 1134 |
} # get_next_token |
} # get_next_token |
| 1135 |
|
|
| 1136 |
|
## Returns a short textual form of a token, for diagnostics and
## debugging output.
##
## Arguments:
##   (invocant) - ignored; the function can be called as a class or
##                instance method.
##   $t         - a token hash reference.  |$t->{type}| is compared
##                numerically against the |*_TOKEN| constants;
##                |$t->{value}| and |$t->{number}| are read for the token
##                types that carry text or a numeric part.
##
## Returns a string.  Tokens with a fixed spelling return that spelling;
## a token type that is not recognized returns the raw type wrapped in
## braces (e.g. |{99}|).
##
## NOTE: This function is not intended for roundtrip-able serialization.
## Values are emitted as is (no escaping is applied), comments lose their
## content and whitespace is collapsed into a single space.
sub serialize_token ($$) {
  shift; # The invocant is not used.
  my $t = shift;

  my $type = $t->{type};

  ## Tokens that carry a value and/or a number.
  if ($type == IDENT_TOKEN) {
    return $t->{value};
  } elsif ($type == ATKEYWORD_TOKEN) {
    return '@' . $t->{value};
  } elsif ($type == HASH_TOKEN) {
    return '#' . $t->{value};
  } elsif ($type == FUNCTION_TOKEN) {
    return $t->{value} . '(';
  } elsif ($type == URI_TOKEN) {
    return 'url(' . $t->{value} . ')';
  } elsif ($type == URI_INVALID_TOKEN) {
    ## Invalid variants are left unterminated.
    return 'url(' . $t->{value};
  } elsif ($type == URI_PREFIX_TOKEN) {
    return 'url-prefix(' . $t->{value} . ')';
  } elsif ($type == URI_PREFIX_INVALID_TOKEN) {
    return 'url-prefix(' . $t->{value};
  } elsif ($type == STRING_TOKEN) {
    return '"' . $t->{value} . '"';
  } elsif ($type == INVALID_TOKEN) {
    return '"' . $t->{value};
  } elsif ($type == NUMBER_TOKEN) {
    return $t->{number};
  } elsif ($type == DIMENSION_TOKEN) {
    return $t->{number} . $t->{value};
  } elsif ($type == PERCENTAGE_TOKEN) {
    return $t->{number} . '%';
  } elsif ($type == UNICODE_RANGE_TOKEN) {
    return 'U+' . $t->{value};
  } elsif ($type == DELIM_TOKEN) {
    return $t->{value};

  ## Tokens with a fixed spelling.
  } elsif ($type == PLUS_TOKEN) {
    return '+';
  } elsif ($type == GREATER_TOKEN) {
    return '>';
  } elsif ($type == COMMA_TOKEN) {
    return ',';
  } elsif ($type == TILDE_TOKEN) {
    return '~';
  } elsif ($type == DASHMATCH_TOKEN) {
    return '|=';
  } elsif ($type == PREFIXMATCH_TOKEN) {
    return '^=';
  } elsif ($type == SUFFIXMATCH_TOKEN) {
    return '$=';
  } elsif ($type == SUBSTRINGMATCH_TOKEN) {
    return '*=';
  } elsif ($type == INCLUDES_TOKEN) {
    return '~=';
  } elsif ($type == SEMICOLON_TOKEN) {
    return ';';
  } elsif ($type == LBRACE_TOKEN) {
    return '{';
  } elsif ($type == RBRACE_TOKEN) {
    return '}';
  } elsif ($type == LPAREN_TOKEN) {
    return '(';
  } elsif ($type == RPAREN_TOKEN) {
    return ')';
  } elsif ($type == LBRACKET_TOKEN) {
    return '[';
  } elsif ($type == RBRACKET_TOKEN) {
    return ']';
  } elsif ($type == S_TOKEN) {
    return ' ';
  } elsif ($type == CDO_TOKEN) {
    return '<!--';
  } elsif ($type == CDC_TOKEN) {
    return '-->';
  } elsif ($type == COMMENT_TOKEN) {
    return '/**/';
  } elsif ($type == COMMENT_INVALID_TOKEN) {
    return '/*';
  } elsif ($type == EOF_TOKEN) {
    return '{EOF}';
  } elsif ($type == MINUS_TOKEN) {
    return '-';
  } elsif ($type == STAR_TOKEN) {
    return '*';
  } elsif ($type == VBAR_TOKEN) {
    return '|';
  } elsif ($type == DOT_TOKEN) {
    ## |DOT_TOKEN| is one of the exported token types, so it needs its
    ## own branch rather than falling through to the fallback below.
    return '.';
  } elsif ($type == COLON_TOKEN) {
    return ':';
  } elsif ($type == MATCH_TOKEN) {
    return '=';
  } elsif ($type == EXCLAMATION_TOKEN) {
    return '!';
  } else {
    ## Unknown token type: expose the raw type for debugging.
    return '{'.$t->{type}.'}';
  }
} # serialize_token
| 1232 |
|
|
| 1233 |
|
=head1 LICENSE |
| 1234 |
|
|
| 1235 |
|
Copyright 2007 Wakaba <w@suika.fam.cx> |
| 1236 |
|
|
| 1237 |
|
This library is free software; you can redistribute it |
| 1238 |
|
and/or modify it under the same terms as Perl itself. |
| 1239 |
|
|
| 1240 |
|
=cut |
| 1241 |
|
|
| 1242 |
1; |
1; |
| 1243 |
# $Date$ |
# $Date$ |