| 127 |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
| 128 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
| 129 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
| 130 |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1, |
| 131 |
|
line => $self->{line}, column => $self->{column}}; |
| 132 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 133 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 134 |
redo A; |
redo A; |
| 135 |
} elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u |
} elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u |
| 136 |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}, |
| 137 |
|
line => $self->{line}, column => $self->{column}}; |
| 138 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 139 |
if ($self->{c} == 0x002B) { # + |
if ($self->{c} == 0x002B) { # + |
| 140 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 141 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 142 |
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
| 143 |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
| 179 |
# |
# |
| 180 |
} else { |
} else { |
| 181 |
my $token = $self->{t}; |
my $token = $self->{t}; |
| 182 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', |
| 183 |
|
line => $self->{line}, |
| 184 |
|
column => $self->{column}}; |
| 185 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 186 |
# reprocess |
# reprocess |
| 187 |
return $token; |
return $token; |
| 194 |
return $self->{t}; |
return $self->{t}; |
| 195 |
#redo A; |
#redo A; |
| 196 |
} else { |
} else { |
| 197 |
unshift @{$self->{token}}, {type => PLUS_TOKEN}; |
unshift @{$self->{token}}, |
| 198 |
|
{type => PLUS_TOKEN, line => $l, column => $c}; |
| 199 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 200 |
# reprocess |
# reprocess |
| 201 |
return $self->{t}; |
return $self->{t}; |
| 211 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
| 212 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
| 213 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
| 214 |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}, |
| 215 |
|
line => $self->{line}, column => $self->{column}}; |
| 216 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
| 217 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 218 |
redo A; |
redo A; |
| 219 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
| 220 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
| 221 |
$self->{t} = {type => IDENT_TOKEN, value => ''}; |
$self->{t} = {type => IDENT_TOKEN, value => '', |
| 222 |
|
line => $self->{line}, column => $self->{column}}; |
| 223 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
| 224 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 225 |
redo A; |
redo A; |
| 226 |
} elsif ($self->{c} == 0x0040) { # @ |
} elsif ($self->{c} == 0x0040) { # @ |
| 227 |
## NOTE: |@| in |ATKEYWORD| |
## NOTE: |@| in |ATKEYWORD| |
| 228 |
$self->{t} = {type => ATKEYWORD_TOKEN, value => ''}; |
$self->{t} = {type => ATKEYWORD_TOKEN, value => '', |
| 229 |
|
line => $self->{line}, column => $self->{column}}; |
| 230 |
$self->{state} = AFTER_AT_STATE; |
$self->{state} = AFTER_AT_STATE; |
| 231 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 232 |
redo A; |
redo A; |
| 233 |
} elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or ' |
} elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or ' |
| 234 |
$self->{t} = {type => STRING_TOKEN, value => ''}; |
$self->{t} = {type => STRING_TOKEN, value => '', |
| 235 |
|
line => $self->{line}, column => $self->{column}}; |
| 236 |
$self->{state} = STRING_STATE; $q = $self->{c}; |
$self->{state} = STRING_STATE; $q = $self->{c}; |
| 237 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 238 |
redo A; |
redo A; |
| 239 |
} elsif ($self->{c} == 0x0023) { # # |
} elsif ($self->{c} == 0x0023) { # # |
| 240 |
## NOTE: |#| in |HASH|. |
## NOTE: |#| in |HASH|. |
| 241 |
$self->{t} = {type => HASH_TOKEN, value => ''}; |
$self->{t} = {type => HASH_TOKEN, value => '', |
| 242 |
|
line => $self->{line}, column => $self->{column}}; |
| 243 |
$self->{state} = HASH_OPEN_STATE; |
$self->{state} = HASH_OPEN_STATE; |
| 244 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 245 |
redo A; |
redo A; |
| 246 |
} elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
} elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
| 247 |
## NOTE: |num|. |
## NOTE: |num|. |
| 248 |
$self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}, |
| 249 |
|
line => $self->{line}, column => $self->{column}}; |
| 250 |
$self->{state} = NUMBER_STATE; |
$self->{state} = NUMBER_STATE; |
| 251 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 252 |
redo A; |
redo A; |
| 253 |
} elsif ($self->{c} == 0x002E) { # . |
} elsif ($self->{c} == 0x002E) { # . |
| 254 |
## NOTE: |num|. |
## NOTE: |num|. |
| 255 |
$self->{t} = {type => NUMBER_TOKEN, value => '0'}; |
$self->{t} = {type => NUMBER_TOKEN, value => '0', |
| 256 |
|
line => $self->{line}, column => $self->{column}}; |
| 257 |
$self->{state} = NUMBER_FRACTION_STATE; |
$self->{state} = NUMBER_FRACTION_STATE; |
| 258 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 259 |
redo A; |
redo A; |
| 260 |
} elsif ($self->{c} == 0x002F) { # / |
} elsif ($self->{c} == 0x002F) { # / |
| 261 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 262 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 263 |
if ($self->{c} == 0x002A) { # * |
if ($self->{c} == 0x002A) { # * |
| 264 |
C: { |
C: { |
| 290 |
} else { |
} else { |
| 291 |
# stay in the state. |
# stay in the state. |
| 292 |
# reprocess |
# reprocess |
| 293 |
return {type => DELIM_TOKEN, value => '/'}; |
return {type => DELIM_TOKEN, value => '/', line => $l, column => $c}; |
| 294 |
#redo A; |
#redo A; |
| 295 |
} |
} |
| 296 |
} elsif ($self->{c} == 0x003C) { # < |
} elsif ($self->{c} == 0x003C) { # < |
| 297 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 298 |
## NOTE: |CDO| |
## NOTE: |CDO| |
| 299 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 300 |
if ($self->{c} == 0x0021) { # ! |
if ($self->{c} == 0x0021) { # ! |
| 304 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
| 305 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 306 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 307 |
return {type => CDO_TOKEN}; |
return {type => CDO_TOKEN, line => $l, column => $c}; |
| 308 |
#redo A; |
#redo A; |
| 309 |
} else { |
} else { |
| 310 |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
unshift @{$self->{token}}, |
| 311 |
|
{type => EXCLAMATION_TOKEN, line => $l, column => $c + 1}; |
| 312 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
| 313 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', |
| 314 |
|
line => $l, column => $c + 2}; |
| 315 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 316 |
#reprocess |
#reprocess |
| 317 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<', |
| 318 |
|
line => $l, column => $c}; |
| 319 |
#redo A; |
#redo A; |
| 320 |
} |
} |
| 321 |
} else { |
} else { |
| 322 |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN}; |
unshift @{$self->{token}}, {type => EXCLAMATION_TOKEN, |
| 323 |
|
line => $l, column => $c + 1}; |
| 324 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 325 |
#reprocess |
#reprocess |
| 326 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<', |
| 327 |
|
line => $l, column => $c}; |
| 328 |
#redo A; |
#redo A; |
| 329 |
} |
} |
| 330 |
} else { |
} else { |
| 331 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 332 |
#reprocess |
#reprocess |
| 333 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<', |
| 334 |
|
line => $l, column => $c}; |
| 335 |
#redo A; |
#redo A; |
| 336 |
} |
} |
| 337 |
} elsif (my $t = { |
} elsif (my $t = { |
| 360 |
0x000A => 1, # \n |
0x000A => 1, # \n |
| 361 |
0x000C => 1, # \f |
0x000C => 1, # \f |
| 362 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
| 363 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 364 |
W: { |
W: { |
| 365 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 366 |
if ({ |
if ({ |
| 377 |
0x002C => COMMA_TOKEN, # , |
0x002C => COMMA_TOKEN, # , |
| 378 |
0x007E => TILDE_TOKEN, # ~ |
0x007E => TILDE_TOKEN, # ~ |
| 379 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
| 380 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 381 |
# stay in the state |
# stay in the state |
| 382 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 383 |
return {type => $v}; |
return {type => $v, line => $l, column => $c}; |
| 384 |
#redo A; |
#redo A; |
| 385 |
} else { |
} else { |
| 386 |
# stay in the state |
# stay in the state |
| 387 |
# reprocess |
# reprocess |
| 388 |
return {type => S_TOKEN}; |
return {type => S_TOKEN, line => $l, column => $c}; |
| 389 |
#redo A; |
#redo A; |
| 390 |
} |
} |
| 391 |
} # W |
} # W |
| 395 |
0x0024 => SUFFIXMATCH_TOKEN, # $ |
0x0024 => SUFFIXMATCH_TOKEN, # $ |
| 396 |
0x002A => SUBSTRINGMATCH_TOKEN, # * |
0x002A => SUBSTRINGMATCH_TOKEN, # * |
| 397 |
}->{$self->{c}}) { |
}->{$self->{c}}) { |
| 398 |
|
my ($line, $column) = ($self->{line}, $self->{column}); |
| 399 |
my $c = $self->{c}; |
my $c = $self->{c}; |
| 400 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 401 |
if ($self->{c} == 0x003D) { # = |
if ($self->{c} == 0x003D) { # = |
| 402 |
# stay in the state |
# stay in the state |
| 403 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 404 |
return {type => $v}; |
return {type => $v, line => $line, column => $column}; |
| 405 |
#redo A; |
#redo A; |
| 406 |
} elsif ($v = { |
} elsif ($v = { |
| 407 |
0x002A => STAR_TOKEN, # * |
0x002A => STAR_TOKEN, # * |
| 409 |
}->{$c}) { |
}->{$c}) { |
| 410 |
# stay in the state. |
# stay in the state. |
| 411 |
# reprocess |
# reprocess |
| 412 |
return {type => $v}; |
return {type => $v, line => $line, column => $column}; |
| 413 |
#redo A; |
#redo A; |
| 414 |
} else { |
} else { |
| 415 |
# stay in the state |
# stay in the state |
| 416 |
# reprocess |
# reprocess |
| 417 |
return {type => DELIM_TOKEN, value => chr $c}; |
return {type => DELIM_TOKEN, value => chr $c, |
| 418 |
|
line => $line, column => $column}; |
| 419 |
#redo A; |
#redo A; |
| 420 |
} |
} |
| 421 |
} elsif ($self->{c} == 0x002B) { # + |
} elsif ($self->{c} == 0x002B) { # + |
| 422 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 423 |
# stay in the state |
# stay in the state |
| 424 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 425 |
return {type => PLUS_TOKEN}; |
return {type => PLUS_TOKEN, line => $l, column => $c}; |
| 426 |
#redo A; |
#redo A; |
| 427 |
} elsif ($self->{c} == 0x003E) { # > |
} elsif ($self->{c} == 0x003E) { # > |
| 428 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 429 |
# stay in the state |
# stay in the state |
| 430 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 431 |
return {type => GREATER_TOKEN}; |
return {type => GREATER_TOKEN, line => $l, column => $c}; |
| 432 |
#redo A; |
#redo A; |
| 433 |
} elsif ($self->{c} == 0x002C) { # , |
} elsif ($self->{c} == 0x002C) { # , |
| 434 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 435 |
# stay in the state |
# stay in the state |
| 436 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 437 |
return {type => COMMA_TOKEN}; |
return {type => COMMA_TOKEN, line => $l, column => $c}; |
| 438 |
#redo A; |
#redo A; |
| 439 |
} elsif ($self->{c} == 0x007E) { # ~ |
} elsif ($self->{c} == 0x007E) { # ~ |
| 440 |
|
my ($l, $c) = ($self->{line}, $self->{column}); |
| 441 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 442 |
if ($self->{c} == 0x003D) { # = |
if ($self->{c} == 0x003D) { # = |
| 443 |
# stay in the state |
# stay in the state |
| 444 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 445 |
return {type => INCLUDES_TOKEN}; |
return {type => INCLUDES_TOKEN, line => $l, column => $c}; |
| 446 |
#redo A; |
#redo A; |
| 447 |
} else { |
} else { |
| 448 |
# stay in the state |
# stay in the state |
| 449 |
# reprocess |
# reprocess |
| 450 |
return {type => TILDE_TOKEN}; |
return {type => TILDE_TOKEN, line => $l, column => $c}; |
| 451 |
#redo A; |
#redo A; |
| 452 |
} |
} |
| 453 |
} elsif ($self->{c} == -1) { |
} elsif ($self->{c} == -1) { |
| 454 |
# stay in the state |
# stay in the state |
| 455 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 456 |
return {type => EOF_TOKEN}; |
return {type => EOF_TOKEN, |
| 457 |
|
line => $self->{line}, column => $self->{column}}; |
| 458 |
#redo A; |
#redo A; |
| 459 |
} else { |
} else { |
| 460 |
# stay in the state |
# stay in the state |
| 461 |
$self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}, |
| 462 |
|
line => $self->{line}, column => $self->{column}}; |
| 463 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 464 |
return $self->{t}; |
return $self->{t}; |
| 465 |
#redo A; |
#redo A; |
| 487 |
if ($self->{c} == 0x003E) { # > |
if ($self->{c} == 0x003E) { # > |
| 488 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 489 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 490 |
return {type => CDC_TOKEN}; |
return {type => CDC_TOKEN, |
| 491 |
|
line => $self->{t}->{line}, |
| 492 |
|
column => $self->{t}->{column}}; |
| 493 |
#redo A; |
#redo A; |
| 494 |
} else { |
} else { |
| 495 |
## NOTE: |-|, |-|, $self->{c} |
## NOTE: |-|, |-|, $self->{c} |
| 496 |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 497 |
|
$self->{t}->{column}++; |
| 498 |
# stay in the state |
# stay in the state |
| 499 |
# reconsume |
# reconsume |
| 500 |
return {type => MINUS_TOKEN}; |
return {type => MINUS_TOKEN, |
| 501 |
|
line => $self->{t}->{line}, |
| 502 |
|
column => $self->{t}->{column} - 1}; |
| 503 |
#redo A; |
#redo A; |
| 504 |
} |
} |
| 505 |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 506 |
|
my ($l, $c) = ($self->{line}, $self->{column}); # second '-' |
| 507 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 508 |
if ($self->{c} == 0x003E) { # > |
if ($self->{c} == 0x003E) { # > |
| 509 |
unshift @{$self->{token}}, {type => CDC_TOKEN}; |
unshift @{$self->{token}}, {type => CDC_TOKEN}; |
| 514 |
return $self->{t}; |
return $self->{t}; |
| 515 |
#redo A; |
#redo A; |
| 516 |
} else { |
} else { |
| 517 |
## NOTE: |-|, |-|, $self->{c} |
## NOTE: NUMBER, |-|, |-|, $self->{c} |
| 518 |
my $t = $self->{t}; |
my $t = $self->{t}; |
| 519 |
$t->{type} = NUMBER_TOKEN; |
$t->{type} = NUMBER_TOKEN; |
| 520 |
$t->{value} = ''; |
$t->{value} = ''; |
| 521 |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1, |
| 522 |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
line => $l, column => $c}; |
| 523 |
|
unshift @{$self->{token}}, {type => MINUS_TOKEN, |
| 524 |
|
line => $l, column => $c - 1}; |
| 525 |
# stay in the state |
# stay in the state |
| 526 |
# reconsume |
# reconsume |
| 527 |
return $t; |
return $t; |
| 536 |
|
|
| 537 |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 538 |
## NOTE: |-| after |NUMBER|. |
## NOTE: |-| after |NUMBER|. |
| 539 |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
unshift @{$self->{token}}, {type => MINUS_TOKEN, |
| 540 |
|
line => $self->{line}, |
| 541 |
|
column => $self->{column} - 1}; |
| 542 |
|
## BUG: column might be wrong if on the line boundary. |
| 543 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 544 |
# reprocess |
# reprocess |
| 545 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
| 549 |
## NOTE: |-| not followed by |nmstart|. |
## NOTE: |-| not followed by |nmstart|. |
| 550 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 551 |
# reprocess |
# reprocess |
| 552 |
return {type => MINUS_TOKEN}; |
return {type => MINUS_TOKEN, |
| 553 |
|
line => $self->{line}, column => $self->{column} - 1}; |
| 554 |
|
## BUG: column might be wrong if on the line boundary. |
| 555 |
} |
} |
| 556 |
} elsif ($self->{state} == AFTER_AT_STATE) { |
} elsif ($self->{state} == AFTER_AT_STATE) { |
| 557 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
| 574 |
} else { |
} else { |
| 575 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 576 |
# reprocess |
# reprocess |
| 577 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@', |
| 578 |
|
line => $self->{t}->{line}, |
| 579 |
|
column => $self->{t}->{column}}; |
| 580 |
} |
} |
| 581 |
} elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) { |
} elsif ($self->{state} == AFTER_AT_HYPHEN_STATE) { |
| 582 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
| 671 |
} else { |
} else { |
| 672 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 673 |
# reprocess |
# reprocess |
| 674 |
return {type => DELIM_TOKEN, value => '#'}; |
return {type => DELIM_TOKEN, value => '#', |
| 675 |
|
line => $self->{t}->{line}, |
| 676 |
|
column => $self->{t}->{column}}; |
| 677 |
#redo A; |
#redo A; |
| 678 |
} |
} |
| 679 |
} elsif ($self->{state} == NAME_STATE) { |
} elsif ($self->{state} == NAME_STATE) { |
| 943 |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 944 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 945 |
# reprocess |
# reprocess |
| 946 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
| 947 |
unshift @{$self->{token}}, {type => MINUS_TOKEN}; |
line => $self->{line}, |
| 948 |
|
column => $self->{column} - 2}; |
| 949 |
|
unshift @{$self->{token}}, {type => MINUS_TOKEN, |
| 950 |
|
line => $self->{line}, |
| 951 |
|
column => $self->{column} - 1}; |
| 952 |
|
## BUG: line and column might be wrong if they are on the |
| 953 |
|
## line boundary. |
| 954 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
| 955 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 956 |
return $self->{t}; |
return $self->{t}; |
| 958 |
} elsif (length $self->{t}->{value}) { |
} elsif (length $self->{t}->{value}) { |
| 959 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 960 |
# reprocess |
# reprocess |
| 961 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
| 962 |
|
line => $self->{line}, |
| 963 |
|
column => $self->{column} - 1}; |
| 964 |
|
## BUG: line and column might be wrong if they are on the |
| 965 |
|
## line boundary. |
| 966 |
return $self->{t}; |
return $self->{t}; |
| 967 |
#redo A; |
#redo A; |
| 968 |
} else { |
} else { |
| 969 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 970 |
# reprocess |
# reprocess |
| 971 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
| 972 |
|
line => $self->{line}, |
| 973 |
|
column => $self->{column} - 1}; |
| 974 |
|
## BUG: line and column might be wrong if they are on the |
| 975 |
|
## line boundary. |
| 976 |
$self->{t}->{type} = NUMBER_TOKEN; |
$self->{t}->{type} = NUMBER_TOKEN; |
| 977 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 978 |
return $self->{t}; |
return $self->{t}; |
| 982 |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 983 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 984 |
# reprocess |
# reprocess |
| 985 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
| 986 |
return {type => MINUS_TOKEN}; |
line => $self->{line}, |
| 987 |
|
column => $self->{column} - 2}; |
| 988 |
|
return {type => MINUS_TOKEN, |
| 989 |
|
line => $self->{line}, |
| 990 |
|
column => $self->{column} - 1}; |
| 991 |
|
## BUG: line and column might be wrong if they are on the |
| 992 |
|
## line boundary. |
| 993 |
#redo A; |
#redo A; |
| 994 |
} elsif (length $self->{t}->{value}) { |
} elsif (length $self->{t}->{value}) { |
| 995 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 996 |
# reprocess |
# reprocess |
| 997 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
| 998 |
|
line => $self->{line}, |
| 999 |
|
column => $self->{column} - 1}; |
| 1000 |
|
## BUG: line and column might be wrong if they are on the |
| 1001 |
|
## line boundary. |
| 1002 |
return $self->{t}; |
return $self->{t}; |
| 1003 |
#redo A; |
#redo A; |
| 1004 |
} else { |
} else { |
| 1005 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1006 |
# reprocess |
# reprocess |
| 1007 |
return {type => DELIM_TOKEN, value => '\\'}; |
return {type => DELIM_TOKEN, value => '\\', |
| 1008 |
|
line => $self->{line}, |
| 1009 |
|
column => $self->{column} - 1}; |
| 1010 |
|
## BUG: line and column might be wrong if they are on the |
| 1011 |
|
## line boundary. |
| 1012 |
#redo A; |
#redo A; |
| 1013 |
} |
} |
| 1014 |
} |
} |
| 1017 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 1018 |
redo A; |
redo A; |
| 1019 |
} else { |
} else { |
| 1020 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\', |
| 1021 |
|
line => $self->{line}, |
| 1022 |
|
column => $self->{column} - 1}; |
| 1023 |
|
## BUG: line and column might be wrong if they are on the |
| 1024 |
|
## line boundary. |
| 1025 |
$self->{t}->{type} = { |
$self->{t}->{type} = { |
| 1026 |
STRING_TOKEN, INVALID_TOKEN, |
STRING_TOKEN, INVALID_TOKEN, |
| 1027 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
| 1190 |
} else { |
} else { |
| 1191 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1192 |
# reprocess |
# reprocess |
| 1193 |
return {type => DOT_TOKEN}; |
return {type => DOT_TOKEN, |
| 1194 |
|
line => $self->{line}, column => $self->{column} - 1}; |
| 1195 |
|
## BUG: line and column might be wrong if they are on the |
| 1196 |
|
## line boundary. |
| 1197 |
#redo A; |
#redo A; |
| 1198 |
} |
} |
| 1199 |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |