| 115 |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
| 116 |
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
| 117 |
$self->{c} == 0x003F) { # ? |
$self->{c} == 0x003F) { # ? |
| 118 |
$self->{t}->{value} .= '+' . chr $self->{c}; |
$self->{t}->{value} = chr $self->{c}; |
| 119 |
$self->{t}->{type} = UNICODE_RANGE_TOKEN; |
$self->{t}->{type} = UNICODE_RANGE_TOKEN; |
| 120 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 121 |
C: for (2..6) { |
C: for (2..6) { |
| 408 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 409 |
redo A; |
redo A; |
| 410 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
|
## TODO: 12-\X, 12-\{nl} |
|
| 411 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
| 412 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 413 |
redo A; |
redo A; |
| 414 |
} elsif ($self->{c} == 0x002D and # - |
} elsif ($self->{c} == 0x002D) { # - |
| 415 |
$self->{t}->{type} == IDENT_TOKEN) { |
if ($self->{t}->{type} == IDENT_TOKEN) { |
|
$self->{c} = $self->{get_char}->(); |
|
|
if ($self->{c} == 0x003E) { # > |
|
|
$self->{state} = BEFORE_TOKEN_STATE; |
|
| 416 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 417 |
return {type => CDC_TOKEN}; |
if ($self->{c} == 0x003E) { # > |
| 418 |
#redo A; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 419 |
|
$self->{c} = $self->{get_char}->(); |
| 420 |
|
return {type => CDC_TOKEN}; |
| 421 |
|
#redo A; |
| 422 |
|
} else { |
| 423 |
|
## NOTE: |-|, |-|, $self->{c} |
| 424 |
|
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 425 |
|
# stay in the state |
| 426 |
|
# reconsume |
| 427 |
|
return {type => DELIM_TOKEN, value => '-'}; |
| 428 |
|
#redo A; |
| 429 |
|
} |
| 430 |
|
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 431 |
|
$self->{c} = $self->{get_char}->(); |
| 432 |
|
if ($self->{c} == 0x003E) { # > |
| 433 |
|
unshift @{$self->{token}}, {type => CDC_TOKEN}; |
| 434 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 435 |
|
$self->{t}->{value} = ''; |
| 436 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 437 |
|
$self->{c} = $self->{get_char}->(); |
| 438 |
|
return $self->{t}; |
| 439 |
|
#redo A; |
| 440 |
|
} else { |
| 441 |
|
## NOTE: |-|, |-|, $self->{c} |
| 442 |
|
my $t = $self->{t}; |
| 443 |
|
$t->{type} = NUMBER_TOKEN; |
| 444 |
|
$t->{value} = ''; |
| 445 |
|
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
| 446 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
| 447 |
|
# stay in the state |
| 448 |
|
# reconsume |
| 449 |
|
return $t; |
| 450 |
|
#redo A; |
| 451 |
|
} |
| 452 |
} else { |
} else { |
| 453 |
## NOTE: |-|, |-|, $self->{c} |
# |
|
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
|
|
# stay in the state |
|
|
# reconsume |
|
|
return {type => DELIM_TOKEN, value => '-'}; |
|
|
#redo A; |
|
| 454 |
} |
} |
| 455 |
} else { |
} else { |
| 456 |
if ($self->{t}->{type} == NUMBER_TOKEN) { |
# |
| 457 |
## NOTE: |-| after |NUMBER|. |
} |
| 458 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
|
| 459 |
$self->{state} = BEFORE_TOKEN_STATE; |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 460 |
# reprocess |
## NOTE: |-| after |NUMBER|. |
| 461 |
$self->{t}->{value} = $self->{t}->{number}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
| 462 |
delete $self->{t}->{number}; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 463 |
return $self->{t}; |
# reprocess |
| 464 |
} else { |
$self->{t}->{type} = NUMBER_TOKEN; |
| 465 |
## NOTE: |-| not followed by |nmstart|. |
$self->{t}->{value} = ''; |
| 466 |
$self->{state} = BEFORE_TOKEN_STATE; |
return $self->{t}; |
| 467 |
# reprocess |
} else { |
| 468 |
return {type => DELIM_TOKEN, value => '-'}; |
## NOTE: |-| not followed by |nmstart|. |
| 469 |
} |
$self->{state} = BEFORE_TOKEN_STATE; |
| 470 |
|
# reprocess |
| 471 |
|
return {type => DELIM_TOKEN, value => '-'}; |
| 472 |
} |
} |
| 473 |
} elsif ($self->{state} == AFTER_AT_STATE) { |
} elsif ($self->{state} == AFTER_AT_STATE) { |
| 474 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
| 532 |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
| 533 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
| 534 |
## NOTE: |-| in |ident|. |
## NOTE: |-| in |ident|. |
| 535 |
|
$self->{t}->{hyphen} = 1; |
| 536 |
$self->{t}->{value} = '-'; |
$self->{t}->{value} = '-'; |
| 537 |
|
$self->{t}->{type} = DIMENSION_TOKEN; |
| 538 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 539 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 540 |
redo A; |
redo A; |
| 551 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
| 552 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
| 553 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 554 |
|
$self->{t}->{type} = DIMENSION_TOKEN; |
| 555 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
| 556 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 557 |
redo A; |
redo A; |
| 855 |
} |
} |
| 856 |
|
|
| 857 |
if ($q == 0) { |
if ($q == 0) { |
| 858 |
$self->{state} = BEFORE_TOKEN_STATE; |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 859 |
# reprocess |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 860 |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
$self->{state} = BEFORE_TOKEN_STATE; |
| 861 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
# reprocess |
| 862 |
return {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 863 |
#redo A; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
| 864 |
} elsif (length $self->{t}->{value}) { |
$self->{t}->{type} = NUMBER_TOKEN; |
| 865 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
$self->{t}->{value} = ''; |
| 866 |
return $self->{t}; |
return $self->{t}; |
| 867 |
#redo A; |
#redo A; |
| 868 |
|
} elsif (length $self->{t}->{value}) { |
| 869 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 870 |
|
# reprocess |
| 871 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 872 |
|
return $self->{t}; |
| 873 |
|
#redo A; |
| 874 |
|
} else { |
| 875 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 876 |
|
# reprocess |
| 877 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 878 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 879 |
|
$self->{t}->{value} = ''; |
| 880 |
|
return $self->{t}; |
| 881 |
|
#redo A; |
| 882 |
|
} |
| 883 |
} else { |
} else { |
| 884 |
return {type => DELIM_TOKEN, value => '\\'}; |
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 885 |
#redo A; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 886 |
|
# reprocess |
| 887 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 888 |
|
return {type => DELIM_TOKEN, value => '-'}; |
| 889 |
|
#redo A; |
| 890 |
|
} elsif (length $self->{t}->{value}) { |
| 891 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 892 |
|
# reprocess |
| 893 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 894 |
|
return $self->{t}; |
| 895 |
|
#redo A; |
| 896 |
|
} else { |
| 897 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 898 |
|
# reprocess |
| 899 |
|
return {type => DELIM_TOKEN, value => '\\'}; |
| 900 |
|
#redo A; |
| 901 |
|
} |
| 902 |
} |
} |
| 903 |
} elsif ($q == 1) { |
} elsif ($q == 1) { |
| 904 |
$self->{state} = URI_UNQUOTED_STATE; |
$self->{state} = URI_UNQUOTED_STATE; |
| 1013 |
$self->{c} == 0x000D or # \r |
$self->{c} == 0x000D or # \r |
| 1014 |
$self->{c} == 0x000C or # \f |
$self->{c} == 0x000C or # \f |
| 1015 |
$self->{c} == -1) { |
$self->{c} == -1) { |
| 1016 |
$self->{t}->{type} = INVALID_TOKEN; |
$self->{t}->{type} = { |
| 1017 |
|
STRING_TOKEN, INVALID_TOKEN, |
| 1018 |
|
INVALID_TOKEN, INVALID_TOKEN, |
| 1019 |
|
URI_TOKEN, URI_INVALID_TOKEN, |
| 1020 |
|
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
| 1021 |
|
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1022 |
|
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1023 |
|
}->{$self->{t}->{type}}; |
| 1024 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1025 |
# reconsume |
# reconsume |
| 1026 |
return $self->{t}; |
return $self->{t}; |