| 102 |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
| 103 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
| 104 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
| 105 |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
| 106 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 107 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 108 |
redo A; |
redo A; |
| 251 |
} else { |
} else { |
| 252 |
# stay in the state. |
# stay in the state. |
| 253 |
# reprocess |
# reprocess |
| 254 |
return {type => DELIM_STATE, value => '/'}; |
return {type => DELIM_TOKEN, value => '/'}; |
| 255 |
#redo A; |
#redo A; |
| 256 |
} |
} |
| 257 |
} elsif ($self->{c} == 0x003C) { # < |
} elsif ($self->{c} == 0x003C) { # < |
| 259 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 260 |
if ($self->{c} == 0x0021) { # ! |
if ($self->{c} == 0x0021) { # ! |
| 261 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 262 |
if ($self->{c} == 0x002C) { # - |
if ($self->{c} == 0x002D) { # - |
| 263 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 264 |
if ($self->{c} == 0x002C) { # - |
if ($self->{c} == 0x002D) { # - |
| 265 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 266 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 267 |
return {type => CDO_TOKEN}; |
return {type => CDO_TOKEN}; |
| 408 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 409 |
redo A; |
redo A; |
| 410 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
|
## TODO: 12-\X, 12-\{nl} |
|
| 411 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
| 412 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 413 |
redo A; |
redo A; |
| 414 |
} elsif ($self->{c} == 0x002D and # - |
} elsif ($self->{c} == 0x002D) { # - |
| 415 |
$self->{t}->{type} == IDENT_TOKEN) { |
if ($self->{t}->{type} == IDENT_TOKEN) { |
|
$self->{c} = $self->{get_char}->(); |
|
|
if ($self->{c} == 0x003E) { # > |
|
|
$self->{state} = BEFORE_TOKEN_STATE; |
|
| 416 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 417 |
return {type => CDC_TOKEN}; |
if ($self->{c} == 0x003E) { # > |
| 418 |
#redo A; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 419 |
|
$self->{c} = $self->{get_char}->(); |
| 420 |
|
return {type => CDC_TOKEN}; |
| 421 |
|
#redo A; |
| 422 |
|
} else { |
| 423 |
|
## NOTE: |-|, |-|, $self->{c} |
| 424 |
|
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
| 425 |
|
# stay in the state |
| 426 |
|
# reconsume |
| 427 |
|
return {type => DELIM_TOKEN, value => '-'}; |
| 428 |
|
#redo A; |
| 429 |
|
} |
| 430 |
|
} elsif ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 431 |
|
$self->{c} = $self->{get_char}->(); |
| 432 |
|
if ($self->{c} == 0x003E) { # > |
| 433 |
|
unshift @{$self->{token}}, {type => CDC_TOKEN}; |
| 434 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 435 |
|
$self->{t}->{value} = ''; |
| 436 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 437 |
|
$self->{c} = $self->{get_char}->(); |
| 438 |
|
return $self->{t}; |
| 439 |
|
#redo A; |
| 440 |
|
} else { |
| 441 |
|
## NOTE: |-|, |-|, $self->{c} |
| 442 |
|
my $t = $self->{t}; |
| 443 |
|
$t->{type} = NUMBER_TOKEN; |
| 444 |
|
$t->{value} = ''; |
| 445 |
|
$self->{t} = {type => IDENT_TOKEN, value => '-', hyphen => 1}; |
| 446 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
| 447 |
|
# stay in the state |
| 448 |
|
# reconsume |
| 449 |
|
return $t; |
| 450 |
|
#redo A; |
| 451 |
|
} |
| 452 |
} else { |
} else { |
| 453 |
## NOTE: |-|, |-|, $self->{c} |
# |
|
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
|
|
# stay in the state |
|
|
# reconsume |
|
|
return {type => DELIM_TOKEN, value => '-'}; |
|
|
#redo A; |
|
| 454 |
} |
} |
| 455 |
} else { |
} else { |
| 456 |
if ($self->{t}->{type} == NUMBER_TOKEN) { |
# |
| 457 |
## NOTE: |-| after |NUMBER|. |
} |
| 458 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
|
| 459 |
$self->{state} = BEFORE_TOKEN_STATE; |
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 460 |
# reconsume |
## NOTE: |-| after |NUMBER|. |
| 461 |
$self->{t}->{value} = $self->{t}->{number}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
| 462 |
delete $self->{t}->{number}; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 463 |
return $self->{t}; |
# reprocess |
| 464 |
} else { |
$self->{t}->{type} = NUMBER_TOKEN; |
| 465 |
## NOTE: |-| not followed by |nmstart|. |
$self->{t}->{value} = ''; |
| 466 |
$self->{state} = BEFORE_TOKEN_STATE; |
return $self->{t}; |
| 467 |
$self->{c} = $self->{get_char}->(); |
} else { |
| 468 |
return {type => DELIM_TOKEN, value => '-'}; |
## NOTE: |-| not followed by |nmstart|. |
| 469 |
} |
$self->{state} = BEFORE_TOKEN_STATE; |
| 470 |
|
# reprocess |
| 471 |
|
return {type => DELIM_TOKEN, value => '-'}; |
| 472 |
} |
} |
| 473 |
} elsif ($self->{state} == AFTER_AT_STATE) { |
} elsif ($self->{state} == AFTER_AT_STATE) { |
| 474 |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
if ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
| 532 |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
| 533 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
| 534 |
## NOTE: |-| in |ident|. |
## NOTE: |-| in |ident|. |
| 535 |
|
$self->{t}->{hyphen} = 1; |
| 536 |
$self->{t}->{value} = '-'; |
$self->{t}->{value} = '-'; |
| 537 |
|
$self->{t}->{type} = DIMENSION_TOKEN; |
| 538 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
| 539 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 540 |
redo A; |
redo A; |
| 551 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
| 552 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
| 553 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 554 |
|
$self->{t}->{type} = DIMENSION_TOKEN; |
| 555 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
| 556 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 557 |
redo A; |
redo A; |
| 585 |
redo A; |
redo A; |
| 586 |
} else { |
} else { |
| 587 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 588 |
$self->{c} = $self->{get_char}->(); |
# reprocess |
| 589 |
return {type => DELIM_TOKEN, value => '#'}; |
return {type => DELIM_TOKEN, value => '#'}; |
| 590 |
#redo A; |
#redo A; |
| 591 |
} |
} |
| 794 |
redo A; |
redo A; |
| 795 |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
| 796 |
## NOTE: second character of |unicode| in |escape|. |
## NOTE: second character of |unicode| in |escape|. |
| 797 |
$char = $self->{c} - 0x0061 - 0xA; |
$char = $self->{c} - 0x0061 + 0xA; |
| 798 |
$self->{state} = ESCAPE_STATE; $i = 2; |
$self->{state} = ESCAPE_STATE; $i = 2; |
| 799 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 800 |
redo A; |
redo A; |
| 801 |
} elsif ($self->{c} == 0x000A or # \n |
} elsif ($self->{c} == 0x000A or # \n |
| 802 |
$self->{c} == 0x000C) { # \f |
$self->{c} == 0x000C) { # \f |
| 803 |
if ($q == 0) { |
if ($q == 0) { |
| 804 |
## NOTE: In |escape| in ... in |ident|. |
# |
|
$self->{state} = BEFORE_TOKEN_STATE; |
|
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
|
|
return $self->{t}; |
|
|
# reconsume |
|
|
#redo A; |
|
| 805 |
} elsif ($q == 1) { |
} elsif ($q == 1) { |
| 806 |
## NOTE: In |escape| in |URI|. |
## NOTE: In |escape| in |URI|. |
| 807 |
$self->{t}->{type} = { |
$self->{t}->{type} = { |
| 823 |
} |
} |
| 824 |
} elsif ($self->{c} == 0x000D) { # \r |
} elsif ($self->{c} == 0x000D) { # \r |
| 825 |
if ($q == 0) { |
if ($q == 0) { |
| 826 |
## NOTE: In |escape| in ... in |ident|. |
# |
|
$self->{state} = BEFORE_TOKEN_STATE; |
|
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
|
|
return $self->{t}; |
|
|
# reconsume |
|
|
#redo A; |
|
| 827 |
} elsif ($q == 1) { |
} elsif ($q == 1) { |
| 828 |
|
## NOTE: In |escape| in |URI|. |
| 829 |
$self->{t}->{type} = { |
$self->{t}->{type} = { |
| 830 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
| 831 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
| 832 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 833 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 834 |
}->{$self->{t}->{type}}; |
}->{$self->{t}->{type}}; |
| 835 |
$self->{t}->{value} .= "\x0D\x0A"; |
$self->{t}->{value} .= "\x0D"; |
| 836 |
$self->{state} = URI_UNQUOTED_STATE; |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
| 837 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 838 |
redo A; |
redo A; |
| 839 |
} else { |
} else { |
| 840 |
## Note: In |nl| in ... in |string| or |ident|. |
## Note: In |nl| in ... in |string| or |ident|. |
| 841 |
$self->{t}->{value} .= "\x0D\x0A"; |
$self->{t}->{value} .= "\x0D"; |
| 842 |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
| 843 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 844 |
redo A; |
redo A; |
| 845 |
} |
} |
| 846 |
|
} elsif ($self->{c} == -1) { |
| 847 |
|
# |
| 848 |
} else { |
} else { |
| 849 |
## NOTE: second character of |escape|. |
## NOTE: second character of |escape|. |
| 850 |
$self->{t}->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
| 853 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 854 |
redo A; |
redo A; |
| 855 |
} |
} |
| 856 |
|
|
| 857 |
|
if ($q == 0) { |
| 858 |
|
if ($self->{t}->{type} == DIMENSION_TOKEN) { |
| 859 |
|
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 860 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 861 |
|
# reprocess |
| 862 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 863 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
| 864 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 865 |
|
$self->{t}->{value} = ''; |
| 866 |
|
return $self->{t}; |
| 867 |
|
#redo A; |
| 868 |
|
} elsif (length $self->{t}->{value}) { |
| 869 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 870 |
|
# reprocess |
| 871 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 872 |
|
return $self->{t}; |
| 873 |
|
#redo A; |
| 874 |
|
} else { |
| 875 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 876 |
|
# reprocess |
| 877 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 878 |
|
$self->{t}->{type} = NUMBER_TOKEN; |
| 879 |
|
$self->{t}->{value} = ''; |
| 880 |
|
return $self->{t}; |
| 881 |
|
#redo A; |
| 882 |
|
} |
| 883 |
|
} else { |
| 884 |
|
if ($self->{t}->{hyphen} and $self->{t}->{value} eq '-') { |
| 885 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 886 |
|
# reprocess |
| 887 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 888 |
|
return {type => DELIM_TOKEN, value => '-'}; |
| 889 |
|
#redo A; |
| 890 |
|
} elsif (length $self->{t}->{value}) { |
| 891 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 892 |
|
# reprocess |
| 893 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 894 |
|
return $self->{t}; |
| 895 |
|
#redo A; |
| 896 |
|
} else { |
| 897 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 898 |
|
# reprocess |
| 899 |
|
return {type => DELIM_TOKEN, value => '\\'}; |
| 900 |
|
#redo A; |
| 901 |
|
} |
| 902 |
|
} |
| 903 |
|
} elsif ($q == 1) { |
| 904 |
|
$self->{state} = URI_UNQUOTED_STATE; |
| 905 |
|
$self->{c} = $self->{get_char}->(); |
| 906 |
|
redo A; |
| 907 |
|
} else { |
| 908 |
|
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
| 909 |
|
$self->{t}->{type} = { |
| 910 |
|
STRING_TOKEN, INVALID_TOKEN, |
| 911 |
|
URI_TOKEN, URI_INVALID_TOKEN, |
| 912 |
|
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 913 |
|
}->{$self->{t}->{type}} || $self->{t}->{type}; |
| 914 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
| 915 |
|
# reprocess |
| 916 |
|
return $self->{t}; |
| 917 |
|
#redo A; |
| 918 |
|
} |
| 919 |
} elsif ($self->{state} == ESCAPE_STATE) { |
} elsif ($self->{state} == ESCAPE_STATE) { |
| 920 |
## NOTE: third..seventh character of |unicode| in |escape|. |
## NOTE: third..seventh character of |unicode| in |escape|. |
| 921 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
| 929 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 930 |
redo A; |
redo A; |
| 931 |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
} elsif (0x0061 <= $self->{c} and $self->{c} <= 0x0066) { # a..f |
| 932 |
$char = $char * 0x10 + $self->{c} - 0x0061 - 0xA; |
$char = $char * 0x10 + $self->{c} - 0x0061 + 0xA; |
| 933 |
$self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE; |
$self->{state} = ++$i == 7 ? ESCAPE_BEFORE_NL_STATE : ESCAPE_STATE; |
| 934 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 935 |
redo A; |
redo A; |
| 978 |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
| 979 |
## NOTE: |\n| in |\r\n| in |unicode| in |escape|. |
## NOTE: |\n| in |\r\n| in |unicode| in |escape|. |
| 980 |
if ($self->{c} == 0x000A) { # \n |
if ($self->{c} == 0x000A) { # \n |
| 981 |
$self->{t}->{value} .= chr $char; |
$self->{t}->{value} .= chr $self->{c}; |
| 982 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
| 983 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
| 984 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 985 |
redo A; |
redo A; |
| 986 |
} else { |
} else { |
|
$self->{t}->{value} .= chr $char; |
|
| 987 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
| 988 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
| 989 |
# reconsume |
# reprocess |
| 990 |
redo A; |
redo A; |
| 991 |
} |
} |
| 992 |
} elsif ($self->{state} == STRING_STATE) { |
} elsif ($self->{state} == STRING_STATE) { |
| 1013 |
$self->{c} == 0x000D or # \r |
$self->{c} == 0x000D or # \r |
| 1014 |
$self->{c} == 0x000C or # \f |
$self->{c} == 0x000C or # \f |
| 1015 |
$self->{c} == -1) { |
$self->{c} == -1) { |
| 1016 |
$self->{t}->{type} = INVALID_TOKEN; |
$self->{t}->{type} = { |
| 1017 |
|
STRING_TOKEN, INVALID_TOKEN, |
| 1018 |
|
INVALID_TOKEN, INVALID_TOKEN, |
| 1019 |
|
URI_TOKEN, URI_INVALID_TOKEN, |
| 1020 |
|
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
| 1021 |
|
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1022 |
|
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
| 1023 |
|
}->{$self->{t}->{type}}; |
| 1024 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1025 |
# reconsume |
# reconsume |
| 1026 |
return $self->{t}; |
return $self->{t}; |
| 1057 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
| 1058 |
redo A; |
redo A; |
| 1059 |
} else { |
} else { |
| 1060 |
unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '.'}; |
| 1061 |
$self->{t}->{number} = $self->{t}->{value}; |
$self->{t}->{number} = $self->{t}->{value}; |
| 1062 |
$self->{t}->{value} = ''; |
$self->{t}->{value} = ''; |
| 1063 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1074 |
redo A; |
redo A; |
| 1075 |
} else { |
} else { |
| 1076 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
| 1077 |
$self->{c} = $self->{get_char}->(); |
# reprocess |
| 1078 |
return {type => DELIM_TOKEN, value => '.'}; |
return {type => DELIM_TOKEN, value => '.'}; |
| 1079 |
#redo A; |
#redo A; |
| 1080 |
} |
} |