80 |
my $self = shift; |
my $self = shift; |
81 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
82 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
83 |
|
#$self->{t} = {type => token-type, value => value, number => number}; |
84 |
} # init |
} # init |
85 |
|
|
86 |
sub get_next_token ($) { |
sub get_next_token ($) { |
89 |
return shift @{$self->{token}}; |
return shift @{$self->{token}}; |
90 |
} |
} |
91 |
|
|
|
my $current_token; |
|
92 |
my $char; |
my $char; |
93 |
my $num; # |{num}|, if any. |
my $num; # |{num}|, if any. |
94 |
my $i; # |$i + 1|th character in |unicode| in |escape|. |
my $i; # |$i + 1|th character in |unicode| in |escape|. |
103 |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
if ($self->{state} == BEFORE_TOKEN_STATE) { |
104 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
105 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
106 |
$current_token = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
107 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
108 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
109 |
redo A; |
redo A; |
110 |
|
} elsif ($self->{c} == 0x0055 or $self->{c} == 0x0075) { # U or u |
111 |
|
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}}; |
112 |
|
$self->{c} = $self->{get_char}->(); |
113 |
|
if ($self->{c} == 0x002B) { # + |
114 |
|
$self->{c} = $self->{get_char}->(); |
115 |
|
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
116 |
|
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
117 |
|
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
118 |
|
$self->{c} == 0x003F) { # ? |
119 |
|
$self->{t}->{value} .= '+' . chr $self->{c}; |
120 |
|
$self->{t}->{type} = UNICODE_RANGE_TOKEN; |
121 |
|
$self->{c} = $self->{get_char}->(); |
122 |
|
C: for (2..6) { |
123 |
|
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
124 |
|
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
125 |
|
(0x0061 <= $self->{c} and $self->{c} <= 0x0066) or # a..f |
126 |
|
$self->{c} == 0x003F) { # ? |
127 |
|
$self->{t}->{value} .= chr $self->{c}; |
128 |
|
$self->{c} = $self->{get_char}->(); |
129 |
|
} else { |
130 |
|
last C; |
131 |
|
} |
132 |
|
} # C |
133 |
|
|
134 |
|
if ($self->{c} == 0x002D) { # - |
135 |
|
$self->{c} = $self->{get_char}->(); |
136 |
|
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
137 |
|
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
138 |
|
(0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f |
139 |
|
$self->{t}->{value} .= '-' . chr $self->{c}; |
140 |
|
$self->{c} = $self->{get_char}->(); |
141 |
|
C: for (2..6) { |
142 |
|
if ((0x0030 <= $self->{c} and $self->{c} <= 0x0039) or # 0..9 |
143 |
|
(0x0041 <= $self->{c} and $self->{c} <= 0x0046) or # A..F |
144 |
|
(0x0061 <= $self->{c} and $self->{c} <= 0x0066)) { # a..f |
145 |
|
$self->{t}->{value} .= chr $self->{c}; |
146 |
|
$self->{c} = $self->{get_char}->(); |
147 |
|
} else { |
148 |
|
last C; |
149 |
|
} |
150 |
|
} # C |
151 |
|
|
152 |
|
# |
153 |
|
} else { |
154 |
|
my $token = $self->{t}; |
155 |
|
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
156 |
|
$self->{state} = BEFORE_NMSTART_STATE; |
157 |
|
# reprocess |
158 |
|
return $token; |
159 |
|
#redo A; |
160 |
|
} |
161 |
|
} |
162 |
|
|
163 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
164 |
|
# reprocess |
165 |
|
return $self->{t}; |
166 |
|
#redo A; |
167 |
|
} else { |
168 |
|
unshift @{$self->{token}}, {type => PLUS_TOKEN}; |
169 |
|
$self->{state} = BEFORE_TOKEN_STATE; |
170 |
|
# reprocess |
171 |
|
return $self->{t}; |
172 |
|
#redo A; |
173 |
|
} |
174 |
|
} else { |
175 |
|
$self->{state} = NAME_STATE; |
176 |
|
# reprocess |
177 |
|
redo A; |
178 |
|
} |
179 |
} elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
} elsif ((0x0041 <= $self->{c} and $self->{c} <= 0x005A) or # A..Z |
180 |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
181 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
182 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
183 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
184 |
$current_token = {type => IDENT_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => IDENT_TOKEN, value => chr $self->{c}}; |
185 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
186 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
187 |
redo A; |
redo A; |
188 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
189 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
190 |
$current_token = {type => IDENT_TOKEN, value => ''}; |
$self->{t} = {type => IDENT_TOKEN, value => ''}; |
191 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
192 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
193 |
redo A; |
redo A; |
194 |
} elsif ($self->{c} == 0x0040) { # @ |
} elsif ($self->{c} == 0x0040) { # @ |
195 |
## NOTE: |@| in |ATKEYWORD| |
## NOTE: |@| in |ATKEYWORD| |
196 |
$current_token = {type => ATKEYWORD_TOKEN, value => ''}; |
$self->{t} = {type => ATKEYWORD_TOKEN, value => ''}; |
197 |
$self->{state} = AFTER_AT_STATE; |
$self->{state} = AFTER_AT_STATE; |
198 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
199 |
redo A; |
redo A; |
200 |
} elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or ' |
} elsif ($self->{c} == 0x0022 or $self->{c} == 0x0027) { # " or ' |
201 |
$current_token = {type => STRING_TOKEN, value => ''}; |
$self->{t} = {type => STRING_TOKEN, value => ''}; |
202 |
$self->{state} = STRING_STATE; $q = $self->{c}; |
$self->{state} = STRING_STATE; $q = $self->{c}; |
203 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
204 |
redo A; |
redo A; |
205 |
} elsif ($self->{c} == 0x0023) { # # |
} elsif ($self->{c} == 0x0023) { # # |
206 |
## NOTE: |#| in |HASH|. |
## NOTE: |#| in |HASH|. |
207 |
$current_token = {type => HASH_TOKEN, value => ''}; |
$self->{t} = {type => HASH_TOKEN, value => ''}; |
208 |
$self->{state} = HASH_OPEN_STATE; |
$self->{state} = HASH_OPEN_STATE; |
209 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
210 |
redo A; |
redo A; |
211 |
} elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
} elsif (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
212 |
## NOTE: |num|. |
## NOTE: |num|. |
213 |
$current_token = {type => NUMBER_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => NUMBER_TOKEN, value => chr $self->{c}}; |
214 |
$self->{state} = NUMBER_STATE; |
$self->{state} = NUMBER_STATE; |
215 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
216 |
redo A; |
redo A; |
217 |
} elsif ($self->{c} == 0x002E) { # . |
} elsif ($self->{c} == 0x002E) { # . |
218 |
## NOTE: |num|. |
## NOTE: |num|. |
219 |
$current_token = {type => NUMBER_TOKEN, value => '0'}; |
$self->{t} = {type => NUMBER_TOKEN, value => '0'}; |
220 |
$self->{state} = NUMBER_FRACTION_STATE; |
$self->{state} = NUMBER_FRACTION_STATE; |
221 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
222 |
redo A; |
redo A; |
270 |
} else { |
} else { |
271 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '!'}; |
272 |
## NOTE: |-| in |ident| in |IDENT| |
## NOTE: |-| in |ident| in |IDENT| |
273 |
$current_token = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
274 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
275 |
#reprocess |
#reprocess |
276 |
return {type => DELIM_TOKEN, value => '<'}; |
return {type => DELIM_TOKEN, value => '<'}; |
390 |
#redo A; |
#redo A; |
391 |
} else { |
} else { |
392 |
# stay in the state |
# stay in the state |
393 |
$current_token = {type => DELIM_TOKEN, value => chr $self->{c}}; |
$self->{t} = {type => DELIM_TOKEN, value => chr $self->{c}}; |
394 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
395 |
return $current_token; |
return $self->{t}; |
396 |
#redo A; |
#redo A; |
397 |
} |
} |
398 |
} elsif ($self->{state} == BEFORE_NMSTART_STATE) { |
} elsif ($self->{state} == BEFORE_NMSTART_STATE) { |
402 |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
403 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
404 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
405 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
406 |
$current_token->{type} = DIMENSION_TOKEN |
$self->{t}->{type} = DIMENSION_TOKEN |
407 |
if $current_token->{type} == NUMBER_TOKEN; |
if $self->{t}->{type} == NUMBER_TOKEN; |
408 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
409 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
410 |
redo A; |
redo A; |
414 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
415 |
redo A; |
redo A; |
416 |
} elsif ($self->{c} == 0x002D and # - |
} elsif ($self->{c} == 0x002D and # - |
417 |
$current_token->{type} == IDENT_TOKEN) { |
$self->{t}->{type} == IDENT_TOKEN) { |
418 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
419 |
if ($self->{c} == 0x003E) { # > |
if ($self->{c} == 0x003E) { # > |
420 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
423 |
#redo A; |
#redo A; |
424 |
} else { |
} else { |
425 |
## NOTE: |-|, |-|, $self->{c} |
## NOTE: |-|, |-|, $self->{c} |
426 |
#$current_token = {type => IDENT_TOKEN, value => '-'}; |
#$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
427 |
# stay in the state |
# stay in the state |
428 |
# reconsume |
# reconsume |
429 |
return {type => DELIM_TOKEN, value => '-'}; |
return {type => DELIM_TOKEN, value => '-'}; |
430 |
#redo A; |
#redo A; |
431 |
} |
} |
432 |
} else { |
} else { |
433 |
if ($current_token->{type} == NUMBER_TOKEN) { |
if ($self->{t}->{type} == NUMBER_TOKEN) { |
434 |
## NOTE: |-| after |NUMBER|. |
## NOTE: |-| after |NUMBER|. |
435 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
436 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
437 |
# reconsume |
# reconsume |
438 |
$current_token->{value} = $current_token->{number}; |
$self->{t}->{value} = $self->{t}->{number}; |
439 |
delete $current_token->{number}; |
delete $self->{t}->{number}; |
440 |
return $current_token; |
return $self->{t}; |
441 |
} else { |
} else { |
442 |
## NOTE: |-| not followed by |nmstart|. |
## NOTE: |-| not followed by |nmstart|. |
443 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
450 |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
451 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
452 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
453 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
454 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
455 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
456 |
redo A; |
redo A; |
457 |
} elsif ($self->{c} == 0x002D) { # - |
} elsif ($self->{c} == 0x002D) { # - |
458 |
$current_token->{value} .= '-'; |
$self->{t}->{value} .= '-'; |
459 |
$self->{state} = AFTER_AT_HYPHEN_STATE; |
$self->{state} = AFTER_AT_HYPHEN_STATE; |
460 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
461 |
redo A; |
redo A; |
473 |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
(0x0061 <= $self->{c} and $self->{c} <= 0x007A) or # a..z |
474 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
475 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
476 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
477 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
478 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
479 |
redo A; |
redo A; |
487 |
#redo A; |
#redo A; |
488 |
} else { |
} else { |
489 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '-'}; |
490 |
$current_token = {type => IDENT_TOKEN, value => '-'}; |
$self->{t} = {type => IDENT_TOKEN, value => '-'}; |
491 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
492 |
# reprocess |
# reprocess |
493 |
return {type => DELIM_TOKEN, value => '@'}; |
return {type => DELIM_TOKEN, value => '@'}; |
507 |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
} elsif ($self->{state} == AFTER_NUMBER_STATE) { |
508 |
if ($self->{c} == 0x002D) { # - |
if ($self->{c} == 0x002D) { # - |
509 |
## NOTE: |-| in |ident|. |
## NOTE: |-| in |ident|. |
510 |
$current_token->{value} = '-'; |
$self->{t}->{value} = '-'; |
511 |
$self->{state} = BEFORE_NMSTART_STATE; |
$self->{state} = BEFORE_NMSTART_STATE; |
512 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
513 |
redo A; |
redo A; |
516 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
517 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
518 |
## NOTE: |nmstart| in |ident|. |
## NOTE: |nmstart| in |ident|. |
519 |
$current_token->{value} = chr $self->{c}; |
$self->{t}->{value} = chr $self->{c}; |
520 |
$current_token->{type} = DIMENSION_TOKEN; |
$self->{t}->{type} = DIMENSION_TOKEN; |
521 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
522 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
523 |
redo A; |
redo A; |
524 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
525 |
## NOTE: |nmstart| in |ident| in |IDENT| |
## NOTE: |nmstart| in |ident| in |IDENT| |
526 |
$current_token->{value} = ''; |
$self->{t}->{value} = ''; |
527 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 0; |
528 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
529 |
redo A; |
redo A; |
530 |
} elsif ($self->{c} == 0x0025) { # % |
} elsif ($self->{c} == 0x0025) { # % |
531 |
$current_token->{type} = PERCENTAGE_TOKEN; |
$self->{t}->{type} = PERCENTAGE_TOKEN; |
532 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
533 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
534 |
return $current_token; |
return $self->{t}; |
535 |
#redo A; |
#redo A; |
536 |
} else { |
} else { |
537 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
538 |
# reprocess |
# reprocess |
539 |
return $current_token; |
return $self->{t}; |
540 |
#redo A; |
#redo A; |
541 |
} |
} |
542 |
} elsif ($self->{state} == HASH_OPEN_STATE) { |
} elsif ($self->{state} == HASH_OPEN_STATE) { |
547 |
$self->{c} == 0x002D or # - |
$self->{c} == 0x002D or # - |
548 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
549 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
550 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
551 |
$self->{state} = NAME_STATE; |
$self->{state} = NAME_STATE; |
552 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
553 |
redo A; |
redo A; |
569 |
$self->{c} == 0x005F or # _ |
$self->{c} == 0x005F or # _ |
570 |
$self->{c} == 0x002D or # - |
$self->{c} == 0x002D or # - |
571 |
$self->{c} > 0x007F) { # nonascii |
$self->{c} > 0x007F) { # nonascii |
572 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
573 |
# stay in the state |
# stay in the state |
574 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
575 |
redo A; |
redo A; |
578 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
579 |
redo A; |
redo A; |
580 |
} elsif ($self->{c} == 0x0028 and # ( |
} elsif ($self->{c} == 0x0028 and # ( |
581 |
$current_token->{type} == IDENT_TOKEN) { # ( |
$self->{t}->{type} == IDENT_TOKEN) { # ( |
582 |
my $func_name = $current_token->{value}; |
my $func_name = $self->{t}->{value}; |
583 |
$func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive? |
$func_name =~ tr/A-Z/a-z/; ## TODO: Unicode or ASCII case-insensitive? |
584 |
if ($func_name eq 'url' or $func_name eq 'url-prefix') { |
if ($func_name eq 'url' or $func_name eq 'url-prefix') { |
585 |
if ($current_token->{has_escape}) { |
if ($self->{t}->{has_escape}) { |
586 |
## TODO: warn |
## TODO: warn |
587 |
} |
} |
588 |
$current_token->{type} |
$self->{t}->{type} |
589 |
= $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN; |
= $func_name eq 'url' ? URI_TOKEN : URI_PREFIX_TOKEN; |
590 |
$current_token->{value} = ''; |
$self->{t}->{value} = ''; |
591 |
$self->{state} = URI_BEFORE_WSP_STATE; |
$self->{state} = URI_BEFORE_WSP_STATE; |
592 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
593 |
redo A; |
redo A; |
594 |
} else { |
} else { |
595 |
$current_token->{type} = FUNCTION_TOKEN; |
$self->{t}->{type} = FUNCTION_TOKEN; |
596 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
597 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
598 |
return $current_token; |
return $self->{t}; |
599 |
#redo A; |
#redo A; |
600 |
} |
} |
601 |
} else { |
} else { |
602 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
603 |
# reconsume |
# reconsume |
604 |
return $current_token; |
return $self->{t}; |
605 |
#redo A; |
#redo A; |
606 |
} |
} |
607 |
} elsif ($self->{state} == URI_BEFORE_WSP_STATE) { |
} elsif ($self->{state} == URI_BEFORE_WSP_STATE) { |
615 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
616 |
} |
} |
617 |
if ($self->{c} == -1) { |
if ($self->{c} == -1) { |
618 |
$current_token->{type} = { |
$self->{t}->{type} = { |
619 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
620 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
621 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
622 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
623 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
624 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
625 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
626 |
return $current_token; |
return $self->{t}; |
627 |
#redo A; |
#redo A; |
628 |
} elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or ( |
} elsif ($self->{c} < 0x0020 or $self->{c} == 0x0028) { # C0 or ( |
629 |
## TODO: Should we consider matches of "(" and ")"? |
## TODO: Should we consider matches of "(" and ")"? |
630 |
$current_token->{type} = { |
$self->{t}->{type} = { |
631 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
632 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
633 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
634 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
635 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
636 |
$self->{state} = URI_UNQUOTED_STATE; |
$self->{state} = URI_UNQUOTED_STATE; |
637 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
638 |
redo A; |
redo A; |
643 |
} elsif ($self->{c} == 0x0029) { # ) |
} elsif ($self->{c} == 0x0029) { # ) |
644 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
645 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
646 |
return $current_token; |
return $self->{t}; |
647 |
#redo A; |
#redo A; |
648 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
649 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 1; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 1; |
650 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
651 |
redo A; |
redo A; |
652 |
} else { |
} else { |
653 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
654 |
$self->{state} = URI_UNQUOTED_STATE; |
$self->{state} = URI_UNQUOTED_STATE; |
655 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
656 |
redo A; |
redo A; |
667 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
668 |
redo A; |
redo A; |
669 |
} elsif ($self->{c} == -1) { |
} elsif ($self->{c} == -1) { |
670 |
$current_token->{type} = { |
$self->{t}->{type} = { |
671 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
672 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
673 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
674 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
675 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
676 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
677 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
678 |
return $current_token; |
return $self->{t}; |
679 |
#redo A; |
#redo A; |
680 |
} elsif ($self->{c} < 0x0020 or { |
} elsif ($self->{c} < 0x0020 or { |
681 |
0x0022 => 1, # " |
0x0022 => 1, # " |
683 |
0x0028 => 1, # ( |
0x0028 => 1, # ( |
684 |
}->{$self->{c}}) { # C0 or ( |
}->{$self->{c}}) { # C0 or ( |
685 |
## TODO: Should we consider matches of "(" and ")", '"', or "'"? |
## TODO: Should we consider matches of "(" and ")", '"', or "'"? |
686 |
$current_token->{type} = { |
$self->{t}->{type} = { |
687 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
688 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
689 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
690 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
691 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
692 |
# stay in the state. |
# stay in the state. |
693 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
694 |
redo A; |
redo A; |
695 |
} elsif ($self->{c} == 0x0029) { # ) |
} elsif ($self->{c} == 0x0029) { # ) |
696 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
697 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
698 |
return $current_token; |
return $self->{t}; |
699 |
#redo A; |
#redo A; |
700 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
701 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 1; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 1; |
702 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
703 |
redo A; |
redo A; |
704 |
} else { |
} else { |
705 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
706 |
# stay in the state. |
# stay in the state. |
707 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
708 |
redo A; |
redo A; |
719 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
720 |
redo A; |
redo A; |
721 |
} elsif ($self->{c} == -1) { |
} elsif ($self->{c} == -1) { |
722 |
$current_token->{type} = { |
$self->{t}->{type} = { |
723 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
724 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
725 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
726 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
727 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
728 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
729 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
730 |
return $current_token; |
return $self->{t}; |
731 |
#redo A; |
#redo A; |
732 |
} elsif ($self->{c} == 0x0029) { # ) |
} elsif ($self->{c} == 0x0029) { # ) |
733 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
734 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
735 |
return $current_token; |
return $self->{t}; |
736 |
#redo A; |
#redo A; |
737 |
} elsif ($self->{c} == 0x005C) { # \ |
} elsif ($self->{c} == 0x005C) { # \ |
738 |
$self->{state} = ESCAPE_OPEN_STATE; $q = 1; |
$self->{state} = ESCAPE_OPEN_STATE; $q = 1; |
740 |
redo A; |
redo A; |
741 |
} else { |
} else { |
742 |
## TODO: Should we consider matches of "(" and ")", '"', or "'"? |
## TODO: Should we consider matches of "(" and ")", '"', or "'"? |
743 |
$current_token->{type} = { |
$self->{t}->{type} = { |
744 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
745 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
746 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
747 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
748 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
749 |
# stay in the state. |
# stay in the state. |
750 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
751 |
redo A; |
redo A; |
752 |
} |
} |
753 |
} elsif ($self->{state} == ESCAPE_OPEN_STATE) { |
} elsif ($self->{state} == ESCAPE_OPEN_STATE) { |
754 |
$current_token->{has_escape} = 1; |
$self->{t}->{has_escape} = 1; |
755 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { # 0..9 |
756 |
## NOTE: second character of |unicode| in |escape|. |
## NOTE: second character of |unicode| in |escape|. |
757 |
$char = $self->{c} - 0x0030; |
$char = $self->{c} - 0x0030; |
776 |
## NOTE: In |escape| in ... in |ident|. |
## NOTE: In |escape| in ... in |ident|. |
777 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
778 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
779 |
return $current_token; |
return $self->{t}; |
780 |
# reconsume |
# reconsume |
781 |
#redo A; |
#redo A; |
782 |
} elsif ($q == 1) { |
} elsif ($q == 1) { |
783 |
## NOTE: In |escape| in |URI|. |
## NOTE: In |escape| in |URI|. |
784 |
$current_token->{type} = { |
$self->{t}->{type} = { |
785 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
786 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
787 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
788 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
789 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
790 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
791 |
$self->{state} = URI_UNQUOTED_STATE; |
$self->{state} = URI_UNQUOTED_STATE; |
792 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
793 |
redo A; |
redo A; |
794 |
} else { |
} else { |
795 |
## Note: In |nl| in ... in |string| or |ident|. |
## Note: In |nl| in ... in |string| or |ident|. |
796 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
797 |
$self->{state} = STRING_STATE; |
$self->{state} = STRING_STATE; |
798 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
799 |
redo A; |
redo A; |
803 |
## NOTE: In |escape| in ... in |ident|. |
## NOTE: In |escape| in ... in |ident|. |
804 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
805 |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
unshift @{$self->{token}}, {type => DELIM_TOKEN, value => '\\'}; |
806 |
return $current_token; |
return $self->{t}; |
807 |
# reconsume |
# reconsume |
808 |
#redo A; |
#redo A; |
809 |
} elsif ($q == 1) { |
} elsif ($q == 1) { |
810 |
$current_token->{type} = { |
$self->{t}->{type} = { |
811 |
URI_TOKEN, URI_INVALID_TOKEN, |
URI_TOKEN, URI_INVALID_TOKEN, |
812 |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
URI_INVALID_TOKEN, URI_INVALID_TOKEN, |
813 |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_TOKEN, URI_PREFIX_INVALID_TOKEN, |
814 |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
URI_PREFIX_INVALID_TOKEN, URI_PREFIX_INVALID_TOKEN, |
815 |
}->{$current_token->{type}}; |
}->{$self->{t}->{type}}; |
816 |
$current_token->{value} .= "\x0D\x0A"; |
$self->{t}->{value} .= "\x0D\x0A"; |
817 |
$self->{state} = URI_UNQUOTED_STATE; |
$self->{state} = URI_UNQUOTED_STATE; |
818 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
819 |
redo A; |
redo A; |
820 |
} else { |
} else { |
821 |
## Note: In |nl| in ... in |string| or |ident|. |
## Note: In |nl| in ... in |string| or |ident|. |
822 |
$current_token->{value} .= "\x0D\x0A"; |
$self->{t}->{value} .= "\x0D\x0A"; |
823 |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
$self->{state} = ESCAPE_BEFORE_LF_STATE; |
824 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
825 |
redo A; |
redo A; |
826 |
} |
} |
827 |
} else { |
} else { |
828 |
## NOTE: second character of |escape|. |
## NOTE: second character of |escape|. |
829 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
830 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
831 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
832 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
853 |
$self->{c} == 0x000A or # \n |
$self->{c} == 0x000A or # \n |
854 |
$self->{c} == 0x0009 or # \t |
$self->{c} == 0x0009 or # \t |
855 |
$self->{c} == 0x000C) { # \f |
$self->{c} == 0x000C) { # \f |
856 |
$current_token->{value} .= chr $char; |
$self->{t}->{value} .= chr $char; |
857 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
858 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
859 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
863 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
864 |
redo A; |
redo A; |
865 |
} else { |
} else { |
866 |
$current_token->{value} .= chr $char; |
$self->{t}->{value} .= chr $char; |
867 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
868 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
869 |
# reconsume |
# reconsume |
875 |
$self->{c} == 0x000A or # \n |
$self->{c} == 0x000A or # \n |
876 |
$self->{c} == 0x0009 or # \t |
$self->{c} == 0x0009 or # \t |
877 |
$self->{c} == 0x000C) { # \f |
$self->{c} == 0x000C) { # \f |
878 |
$current_token->{value} .= chr $char; |
$self->{t}->{value} .= chr $char; |
879 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
880 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
881 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
885 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
886 |
redo A; |
redo A; |
887 |
} else { |
} else { |
888 |
$current_token->{value} .= chr $char; |
$self->{t}->{value} .= chr $char; |
889 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
890 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
891 |
# reconsume |
# reconsume |
894 |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
} elsif ($self->{state} == ESCAPE_BEFORE_LF_STATE) { |
895 |
## NOTE: |\n| in |\r\n| in |unicode| in |escape|. |
## NOTE: |\n| in |\r\n| in |unicode| in |escape|. |
896 |
if ($self->{c} == 0x000A) { # \n |
if ($self->{c} == 0x000A) { # \n |
897 |
$current_token->{value} .= chr $char; |
$self->{t}->{value} .= chr $char; |
898 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
899 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
900 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
901 |
redo A; |
redo A; |
902 |
} else { |
} else { |
903 |
$current_token->{value} .= chr $char; |
$self->{t}->{value} .= chr $char; |
904 |
$self->{state} = $q == 0 ? NAME_STATE : |
$self->{state} = $q == 0 ? NAME_STATE : |
905 |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
$q == 1 ? URI_UNQUOTED_STATE : STRING_STATE; |
906 |
# reconsume |
# reconsume |
916 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
917 |
redo A; |
redo A; |
918 |
} elsif ($self->{c} == $q) { # " | ' |
} elsif ($self->{c} == $q) { # " | ' |
919 |
if ($current_token->{type} == STRING_TOKEN) { |
if ($self->{t}->{type} == STRING_TOKEN) { |
920 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
921 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
922 |
return $current_token; |
return $self->{t}; |
923 |
#redo A; |
#redo A; |
924 |
} else { |
} else { |
925 |
$self->{state} = URI_AFTER_WSP_STATE; |
$self->{state} = URI_AFTER_WSP_STATE; |
930 |
$self->{c} == 0x000D or # \r |
$self->{c} == 0x000D or # \r |
931 |
$self->{c} == 0x000C or # \f |
$self->{c} == 0x000C or # \f |
932 |
$self->{c} == -1) { |
$self->{c} == -1) { |
933 |
$current_token->{type} = INVALID_TOKEN; |
$self->{t}->{type} = INVALID_TOKEN; |
934 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
935 |
# reconsume |
# reconsume |
936 |
return $current_token; |
return $self->{t}; |
937 |
#redo A; |
#redo A; |
938 |
} else { |
} else { |
939 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
940 |
# stay in the state |
# stay in the state |
941 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
942 |
redo A; |
redo A; |
944 |
} elsif ($self->{state} == NUMBER_STATE) { |
} elsif ($self->{state} == NUMBER_STATE) { |
945 |
## NOTE: 2nd, 3rd, or ... character in |num| before |.|. |
## NOTE: 2nd, 3rd, or ... character in |num| before |.|. |
946 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
947 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
948 |
# stay in the state |
# stay in the state |
949 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
950 |
redo A; |
redo A; |
953 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
954 |
redo A; |
redo A; |
955 |
} else { |
} else { |
956 |
$current_token->{number} = $current_token->{value}; |
$self->{t}->{number} = $self->{t}->{value}; |
957 |
$current_token->{value} = ''; |
$self->{t}->{value} = ''; |
958 |
$self->{state} = AFTER_NUMBER_STATE; |
$self->{state} = AFTER_NUMBER_STATE; |
959 |
# reprocess |
# reprocess |
960 |
redo A; |
redo A; |
962 |
} elsif ($self->{state} == NUMBER_DOT_STATE) { |
} elsif ($self->{state} == NUMBER_DOT_STATE) { |
963 |
## NOTE: The character immediately following |.| in |num|. |
## NOTE: The character immediately following |.| in |num|. |
964 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
965 |
$current_token->{value} .= '.' . chr $self->{c}; |
$self->{t}->{value} .= '.' . chr $self->{c}; |
966 |
$self->{state} = NUMBER_DOT_NUMBER_STATE; |
$self->{state} = NUMBER_DOT_NUMBER_STATE; |
967 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
968 |
redo A; |
redo A; |
969 |
} else { |
} else { |
970 |
unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'}; |
unshift @{$self->{token}}, {type => DELIM_STATE, value => '.'}; |
971 |
$current_token->{number} = $current_token->{value}; |
$self->{t}->{number} = $self->{t}->{value}; |
972 |
$current_token->{value} = ''; |
$self->{t}->{value} = ''; |
973 |
$self->{state} = BEFORE_TOKEN_STATE; |
$self->{state} = BEFORE_TOKEN_STATE; |
974 |
# reprocess |
# reprocess |
975 |
return $current_token; |
return $self->{t}; |
976 |
#redo A; |
#redo A; |
977 |
} |
} |
978 |
} elsif ($self->{state} == NUMBER_FRACTION_STATE) { |
} elsif ($self->{state} == NUMBER_FRACTION_STATE) { |
979 |
## NOTE: The character immediately following |.| at the beginning of |num|. |
## NOTE: The character immediately following |.| at the beginning of |num|. |
980 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
981 |
$current_token->{value} .= '.' . chr $self->{c}; |
$self->{t}->{value} .= '.' . chr $self->{c}; |
982 |
$self->{state} = NUMBER_DOT_NUMBER_STATE; |
$self->{state} = NUMBER_DOT_NUMBER_STATE; |
983 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
984 |
redo A; |
redo A; |
991 |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |
} elsif ($self->{state} == NUMBER_DOT_NUMBER_STATE) { |
992 |
## NOTE: |[0-9]| in |num| after |.|. |
## NOTE: |[0-9]| in |num| after |.|. |
993 |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
if (0x0030 <= $self->{c} and $self->{c} <= 0x0039) { |
994 |
$current_token->{value} .= chr $self->{c}; |
$self->{t}->{value} .= chr $self->{c}; |
995 |
# stay in the state |
# stay in the state |
996 |
$self->{c} = $self->{get_char}->(); |
$self->{c} = $self->{get_char}->(); |
997 |
redo A; |
redo A; |
998 |
} else { |
} else { |
999 |
$current_token->{number} = $current_token->{value}; |
$self->{t}->{number} = $self->{t}->{value}; |
1000 |
$current_token->{value} = ''; |
$self->{t}->{value} = ''; |
1001 |
$self->{state} = AFTER_NUMBER_STATE; |
$self->{state} = AFTER_NUMBER_STATE; |
1002 |
# reprocess |
# reprocess |
1003 |
redo A; |
redo A; |
1006 |
die "$0: Unknown state |$self->{state}|"; |
die "$0: Unknown state |$self->{state}|"; |
1007 |
} |
} |
1008 |
} # A |
} # A |
|
|
|
|
## TODO: |URI|, |UNICODE-RANGE|, |COMMENT| |
|
|
|
|
1009 |
} # get_next_token |
} # get_next_token |
1010 |
|
|
1011 |
1; |
1; |