/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download) (as text)
Wed Oct 15 04:38:22 2008 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.7: +163 -15 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	15 Oct 2008 04:37:36 -0000
	* XML-Parser.t: "xml/pis-1.dat" and "xml/xmldecls-1.dat" added.
	Test directives "#xml-version", "#xml-encoding", and
	"#xml-standalone" are added.

2008-10-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/t/xml/ChangeLog	15 Oct 2008 04:37:54 -0000
	* pis-1.dat, xmldecls-1.dat: New test data files.

2008-10-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	15 Oct 2008 04:33:34 -0000
2008-10-15  Wakaba  <wakaba@suika.fam.cx>

	* NanoDOM.pm (create_processing_instruction): New method.
	(xml_version, xml_encoding, xml_standalone): New attributes.
	(ProcessingInstruction): New class.

++ whatpm/Whatpm/HTML/ChangeLog	15 Oct 2008 04:34:03 -0000
	* Tokenizer.pm.src: Support for XML processing instructions.

2008-10-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/XML/ChangeLog	15 Oct 2008 04:34:57 -0000
	* Parser.pm.src: Support for XML declarations.

2008-10-15  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::HTML::Tokenizer; # In this file the package holds only the token type constants and their export lists
2     use strict;
3 wakaba 1.8 our $VERSION=do{my @r=(q$Revision: 1.7 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; # Built from the digits of the CVS "Revision" keyword ("1.7" yields "1.07")
4 wakaba 1.2
5     BEGIN { # Compile-time setup, so the export lists exist before the BEGIN-time |import (':token')| call made later in this file
6     require Exporter;
7     push our @ISA, 'Exporter'; # Inherit |import| from Exporter
8    
9     our @EXPORT_OK = qw(
10     DOCTYPE_TOKEN
11     COMMENT_TOKEN
12     START_TAG_TOKEN
13     END_TAG_TOKEN
14     END_OF_FILE_TOKEN
15     CHARACTER_TOKEN
16     PI_TOKEN
17     ABORT_TOKEN
18     ); # Names that may be imported individually on request
19    
20     our %EXPORT_TAGS = ( # The ":token" tag imports every token type constant at once
21     token => [qw(
22     DOCTYPE_TOKEN
23     COMMENT_TOKEN
24     START_TAG_TOKEN
25     END_TAG_TOKEN
26     END_OF_FILE_TOKEN
27     CHARACTER_TOKEN
28     PI_TOKEN
29     ABORT_TOKEN
30     )], # Same names as @EXPORT_OK; keep the two lists in sync
31     );
32     }
33    
34     ## Token types: integer codes stored in the |type| field of a token.
35    
36     sub DOCTYPE_TOKEN () { 1 } # Empty prototype: eligible for inlining as a constant
37     sub COMMENT_TOKEN () { 2 }
38     sub START_TAG_TOKEN () { 3 }
39     sub END_TAG_TOKEN () { 4 }
40     sub END_OF_FILE_TOKEN () { 5 }
41     sub CHARACTER_TOKEN () { 6 }
42     sub PI_TOKEN () { 7 } # XML5 (processing instruction)
43     sub ABORT_TOKEN () { 8 } # Not a token actually
44 wakaba 1.1
45     package Whatpm::HTML; # Everything below is defined in Whatpm::HTML, not in Whatpm::HTML::Tokenizer
46    
47 wakaba 1.2 BEGIN { Whatpm::HTML::Tokenizer->import (':token') } # Import the token type constants at compile time so the bareword names below resolve
48    
49 wakaba 1.1 ## Content model flags: single-bit flags, combined below into the content model values stored in |$self->{content_model}|.
50    
51     sub CM_ENTITY () { 0b001 } # & markup in data
52     sub CM_LIMITED_MARKUP () { 0b010 } # < markup in data (limited)
53     sub CM_FULL_MARKUP () { 0b100 } # < markup in data (any)
54    
55     sub PLAINTEXT_CONTENT_MODEL () { 0 } # No flag set: neither & nor < is treated as markup
56     sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP } # Limited < markup only
57     sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP } # & markup plus limited < markup
58     sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP } # & markup plus any < markup
59    
60     ## Tokenizer states: values of |$self->{state}|, compared numerically by the tokenizer loop.
61    
62     sub DATA_STATE () { 0 }
63     #sub ENTITY_DATA_STATE () { 1 } # Unused; see the NOTE before |ENTITY_STATE| below
64     sub TAG_OPEN_STATE () { 2 }
65     sub CLOSE_TAG_OPEN_STATE () { 3 }
66     sub TAG_NAME_STATE () { 4 }
67     sub BEFORE_ATTRIBUTE_NAME_STATE () { 5 }
68     sub ATTRIBUTE_NAME_STATE () { 6 }
69     sub AFTER_ATTRIBUTE_NAME_STATE () { 7 }
70     sub BEFORE_ATTRIBUTE_VALUE_STATE () { 8 }
71     sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
72     sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
73     sub ATTRIBUTE_VALUE_UNQUOTED_STATE () { 11 }
74     #sub ENTITY_IN_ATTRIBUTE_VALUE_STATE () { 12 } # Unused; see the NOTE before |ENTITY_STATE| below
75     sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
76     sub COMMENT_START_STATE () { 14 }
77     sub COMMENT_START_DASH_STATE () { 15 }
78     sub COMMENT_STATE () { 16 }
79     sub COMMENT_END_STATE () { 17 }
80     sub COMMENT_END_DASH_STATE () { 18 }
81     sub BOGUS_COMMENT_STATE () { 19 }
82     sub DOCTYPE_STATE () { 20 }
83     sub BEFORE_DOCTYPE_NAME_STATE () { 21 }
84     sub DOCTYPE_NAME_STATE () { 22 }
85     sub AFTER_DOCTYPE_NAME_STATE () { 23 }
86     sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 24 }
87     sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
88     sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
89     sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 27 }
90     sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 28 }
91     sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
92     sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
93     sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
94     sub BOGUS_DOCTYPE_STATE () { 32 }
95     sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
96     sub SELF_CLOSING_START_TAG_STATE () { 34 }
97     sub CDATA_SECTION_STATE () { 35 }
98     sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
99     sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
100     sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
101     sub CDATA_RCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
102     sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec
103     sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec
104     sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec
105     sub SYSTEM_STATE () { 43 } # "after DOCTYPE name state" in the spec
106     ## NOTE: The "entity data state", the "entity in attribute value state",
107     ## and the "consume a character reference" algorithm are jointly
108     ## implemented using the following six states:
109     sub ENTITY_STATE () { 44 }
110     sub ENTITY_HASH_STATE () { 45 }
111     sub NCR_NUM_STATE () { 46 }
112     sub HEXREF_X_STATE () { 47 }
113     sub HEXREF_HEX_STATE () { 48 }
114     sub ENTITY_NAME_STATE () { 49 }
115     sub PCDATA_STATE () { 50 } # "data state" in the spec, used only for the PCDATA content model
116    
117 wakaba 1.8 ## XML states (processing instructions; |PI_STATE| is entered from the tag open state on "?" when |is_xml| is set)
118     sub PI_STATE () { 51 }
119     sub PI_TARGET_STATE () { 52 }
120     sub PI_TARGET_AFTER_STATE () { 53 }
121     sub PI_DATA_STATE () { 54 }
122     sub PI_AFTER_STATE () { 55 }
123     sub PI_DATA_AFTER_STATE () { 56 }
124    
125 wakaba 1.1 ## Tree constructor state constants (see Whatpm::HTML for the full
126     ## list and descriptions)
127    
128     sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } # NOTE(review): "_IM" presumably means insertion mode -- confirm in Whatpm::HTML
129     sub FOREIGN_EL () { 0b1_00000000000 } # NOTE(review): same numeric value as IN_FOREIGN_CONTENT_IM; presumably a flag in a different bit set -- confirm
130    
131     ## Character reference mappings: code point => replacement code point.
132    
133     my $charref_map = {
134     0x0D => 0x000A, # CR is replaced by LF
135     0x80 => 0x20AC, # 0x80..0x9F: replacements match the Windows-1252 assignments
136     0x81 => 0xFFFD, # No Windows-1252 assignment: U+FFFD REPLACEMENT CHARACTER
137     0x82 => 0x201A,
138     0x83 => 0x0192,
139     0x84 => 0x201E,
140     0x85 => 0x2026,
141     0x86 => 0x2020,
142     0x87 => 0x2021,
143     0x88 => 0x02C6,
144     0x89 => 0x2030,
145     0x8A => 0x0160,
146     0x8B => 0x2039,
147     0x8C => 0x0152,
148     0x8D => 0xFFFD,
149     0x8E => 0x017D,
150     0x8F => 0xFFFD,
151     0x90 => 0xFFFD,
152     0x91 => 0x2018,
153     0x92 => 0x2019,
154     0x93 => 0x201C,
155     0x94 => 0x201D,
156     0x95 => 0x2022,
157     0x96 => 0x2013,
158     0x97 => 0x2014,
159     0x98 => 0x02DC,
160     0x99 => 0x2122,
161     0x9A => 0x0161,
162     0x9B => 0x203A,
163     0x9C => 0x0153,
164     0x9D => 0xFFFD,
165     0x9E => 0x017E,
166     0x9F => 0x0178,
167     }; # $charref_map
168     $charref_map->{$_} = 0xFFFD # Every code point listed below is mapped to U+FFFD
169     for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F, # Control characters other than HT, LF, FF, and CR
170     0xD800..0xDFFF, 0xFDD0..0xFDDF, ## Surrogates; ISSUE: Unicode noncharacters run through 0xFDEF, but this range stops at 0xFDDF
171     0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, # Last two code points of each plane
172     0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
173     0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
174     0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE,
175     0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF;
176    
177     ## Implementations MUST act as if they used the state machine in the spec.
178    
179     sub _initialize_tokenizer ($) { # Resets the tokenizer fields of $self to their initial values; takes only the invocant
180     my $self = shift;
181    
182     ## NOTE: Fields set by |new| constructor:
183     #$self->{level}
184     #$self->{set_nc}
185     #$self->{parse_error}
186 wakaba 1.3 #$self->{is_xml} (if XML)
187 wakaba 1.1
188     $self->{state} = DATA_STATE; # MUST
189 wakaba 1.5 $self->{s_kwd} = ''; # state keyword: characters matched so far in multi-character sequences such as "<!--" and "]]"
190 wakaba 1.1 #$self->{entity__value}; # initialized when used
191     #$self->{entity__match}; # initialized when used
192     $self->{content_model} = PCDATA_CONTENT_MODEL; # be (NOTE(review): apparently continues the "MUST" above -- confirm)
193     undef $self->{ct}; # current token
194     undef $self->{ca}; # current attribute
195     undef $self->{last_stag_name}; # last emitted start tag name
196     #$self->{prev_state}; # initialized when used
197     delete $self->{self_closing}; # "self-closing flag" of the current start tag
198     $self->{char_buffer} = '';
199     $self->{char_buffer_pos} = 0;
200     $self->{nc} = -1; # next input character (-1 is handled as end of file by the state machine)
201     #$self->{next_nc}
202     !!!next-input-character; # Preprocessor macro, not plain Perl; NOTE(review): presumably reads the first character into |nc| -- confirm against the macro definition
203     $self->{token} = []; # Queue of pending tokens; |_get_next_token| returns these before tokenizing further
204     # $self->{escape}
205     } # _initialize_tokenizer
206    
207     ## A token has:
208     ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
209     ## CHARACTER_TOKEN, PI_TOKEN, or END_OF_FILE_TOKEN
210     ## ->{name} (DOCTYPE_TOKEN)
211     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
212     ## ->{pubid} (DOCTYPE_TOKEN)
213     ## ->{sysid} (DOCTYPE_TOKEN)
214     ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
215     ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
216     ## ->{name}
217     ## ->{value}
218     ## ->{has_reference} == 1 or 0
219     ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
220 wakaba 1.7 ## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
221 wakaba 1.1 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
222     ## |->{self_closing}| is used to save the value of |$self->{self_closing}|
223     ## while the token is pushed back to the stack.
224    
225     ## Emitted token MUST immediately be handled by the tree construction state.
226    
227     ## Before each step, UA MAY check to see if either one of the scripts in
228     ## "list of scripts that will execute as soon as possible" or the first
229     ## script in the "list of scripts that will execute asynchronously",
230     ## has completed loading. If one has, then it MUST be executed
231     ## and removed from the list.
232    
233     ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
234     ## (This requirement was dropped from HTML5 spec, unfortunately.)
235    
236     my $is_space = { # Lookup table keyed by code point: true for the characters the tokenizer treats as white space
237     0x0009 => 1, # CHARACTER TABULATION (HT)
238     0x000A => 1, # LINE FEED (LF)
239     #0x000B => 0, # LINE TABULATION (VT) -- not white space
240     0x000C => 1, # FORM FEED (FF)
241     #0x000D => 1, # CARRIAGE RETURN (CR) -- disabled; NOTE(review): presumably CR never reaches the tokenizer -- confirm
242     0x0020 => 1, # SPACE (SP)
243     };
244    
245     sub _get_next_token ($) {
246     my $self = shift;
247    
248     if ($self->{self_closing}) {
249     !!!parse-error (type => 'nestc', token => $self->{ct});
250     ## NOTE: The |self_closing| flag is only set by start tag token.
251     ## In addition, when a start tag token is emitted, it is always set to
252     ## |ct|.
253     delete $self->{self_closing};
254     }
255    
256     if (@{$self->{token}}) {
257     $self->{self_closing} = $self->{token}->[0]->{self_closing};
258     return shift @{$self->{token}};
259     }
260    
261     A: {
262     if ($self->{state} == PCDATA_STATE) {
263     ## NOTE: Same as |DATA_STATE|, but only for |PCDATA| content model.
264    
265     if ($self->{nc} == 0x0026) { # &
266     !!!cp (0.1);
267     ## NOTE: In the spec, the tokenizer is switched to the
268     ## "entity data state". In this implementation, the tokenizer
269     ## is switched to the |ENTITY_STATE|, which is an implementation
270     ## of the "consume a character reference" algorithm.
271     $self->{entity_add} = -1;
272     $self->{prev_state} = DATA_STATE;
273     $self->{state} = ENTITY_STATE;
274     !!!next-input-character;
275     redo A;
276     } elsif ($self->{nc} == 0x003C) { # <
277     !!!cp (0.2);
278     $self->{state} = TAG_OPEN_STATE;
279     !!!next-input-character;
280     redo A;
281     } elsif ($self->{nc} == -1) {
282     !!!cp (0.3);
283     !!!emit ({type => END_OF_FILE_TOKEN,
284     line => $self->{line}, column => $self->{column}});
285     last A; ## TODO: ok?
286     } else {
287     !!!cp (0.4);
288     #
289     }
290    
291     # Anything else
292     my $token = {type => CHARACTER_TOKEN,
293     data => chr $self->{nc},
294     line => $self->{line}, column => $self->{column},
295     };
296     $self->{read_until}->($token->{data}, q[<&], length $token->{data});
297    
298     ## Stay in the state.
299     !!!next-input-character;
300     !!!emit ($token);
301     redo A;
302     } elsif ($self->{state} == DATA_STATE) {
303     $self->{s_kwd} = '' unless defined $self->{s_kwd};
304     if ($self->{nc} == 0x0026) { # &
305     $self->{s_kwd} = '';
306     if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
307     not $self->{escape}) {
308     !!!cp (1);
309     ## NOTE: In the spec, the tokenizer is switched to the
310     ## "entity data state". In this implementation, the tokenizer
311     ## is switched to the |ENTITY_STATE|, which is an implementation
312     ## of the "consume a character reference" algorithm.
313     $self->{entity_add} = -1;
314     $self->{prev_state} = DATA_STATE;
315     $self->{state} = ENTITY_STATE;
316     !!!next-input-character;
317     redo A;
318     } else {
319     !!!cp (2);
320     #
321     }
322     } elsif ($self->{nc} == 0x002D) { # -
323     if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
324 wakaba 1.5 if ($self->{s_kwd} eq '<!-') {
325 wakaba 1.1 !!!cp (3);
326     $self->{escape} = 1; # unless $self->{escape};
327     $self->{s_kwd} = '--';
328     #
329 wakaba 1.5 } elsif ($self->{s_kwd} eq '-') {
330 wakaba 1.1 !!!cp (4);
331     $self->{s_kwd} = '--';
332     #
333 wakaba 1.5 } elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') {
334     !!!cp (4.1);
335     $self->{s_kwd} .= '-';
336     #
337 wakaba 1.1 } else {
338     !!!cp (5);
339 wakaba 1.5 $self->{s_kwd} = '-';
340 wakaba 1.1 #
341     }
342     }
343    
344     #
345     } elsif ($self->{nc} == 0x0021) { # !
346     if (length $self->{s_kwd}) {
347     !!!cp (5.1);
348     $self->{s_kwd} .= '!';
349     #
350     } else {
351     !!!cp (5.2);
352     #$self->{s_kwd} = '';
353     #
354     }
355     #
356     } elsif ($self->{nc} == 0x003C) { # <
357     if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
358     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
359     not $self->{escape})) {
360     !!!cp (6);
361     $self->{state} = TAG_OPEN_STATE;
362     !!!next-input-character;
363     redo A;
364     } else {
365     !!!cp (7);
366     $self->{s_kwd} = '';
367     #
368     }
369     } elsif ($self->{nc} == 0x003E) { # >
370     if ($self->{escape} and
371     ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
372     if ($self->{s_kwd} eq '--') {
373     !!!cp (8);
374     delete $self->{escape};
375 wakaba 1.5 #
376 wakaba 1.1 } else {
377     !!!cp (9);
378 wakaba 1.5 #
379 wakaba 1.1 }
380 wakaba 1.5 } elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') {
381     !!!cp (9.1);
382     !!!parse-error (type => 'unmatched mse', ## TODO: type
383     line => $self->{line_prev},
384     column => $self->{column_prev} - 1);
385     #
386 wakaba 1.1 } else {
387     !!!cp (10);
388 wakaba 1.5 #
389 wakaba 1.1 }
390    
391     $self->{s_kwd} = '';
392     #
393 wakaba 1.5 } elsif ($self->{nc} == 0x005D) { # ]
394     if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') {
395     !!!cp (10.1);
396     $self->{s_kwd} .= ']';
397     } elsif ($self->{s_kwd} eq ']]') {
398     !!!cp (10.2);
399     #
400     } else {
401     !!!cp (10.3);
402     $self->{s_kwd} = '';
403     }
404     #
405 wakaba 1.1 } elsif ($self->{nc} == -1) {
406     !!!cp (11);
407     $self->{s_kwd} = '';
408     !!!emit ({type => END_OF_FILE_TOKEN,
409     line => $self->{line}, column => $self->{column}});
410     last A; ## TODO: ok?
411     } else {
412     !!!cp (12);
413     $self->{s_kwd} = '';
414     #
415     }
416    
417     # Anything else
418     my $token = {type => CHARACTER_TOKEN,
419     data => chr $self->{nc},
420     line => $self->{line}, column => $self->{column},
421     };
422 wakaba 1.5 if ($self->{read_until}->($token->{data}, q{-!<>&\]},
423 wakaba 1.1 length $token->{data})) {
424     $self->{s_kwd} = '';
425     }
426    
427     ## Stay in the data state.
428 wakaba 1.5 if (not $self->{is_xml} and
429     $self->{content_model} == PCDATA_CONTENT_MODEL) {
430 wakaba 1.1 !!!cp (13);
431     $self->{state} = PCDATA_STATE;
432     } else {
433     !!!cp (14);
434     ## Stay in the state.
435     }
436     !!!next-input-character;
437     !!!emit ($token);
438     redo A;
439     } elsif ($self->{state} == TAG_OPEN_STATE) {
440     if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
441     if ($self->{nc} == 0x002F) { # /
442     !!!cp (15);
443     !!!next-input-character;
444     $self->{state} = CLOSE_TAG_OPEN_STATE;
445     redo A;
446     } elsif ($self->{nc} == 0x0021) { # !
447     !!!cp (15.1);
448     $self->{s_kwd} = '<' unless $self->{escape};
449     #
450     } else {
451     !!!cp (16);
452     #
453     }
454    
455     ## reconsume
456     $self->{state} = DATA_STATE;
457 wakaba 1.5 $self->{s_kwd} = '';
458 wakaba 1.1 !!!emit ({type => CHARACTER_TOKEN, data => '<',
459     line => $self->{line_prev},
460     column => $self->{column_prev},
461     });
462     redo A;
463     } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
464     if ($self->{nc} == 0x0021) { # !
465     !!!cp (17);
466     $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
467     !!!next-input-character;
468     redo A;
469     } elsif ($self->{nc} == 0x002F) { # /
470     !!!cp (18);
471     $self->{state} = CLOSE_TAG_OPEN_STATE;
472     !!!next-input-character;
473     redo A;
474     } elsif (0x0041 <= $self->{nc} and
475     $self->{nc} <= 0x005A) { # A..Z
476     !!!cp (19);
477     $self->{ct}
478     = {type => START_TAG_TOKEN,
479 wakaba 1.4 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
480 wakaba 1.1 line => $self->{line_prev},
481     column => $self->{column_prev}};
482     $self->{state} = TAG_NAME_STATE;
483     !!!next-input-character;
484     redo A;
485     } elsif (0x0061 <= $self->{nc} and
486     $self->{nc} <= 0x007A) { # a..z
487     !!!cp (20);
488     $self->{ct} = {type => START_TAG_TOKEN,
489     tag_name => chr ($self->{nc}),
490     line => $self->{line_prev},
491     column => $self->{column_prev}};
492     $self->{state} = TAG_NAME_STATE;
493     !!!next-input-character;
494     redo A;
495     } elsif ($self->{nc} == 0x003E) { # >
496     !!!cp (21);
497     !!!parse-error (type => 'empty start tag',
498     line => $self->{line_prev},
499     column => $self->{column_prev});
500     $self->{state} = DATA_STATE;
501 wakaba 1.5 $self->{s_kwd} = '';
502 wakaba 1.1 !!!next-input-character;
503    
504     !!!emit ({type => CHARACTER_TOKEN, data => '<>',
505     line => $self->{line_prev},
506     column => $self->{column_prev},
507     });
508    
509     redo A;
510     } elsif ($self->{nc} == 0x003F) { # ?
511 wakaba 1.8 if ($self->{is_xml}) {
512     !!!cp (22.1);
513     $self->{state} = PI_STATE;
514     !!!next-input-character;
515     redo A;
516     } else {
517     !!!cp (22);
518     !!!parse-error (type => 'pio',
519     line => $self->{line_prev},
520     column => $self->{column_prev});
521     $self->{state} = BOGUS_COMMENT_STATE;
522     $self->{ct} = {type => COMMENT_TOKEN, data => '',
523     line => $self->{line_prev},
524     column => $self->{column_prev},
525     };
526     ## $self->{nc} is intentionally left as is
527     redo A;
528     }
529 wakaba 1.1 } else {
530     !!!cp (23);
531     !!!parse-error (type => 'bare stago',
532     line => $self->{line_prev},
533     column => $self->{column_prev});
534     $self->{state} = DATA_STATE;
535 wakaba 1.5 $self->{s_kwd} = '';
536 wakaba 1.1 ## reconsume
537    
538     !!!emit ({type => CHARACTER_TOKEN, data => '<',
539     line => $self->{line_prev},
540     column => $self->{column_prev},
541     });
542    
543     redo A;
544     }
545     } else {
546     die "$0: $self->{content_model} in tag open";
547     }
548     } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
549     ## NOTE: The "close tag open state" in the spec is implemented as
550     ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
551    
552     my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
553     if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
554     if (defined $self->{last_stag_name}) {
555     $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
556     $self->{s_kwd} = '';
557     ## Reconsume.
558     redo A;
559     } else {
560     ## No start tag token has ever been emitted
561     ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
562     !!!cp (28);
563     $self->{state} = DATA_STATE;
564 wakaba 1.5 $self->{s_kwd} = '';
565 wakaba 1.1 ## Reconsume.
566     !!!emit ({type => CHARACTER_TOKEN, data => '</',
567     line => $l, column => $c,
568     });
569     redo A;
570     }
571     }
572    
573     if (0x0041 <= $self->{nc} and
574     $self->{nc} <= 0x005A) { # A..Z
575     !!!cp (29);
576     $self->{ct}
577     = {type => END_TAG_TOKEN,
578 wakaba 1.4 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
579 wakaba 1.1 line => $l, column => $c};
580     $self->{state} = TAG_NAME_STATE;
581     !!!next-input-character;
582     redo A;
583     } elsif (0x0061 <= $self->{nc} and
584     $self->{nc} <= 0x007A) { # a..z
585     !!!cp (30);
586     $self->{ct} = {type => END_TAG_TOKEN,
587     tag_name => chr ($self->{nc}),
588     line => $l, column => $c};
589     $self->{state} = TAG_NAME_STATE;
590     !!!next-input-character;
591     redo A;
592     } elsif ($self->{nc} == 0x003E) { # >
593     !!!cp (31);
594     !!!parse-error (type => 'empty end tag',
595     line => $self->{line_prev}, ## "<" in "</>"
596     column => $self->{column_prev} - 1);
597     $self->{state} = DATA_STATE;
598 wakaba 1.5 $self->{s_kwd} = '';
599 wakaba 1.1 !!!next-input-character;
600     redo A;
601     } elsif ($self->{nc} == -1) {
602     !!!cp (32);
603     !!!parse-error (type => 'bare etago');
604 wakaba 1.5 $self->{s_kwd} = '';
605 wakaba 1.1 $self->{state} = DATA_STATE;
606     # reconsume
607    
608     !!!emit ({type => CHARACTER_TOKEN, data => '</',
609     line => $l, column => $c,
610     });
611    
612     redo A;
613     } else {
614     !!!cp (33);
615     !!!parse-error (type => 'bogus end tag');
616     $self->{state} = BOGUS_COMMENT_STATE;
617     $self->{ct} = {type => COMMENT_TOKEN, data => '',
618     line => $self->{line_prev}, # "<" of "</"
619     column => $self->{column_prev} - 1,
620     };
621     ## NOTE: $self->{nc} is intentionally left as is.
622     ## Although the "anything else" case of the spec does not explicitly
623     ## state that the next input character is to be reconsumed,
624     ## it will be included in the |data| of the comment token
625     ## generated from the bogus end tag, as defined in the
626     ## "bogus comment state" entry.
627     redo A;
628     }
629     } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
630     my $ch = substr $self->{last_stag_name}, length $self->{s_kwd}, 1;
631     if (length $ch) {
632     my $CH = $ch;
633     $ch =~ tr/a-z/A-Z/;
634     my $nch = chr $self->{nc};
635     if ($nch eq $ch or $nch eq $CH) {
636     !!!cp (24);
637     ## Stay in the state.
638     $self->{s_kwd} .= $nch;
639     !!!next-input-character;
640     redo A;
641     } else {
642     !!!cp (25);
643     $self->{state} = DATA_STATE;
644 wakaba 1.5 $self->{s_kwd} = '';
645 wakaba 1.1 ## Reconsume.
646     !!!emit ({type => CHARACTER_TOKEN,
647     data => '</' . $self->{s_kwd},
648     line => $self->{line_prev},
649     column => $self->{column_prev} - 1 - length $self->{s_kwd},
650     });
651     redo A;
652     }
653     } else { # after "<{tag-name}"
654     unless ($is_space->{$self->{nc}} or
655     {
656     0x003E => 1, # >
657     0x002F => 1, # /
658     -1 => 1, # EOF
659     }->{$self->{nc}}) {
660     !!!cp (26);
661     ## Reconsume.
662     $self->{state} = DATA_STATE;
663 wakaba 1.5 $self->{s_kwd} = '';
664 wakaba 1.1 !!!emit ({type => CHARACTER_TOKEN,
665     data => '</' . $self->{s_kwd},
666     line => $self->{line_prev},
667     column => $self->{column_prev} - 1 - length $self->{s_kwd},
668     });
669     redo A;
670     } else {
671     !!!cp (27);
672     $self->{ct}
673     = {type => END_TAG_TOKEN,
674     tag_name => $self->{last_stag_name},
675     line => $self->{line_prev},
676     column => $self->{column_prev} - 1 - length $self->{s_kwd}};
677     $self->{state} = TAG_NAME_STATE;
678     ## Reconsume.
679     redo A;
680     }
681     }
682     } elsif ($self->{state} == TAG_NAME_STATE) {
683     if ($is_space->{$self->{nc}}) {
684     !!!cp (34);
685     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
686     !!!next-input-character;
687     redo A;
688     } elsif ($self->{nc} == 0x003E) { # >
689     if ($self->{ct}->{type} == START_TAG_TOKEN) {
690     !!!cp (35);
691     $self->{last_stag_name} = $self->{ct}->{tag_name};
692     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
693     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
694     #if ($self->{ct}->{attributes}) {
695     # ## NOTE: This should never be reached.
696     # !!! cp (36);
697     # !!! parse-error (type => 'end tag attribute');
698     #} else {
699     !!!cp (37);
700     #}
701     } else {
702     die "$0: $self->{ct}->{type}: Unknown token type";
703     }
704     $self->{state} = DATA_STATE;
705 wakaba 1.5 $self->{s_kwd} = '';
706 wakaba 1.1 !!!next-input-character;
707    
708     !!!emit ($self->{ct}); # start tag or end tag
709    
710     redo A;
711     } elsif (0x0041 <= $self->{nc} and
712     $self->{nc} <= 0x005A) { # A..Z
713     !!!cp (38);
714 wakaba 1.4 $self->{ct}->{tag_name}
715     .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
716 wakaba 1.1 # start tag or end tag
717     ## Stay in this state
718     !!!next-input-character;
719     redo A;
720     } elsif ($self->{nc} == -1) {
721     !!!parse-error (type => 'unclosed tag');
722     if ($self->{ct}->{type} == START_TAG_TOKEN) {
723     !!!cp (39);
724     $self->{last_stag_name} = $self->{ct}->{tag_name};
725     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
726     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
727     #if ($self->{ct}->{attributes}) {
728     # ## NOTE: This state should never be reached.
729     # !!! cp (40);
730     # !!! parse-error (type => 'end tag attribute');
731     #} else {
732     !!!cp (41);
733     #}
734     } else {
735     die "$0: $self->{ct}->{type}: Unknown token type";
736     }
737     $self->{state} = DATA_STATE;
738 wakaba 1.5 $self->{s_kwd} = '';
739 wakaba 1.1 # reconsume
740    
741     !!!emit ($self->{ct}); # start tag or end tag
742    
743     redo A;
744     } elsif ($self->{nc} == 0x002F) { # /
745     !!!cp (42);
746     $self->{state} = SELF_CLOSING_START_TAG_STATE;
747     !!!next-input-character;
748     redo A;
749     } else {
750     !!!cp (44);
751     $self->{ct}->{tag_name} .= chr $self->{nc};
752     # start tag or end tag
753     ## Stay in the state
754     !!!next-input-character;
755     redo A;
756     }
757     } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
758     if ($is_space->{$self->{nc}}) {
759     !!!cp (45);
760     ## Stay in the state
761     !!!next-input-character;
762     redo A;
763     } elsif ($self->{nc} == 0x003E) { # >
764     if ($self->{ct}->{type} == START_TAG_TOKEN) {
765     !!!cp (46);
766     $self->{last_stag_name} = $self->{ct}->{tag_name};
767     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
768     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
769     if ($self->{ct}->{attributes}) {
770     !!!cp (47);
771     !!!parse-error (type => 'end tag attribute');
772     } else {
773     !!!cp (48);
774     }
775     } else {
776     die "$0: $self->{ct}->{type}: Unknown token type";
777     }
778     $self->{state} = DATA_STATE;
779 wakaba 1.5 $self->{s_kwd} = '';
780 wakaba 1.1 !!!next-input-character;
781    
782     !!!emit ($self->{ct}); # start tag or end tag
783    
784     redo A;
785     } elsif (0x0041 <= $self->{nc} and
786     $self->{nc} <= 0x005A) { # A..Z
787     !!!cp (49);
788     $self->{ca}
789 wakaba 1.4 = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
790 wakaba 1.1 value => '',
791     line => $self->{line}, column => $self->{column}};
792     $self->{state} = ATTRIBUTE_NAME_STATE;
793     !!!next-input-character;
794     redo A;
795     } elsif ($self->{nc} == 0x002F) { # /
796     !!!cp (50);
797     $self->{state} = SELF_CLOSING_START_TAG_STATE;
798     !!!next-input-character;
799     redo A;
800     } elsif ($self->{nc} == -1) {
801     !!!parse-error (type => 'unclosed tag');
802     if ($self->{ct}->{type} == START_TAG_TOKEN) {
803     !!!cp (52);
804     $self->{last_stag_name} = $self->{ct}->{tag_name};
805     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
806     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
807     if ($self->{ct}->{attributes}) {
808     !!!cp (53);
809     !!!parse-error (type => 'end tag attribute');
810     } else {
811     !!!cp (54);
812     }
813     } else {
814     die "$0: $self->{ct}->{type}: Unknown token type";
815     }
816     $self->{state} = DATA_STATE;
817 wakaba 1.5 $self->{s_kwd} = '';
818 wakaba 1.1 # reconsume
819    
820     !!!emit ($self->{ct}); # start tag or end tag
821    
822     redo A;
823     } else {
824     if ({
825     0x0022 => 1, # "
826     0x0027 => 1, # '
827     0x003D => 1, # =
828     }->{$self->{nc}}) {
829     !!!cp (55);
830     !!!parse-error (type => 'bad attribute name');
831     } else {
832     !!!cp (56);
833     }
834     $self->{ca}
835     = {name => chr ($self->{nc}),
836     value => '',
837     line => $self->{line}, column => $self->{column}};
838     $self->{state} = ATTRIBUTE_NAME_STATE;
839     !!!next-input-character;
840     redo A;
841     }
842     } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
843     my $before_leave = sub {
844     if (exists $self->{ct}->{attributes} # start tag or end tag
845     ->{$self->{ca}->{name}}) { # MUST
846     !!!cp (57);
847     !!!parse-error (type => 'duplicate attribute', text => $self->{ca}->{name}, line => $self->{ca}->{line}, column => $self->{ca}->{column});
848     ## Discard $self->{ca} # MUST
849     } else {
850     !!!cp (58);
851     $self->{ct}->{attributes}->{$self->{ca}->{name}}
852     = $self->{ca};
853     }
854     }; # $before_leave
855    
856     if ($is_space->{$self->{nc}}) {
857     !!!cp (59);
858     $before_leave->();
859     $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
860     !!!next-input-character;
861     redo A;
862     } elsif ($self->{nc} == 0x003D) { # =
863     !!!cp (60);
864     $before_leave->();
865     $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
866     !!!next-input-character;
867     redo A;
868     } elsif ($self->{nc} == 0x003E) { # >
869     $before_leave->();
870     if ($self->{ct}->{type} == START_TAG_TOKEN) {
871     !!!cp (61);
872     $self->{last_stag_name} = $self->{ct}->{tag_name};
873     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
874     !!!cp (62);
875     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
876     if ($self->{ct}->{attributes}) {
877     !!!parse-error (type => 'end tag attribute');
878     }
879     } else {
880     die "$0: $self->{ct}->{type}: Unknown token type";
881     }
882     $self->{state} = DATA_STATE;
883 wakaba 1.5 $self->{s_kwd} = '';
884 wakaba 1.1 !!!next-input-character;
885    
886     !!!emit ($self->{ct}); # start tag or end tag
887    
888     redo A;
889     } elsif (0x0041 <= $self->{nc} and
890     $self->{nc} <= 0x005A) { # A..Z
891     !!!cp (63);
892 wakaba 1.4 $self->{ca}->{name}
893     .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
894 wakaba 1.1 ## Stay in the state
895     !!!next-input-character;
896     redo A;
897     } elsif ($self->{nc} == 0x002F) { # /
898     !!!cp (64);
899     $before_leave->();
900     $self->{state} = SELF_CLOSING_START_TAG_STATE;
901     !!!next-input-character;
902     redo A;
903     } elsif ($self->{nc} == -1) {
904     !!!parse-error (type => 'unclosed tag');
905     $before_leave->();
906     if ($self->{ct}->{type} == START_TAG_TOKEN) {
907     !!!cp (66);
908     $self->{last_stag_name} = $self->{ct}->{tag_name};
909     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
910     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
911     if ($self->{ct}->{attributes}) {
912     !!!cp (67);
913     !!!parse-error (type => 'end tag attribute');
914     } else {
915     ## NOTE: This state should never be reached.
916     !!!cp (68);
917     }
918     } else {
919     die "$0: $self->{ct}->{type}: Unknown token type";
920     }
921     $self->{state} = DATA_STATE;
922 wakaba 1.5 $self->{s_kwd} = '';
923 wakaba 1.1 # reconsume
924    
925     !!!emit ($self->{ct}); # start tag or end tag
926    
927     redo A;
928     } else {
929     if ($self->{nc} == 0x0022 or # "
930     $self->{nc} == 0x0027) { # '
931     !!!cp (69);
932     !!!parse-error (type => 'bad attribute name');
933     } else {
934     !!!cp (70);
935     }
936     $self->{ca}->{name} .= chr ($self->{nc});
937     ## Stay in the state
938     !!!next-input-character;
939     redo A;
940     }
941     } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
942     if ($is_space->{$self->{nc}}) {
943     !!!cp (71);
944     ## Stay in the state
945     !!!next-input-character;
946     redo A;
947     } elsif ($self->{nc} == 0x003D) { # =
948     !!!cp (72);
949     $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
950     !!!next-input-character;
951     redo A;
952     } elsif ($self->{nc} == 0x003E) { # >
953     if ($self->{ct}->{type} == START_TAG_TOKEN) {
954     !!!cp (73);
955     $self->{last_stag_name} = $self->{ct}->{tag_name};
956     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
957     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
958     if ($self->{ct}->{attributes}) {
959     !!!cp (74);
960     !!!parse-error (type => 'end tag attribute');
961     } else {
962     ## NOTE: This state should never be reached.
963     !!!cp (75);
964     }
965     } else {
966     die "$0: $self->{ct}->{type}: Unknown token type";
967     }
968     $self->{state} = DATA_STATE;
969 wakaba 1.5 $self->{s_kwd} = '';
970 wakaba 1.1 !!!next-input-character;
971    
972     !!!emit ($self->{ct}); # start tag or end tag
973    
974     redo A;
975     } elsif (0x0041 <= $self->{nc} and
976     $self->{nc} <= 0x005A) { # A..Z
977     !!!cp (76);
978     $self->{ca}
979 wakaba 1.4 = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
980 wakaba 1.1 value => '',
981     line => $self->{line}, column => $self->{column}};
982     $self->{state} = ATTRIBUTE_NAME_STATE;
983     !!!next-input-character;
984     redo A;
985     } elsif ($self->{nc} == 0x002F) { # /
986     !!!cp (77);
987     $self->{state} = SELF_CLOSING_START_TAG_STATE;
988     !!!next-input-character;
989     redo A;
990     } elsif ($self->{nc} == -1) {
991     !!!parse-error (type => 'unclosed tag');
992     if ($self->{ct}->{type} == START_TAG_TOKEN) {
993     !!!cp (79);
994     $self->{last_stag_name} = $self->{ct}->{tag_name};
995     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
996     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
997     if ($self->{ct}->{attributes}) {
998     !!!cp (80);
999     !!!parse-error (type => 'end tag attribute');
1000     } else {
1001     ## NOTE: This state should never be reached.
1002     !!!cp (81);
1003     }
1004     } else {
1005     die "$0: $self->{ct}->{type}: Unknown token type";
1006     }
1007 wakaba 1.5 $self->{s_kwd} = '';
1008 wakaba 1.1 $self->{state} = DATA_STATE;
1009     # reconsume
1010    
1011     !!!emit ($self->{ct}); # start tag or end tag
1012    
1013     redo A;
1014     } else {
1015     if ($self->{nc} == 0x0022 or # "
1016     $self->{nc} == 0x0027) { # '
1017     !!!cp (78);
1018     !!!parse-error (type => 'bad attribute name');
1019     } else {
1020     !!!cp (82);
1021     }
1022     $self->{ca}
1023     = {name => chr ($self->{nc}),
1024     value => '',
1025     line => $self->{line}, column => $self->{column}};
1026     $self->{state} = ATTRIBUTE_NAME_STATE;
1027     !!!next-input-character;
1028     redo A;
1029     }
1030     } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1031     if ($is_space->{$self->{nc}}) {
1032     !!!cp (83);
1033     ## Stay in the state
1034     !!!next-input-character;
1035     redo A;
1036     } elsif ($self->{nc} == 0x0022) { # "
1037     !!!cp (84);
1038     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1039     !!!next-input-character;
1040     redo A;
1041     } elsif ($self->{nc} == 0x0026) { # &
1042     !!!cp (85);
1043     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1044     ## reconsume
1045     redo A;
1046     } elsif ($self->{nc} == 0x0027) { # '
1047     !!!cp (86);
1048     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1049     !!!next-input-character;
1050     redo A;
1051     } elsif ($self->{nc} == 0x003E) { # >
1052     !!!parse-error (type => 'empty unquoted attribute value');
1053     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1054     !!!cp (87);
1055     $self->{last_stag_name} = $self->{ct}->{tag_name};
1056     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1057     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1058     if ($self->{ct}->{attributes}) {
1059     !!!cp (88);
1060     !!!parse-error (type => 'end tag attribute');
1061     } else {
1062     ## NOTE: This state should never be reached.
1063     !!!cp (89);
1064     }
1065     } else {
1066     die "$0: $self->{ct}->{type}: Unknown token type";
1067     }
1068     $self->{state} = DATA_STATE;
1069 wakaba 1.5 $self->{s_kwd} = '';
1070 wakaba 1.1 !!!next-input-character;
1071    
1072     !!!emit ($self->{ct}); # start tag or end tag
1073    
1074     redo A;
1075     } elsif ($self->{nc} == -1) {
1076     !!!parse-error (type => 'unclosed tag');
1077     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1078     !!!cp (90);
1079     $self->{last_stag_name} = $self->{ct}->{tag_name};
1080     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1081     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1082     if ($self->{ct}->{attributes}) {
1083     !!!cp (91);
1084     !!!parse-error (type => 'end tag attribute');
1085     } else {
1086     ## NOTE: This state should never be reached.
1087     !!!cp (92);
1088     }
1089     } else {
1090     die "$0: $self->{ct}->{type}: Unknown token type";
1091     }
1092     $self->{state} = DATA_STATE;
1093 wakaba 1.5 $self->{s_kwd} = '';
1094 wakaba 1.1 ## reconsume
1095    
1096     !!!emit ($self->{ct}); # start tag or end tag
1097    
1098     redo A;
1099     } else {
1100     if ($self->{nc} == 0x003D) { # =
1101     !!!cp (93);
1102     !!!parse-error (type => 'bad attribute value');
1103     } else {
1104     !!!cp (94);
1105     }
1106     $self->{ca}->{value} .= chr ($self->{nc});
1107     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1108     !!!next-input-character;
1109     redo A;
1110     }
1111     } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1112     if ($self->{nc} == 0x0022) { # "
1113     !!!cp (95);
1114     $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1115     !!!next-input-character;
1116     redo A;
1117     } elsif ($self->{nc} == 0x0026) { # &
1118     !!!cp (96);
1119     ## NOTE: In the spec, the tokenizer is switched to the
1120     ## "entity in attribute value state". In this implementation, the
1121     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1122     ## implementation of the "consume a character reference" algorithm.
1123     $self->{prev_state} = $self->{state};
1124     $self->{entity_add} = 0x0022; # "
1125     $self->{state} = ENTITY_STATE;
1126     !!!next-input-character;
1127     redo A;
1128     } elsif ($self->{nc} == -1) {
1129     !!!parse-error (type => 'unclosed attribute value');
1130     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1131     !!!cp (97);
1132     $self->{last_stag_name} = $self->{ct}->{tag_name};
1133     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1134     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1135     if ($self->{ct}->{attributes}) {
1136     !!!cp (98);
1137     !!!parse-error (type => 'end tag attribute');
1138     } else {
1139     ## NOTE: This state should never be reached.
1140     !!!cp (99);
1141     }
1142     } else {
1143     die "$0: $self->{ct}->{type}: Unknown token type";
1144     }
1145     $self->{state} = DATA_STATE;
1146 wakaba 1.5 $self->{s_kwd} = '';
1147 wakaba 1.1 ## reconsume
1148    
1149     !!!emit ($self->{ct}); # start tag or end tag
1150    
1151     redo A;
1152     } else {
1153     !!!cp (100);
1154     $self->{ca}->{value} .= chr ($self->{nc});
1155     $self->{read_until}->($self->{ca}->{value},
1156     q["&],
1157     length $self->{ca}->{value});
1158    
1159     ## Stay in the state
1160     !!!next-input-character;
1161     redo A;
1162     }
1163     } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1164     if ($self->{nc} == 0x0027) { # '
1165     !!!cp (101);
1166     $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1167     !!!next-input-character;
1168     redo A;
1169     } elsif ($self->{nc} == 0x0026) { # &
1170     !!!cp (102);
1171     ## NOTE: In the spec, the tokenizer is switched to the
1172     ## "entity in attribute value state". In this implementation, the
1173     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1174     ## implementation of the "consume a character reference" algorithm.
1175     $self->{entity_add} = 0x0027; # '
1176     $self->{prev_state} = $self->{state};
1177     $self->{state} = ENTITY_STATE;
1178     !!!next-input-character;
1179     redo A;
1180     } elsif ($self->{nc} == -1) {
1181     !!!parse-error (type => 'unclosed attribute value');
1182     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1183     !!!cp (103);
1184     $self->{last_stag_name} = $self->{ct}->{tag_name};
1185     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1186     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1187     if ($self->{ct}->{attributes}) {
1188     !!!cp (104);
1189     !!!parse-error (type => 'end tag attribute');
1190     } else {
1191     ## NOTE: This state should never be reached.
1192     !!!cp (105);
1193     }
1194     } else {
1195     die "$0: $self->{ct}->{type}: Unknown token type";
1196     }
1197     $self->{state} = DATA_STATE;
1198 wakaba 1.5 $self->{s_kwd} = '';
1199 wakaba 1.1 ## reconsume
1200    
1201     !!!emit ($self->{ct}); # start tag or end tag
1202    
1203     redo A;
1204     } else {
1205     !!!cp (106);
1206     $self->{ca}->{value} .= chr ($self->{nc});
1207     $self->{read_until}->($self->{ca}->{value},
1208     q['&],
1209     length $self->{ca}->{value});
1210    
1211     ## Stay in the state
1212     !!!next-input-character;
1213     redo A;
1214     }
1215     } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1216     if ($is_space->{$self->{nc}}) {
1217     !!!cp (107);
1218     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1219     !!!next-input-character;
1220     redo A;
1221     } elsif ($self->{nc} == 0x0026) { # &
1222     !!!cp (108);
1223     ## NOTE: In the spec, the tokenizer is switched to the
1224     ## "entity in attribute value state". In this implementation, the
1225     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1226     ## implementation of the "consume a character reference" algorithm.
1227     $self->{entity_add} = -1;
1228     $self->{prev_state} = $self->{state};
1229     $self->{state} = ENTITY_STATE;
1230     !!!next-input-character;
1231     redo A;
1232     } elsif ($self->{nc} == 0x003E) { # >
1233     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1234     !!!cp (109);
1235     $self->{last_stag_name} = $self->{ct}->{tag_name};
1236     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1237     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1238     if ($self->{ct}->{attributes}) {
1239     !!!cp (110);
1240     !!!parse-error (type => 'end tag attribute');
1241     } else {
1242     ## NOTE: This state should never be reached.
1243     !!!cp (111);
1244     }
1245     } else {
1246     die "$0: $self->{ct}->{type}: Unknown token type";
1247     }
1248     $self->{state} = DATA_STATE;
1249 wakaba 1.5 $self->{s_kwd} = '';
1250 wakaba 1.1 !!!next-input-character;
1251    
1252     !!!emit ($self->{ct}); # start tag or end tag
1253    
1254     redo A;
1255     } elsif ($self->{nc} == -1) {
1256     !!!parse-error (type => 'unclosed tag');
1257     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1258     !!!cp (112);
1259     $self->{last_stag_name} = $self->{ct}->{tag_name};
1260     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1261     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1262     if ($self->{ct}->{attributes}) {
1263     !!!cp (113);
1264     !!!parse-error (type => 'end tag attribute');
1265     } else {
1266     ## NOTE: This state should never be reached.
1267     !!!cp (114);
1268     }
1269     } else {
1270     die "$0: $self->{ct}->{type}: Unknown token type";
1271     }
1272     $self->{state} = DATA_STATE;
1273 wakaba 1.5 $self->{s_kwd} = '';
1274 wakaba 1.1 ## reconsume
1275    
1276     !!!emit ($self->{ct}); # start tag or end tag
1277    
1278     redo A;
1279     } else {
1280     if ({
1281     0x0022 => 1, # "
1282     0x0027 => 1, # '
1283     0x003D => 1, # =
1284     }->{$self->{nc}}) {
1285     !!!cp (115);
1286     !!!parse-error (type => 'bad attribute value');
1287     } else {
1288     !!!cp (116);
1289     }
1290     $self->{ca}->{value} .= chr ($self->{nc});
1291     $self->{read_until}->($self->{ca}->{value},
1292     q["'=& >],
1293     length $self->{ca}->{value});
1294    
1295     ## Stay in the state
1296     !!!next-input-character;
1297     redo A;
1298     }
1299     } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1300     if ($is_space->{$self->{nc}}) {
1301     !!!cp (118);
1302     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1303     !!!next-input-character;
1304     redo A;
1305     } elsif ($self->{nc} == 0x003E) { # >
1306     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1307     !!!cp (119);
1308     $self->{last_stag_name} = $self->{ct}->{tag_name};
1309     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1310     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1311     if ($self->{ct}->{attributes}) {
1312     !!!cp (120);
1313     !!!parse-error (type => 'end tag attribute');
1314     } else {
1315     ## NOTE: This state should never be reached.
1316     !!!cp (121);
1317     }
1318     } else {
1319     die "$0: $self->{ct}->{type}: Unknown token type";
1320     }
1321     $self->{state} = DATA_STATE;
1322 wakaba 1.5 $self->{s_kwd} = '';
1323 wakaba 1.1 !!!next-input-character;
1324    
1325     !!!emit ($self->{ct}); # start tag or end tag
1326    
1327     redo A;
1328     } elsif ($self->{nc} == 0x002F) { # /
1329     !!!cp (122);
1330     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1331     !!!next-input-character;
1332     redo A;
1333     } elsif ($self->{nc} == -1) {
1334     !!!parse-error (type => 'unclosed tag');
1335     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1336     !!!cp (122.3);
1337     $self->{last_stag_name} = $self->{ct}->{tag_name};
1338     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1339     if ($self->{ct}->{attributes}) {
1340     !!!cp (122.1);
1341     !!!parse-error (type => 'end tag attribute');
1342     } else {
1343     ## NOTE: This state should never be reached.
1344     !!!cp (122.2);
1345     }
1346     } else {
1347     die "$0: $self->{ct}->{type}: Unknown token type";
1348     }
1349     $self->{state} = DATA_STATE;
1350 wakaba 1.5 $self->{s_kwd} = '';
1351 wakaba 1.1 ## Reconsume.
1352     !!!emit ($self->{ct}); # start tag or end tag
1353     redo A;
1354     } else {
1355     !!!cp ('124.1');
1356     !!!parse-error (type => 'no space between attributes');
1357     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1358     ## reconsume
1359     redo A;
1360     }
1361     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1362     if ($self->{nc} == 0x003E) { # >
1363     if ($self->{ct}->{type} == END_TAG_TOKEN) {
1364     !!!cp ('124.2');
1365     !!!parse-error (type => 'nestc', token => $self->{ct});
1366     ## TODO: Different type than slash in start tag
1367     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1368     if ($self->{ct}->{attributes}) {
1369     !!!cp ('124.4');
1370     !!!parse-error (type => 'end tag attribute');
1371     } else {
1372     !!!cp ('124.5');
1373     }
1374     ## TODO: Test |<title></title/>|
1375     } else {
1376     !!!cp ('124.3');
1377     $self->{self_closing} = 1;
1378     }
1379    
1380     $self->{state} = DATA_STATE;
1381 wakaba 1.5 $self->{s_kwd} = '';
1382 wakaba 1.1 !!!next-input-character;
1383    
1384     !!!emit ($self->{ct}); # start tag or end tag
1385    
1386     redo A;
1387     } elsif ($self->{nc} == -1) {
1388     !!!parse-error (type => 'unclosed tag');
1389     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1390     !!!cp (124.7);
1391     $self->{last_stag_name} = $self->{ct}->{tag_name};
1392     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1393     if ($self->{ct}->{attributes}) {
1394     !!!cp (124.5);
1395     !!!parse-error (type => 'end tag attribute');
1396     } else {
1397     ## NOTE: This state should never be reached.
1398     !!!cp (124.6);
1399     }
1400     } else {
1401     die "$0: $self->{ct}->{type}: Unknown token type";
1402     }
1403     $self->{state} = DATA_STATE;
1404 wakaba 1.5 $self->{s_kwd} = '';
1405 wakaba 1.1 ## Reconsume.
1406     !!!emit ($self->{ct}); # start tag or end tag
1407     redo A;
1408     } else {
1409     !!!cp ('124.4');
1410     !!!parse-error (type => 'nestc');
1411     ## TODO: This error type is wrong.
1412     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1413     ## Reconsume.
1414     redo A;
1415     }
1416     } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1417     ## (only happen if PCDATA state)
1418    
1419     ## NOTE: Unlike spec's "bogus comment state", this implementation
1420     ## consumes characters one-by-one basis.
1421    
1422     if ($self->{nc} == 0x003E) { # >
1423     !!!cp (124);
1424     $self->{state} = DATA_STATE;
1425 wakaba 1.5 $self->{s_kwd} = '';
1426 wakaba 1.1 !!!next-input-character;
1427    
1428     !!!emit ($self->{ct}); # comment
1429     redo A;
1430     } elsif ($self->{nc} == -1) {
1431     !!!cp (125);
1432     $self->{state} = DATA_STATE;
1433 wakaba 1.5 $self->{s_kwd} = '';
1434 wakaba 1.1 ## reconsume
1435    
1436     !!!emit ($self->{ct}); # comment
1437     redo A;
1438     } else {
1439     !!!cp (126);
1440     $self->{ct}->{data} .= chr ($self->{nc}); # comment
1441     $self->{read_until}->($self->{ct}->{data},
1442     q[>],
1443     length $self->{ct}->{data});
1444    
1445     ## Stay in the state.
1446     !!!next-input-character;
1447     redo A;
1448     }
1449     } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1450     ## (only happen if PCDATA state)
1451    
1452     if ($self->{nc} == 0x002D) { # -
1453     !!!cp (133);
1454     $self->{state} = MD_HYPHEN_STATE;
1455     !!!next-input-character;
1456     redo A;
1457     } elsif ($self->{nc} == 0x0044 or # D
1458     $self->{nc} == 0x0064) { # d
1459     ## ASCII case-insensitive.
1460     !!!cp (130);
1461     $self->{state} = MD_DOCTYPE_STATE;
1462     $self->{s_kwd} = chr $self->{nc};
1463     !!!next-input-character;
1464     redo A;
1465 wakaba 1.3 } elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1466     $self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1467     $self->{is_xml}) and
1468 wakaba 1.1 $self->{nc} == 0x005B) { # [
1469     !!!cp (135.4);
1470     $self->{state} = MD_CDATA_STATE;
1471     $self->{s_kwd} = '[';
1472     !!!next-input-character;
1473     redo A;
1474     } else {
1475     !!!cp (136);
1476     }
1477    
1478     !!!parse-error (type => 'bogus comment',
1479     line => $self->{line_prev},
1480     column => $self->{column_prev} - 1);
1481     ## Reconsume.
1482     $self->{state} = BOGUS_COMMENT_STATE;
1483     $self->{ct} = {type => COMMENT_TOKEN, data => '',
1484     line => $self->{line_prev},
1485     column => $self->{column_prev} - 1,
1486     };
1487     redo A;
1488     } elsif ($self->{state} == MD_HYPHEN_STATE) {
1489     if ($self->{nc} == 0x002D) { # -
1490     !!!cp (127);
1491     $self->{ct} = {type => COMMENT_TOKEN, data => '',
1492     line => $self->{line_prev},
1493     column => $self->{column_prev} - 2,
1494     };
1495     $self->{state} = COMMENT_START_STATE;
1496     !!!next-input-character;
1497     redo A;
1498     } else {
1499     !!!cp (128);
1500     !!!parse-error (type => 'bogus comment',
1501     line => $self->{line_prev},
1502     column => $self->{column_prev} - 2);
1503     $self->{state} = BOGUS_COMMENT_STATE;
1504     ## Reconsume.
1505     $self->{ct} = {type => COMMENT_TOKEN,
1506     data => '-',
1507     line => $self->{line_prev},
1508     column => $self->{column_prev} - 2,
1509     };
1510     redo A;
1511     }
1512     } elsif ($self->{state} == MD_DOCTYPE_STATE) {
1513     ## ASCII case-insensitive.
1514     if ($self->{nc} == [
1515     undef,
1516     0x004F, # O
1517     0x0043, # C
1518     0x0054, # T
1519     0x0059, # Y
1520     0x0050, # P
1521     ]->[length $self->{s_kwd}] or
1522     $self->{nc} == [
1523     undef,
1524     0x006F, # o
1525     0x0063, # c
1526     0x0074, # t
1527     0x0079, # y
1528     0x0070, # p
1529     ]->[length $self->{s_kwd}]) {
1530     !!!cp (131);
1531     ## Stay in the state.
1532     $self->{s_kwd} .= chr $self->{nc};
1533     !!!next-input-character;
1534     redo A;
1535     } elsif ((length $self->{s_kwd}) == 6 and
1536     ($self->{nc} == 0x0045 or # E
1537     $self->{nc} == 0x0065)) { # e
1538     !!!cp (129);
1539     $self->{state} = DOCTYPE_STATE;
1540     $self->{ct} = {type => DOCTYPE_TOKEN,
1541     quirks => 1,
1542     line => $self->{line_prev},
1543     column => $self->{column_prev} - 7,
1544     };
1545     !!!next-input-character;
1546     redo A;
1547     } else {
1548     !!!cp (132);
1549     !!!parse-error (type => 'bogus comment',
1550     line => $self->{line_prev},
1551     column => $self->{column_prev} - 1 - length $self->{s_kwd});
1552     $self->{state} = BOGUS_COMMENT_STATE;
1553     ## Reconsume.
1554     $self->{ct} = {type => COMMENT_TOKEN,
1555     data => $self->{s_kwd},
1556     line => $self->{line_prev},
1557     column => $self->{column_prev} - 1 - length $self->{s_kwd},
1558     };
1559     redo A;
1560     }
1561     } elsif ($self->{state} == MD_CDATA_STATE) {
1562     if ($self->{nc} == {
1563     '[' => 0x0043, # C
1564     '[C' => 0x0044, # D
1565     '[CD' => 0x0041, # A
1566     '[CDA' => 0x0054, # T
1567     '[CDAT' => 0x0041, # A
1568     }->{$self->{s_kwd}}) {
1569     !!!cp (135.1);
1570     ## Stay in the state.
1571     $self->{s_kwd} .= chr $self->{nc};
1572     !!!next-input-character;
1573     redo A;
1574     } elsif ($self->{s_kwd} eq '[CDATA' and
1575     $self->{nc} == 0x005B) { # [
1576 wakaba 1.6 if ($self->{is_xml} and
1577     not $self->{tainted} and
1578     @{$self->{open_elements} or []} == 0) {
1579 wakaba 1.8 !!!cp (135.2);
1580 wakaba 1.6 !!!parse-error (type => 'cdata outside of root element',
1581     line => $self->{line_prev},
1582     column => $self->{column_prev} - 7);
1583     $self->{tainted} = 1;
1584 wakaba 1.8 } else {
1585     !!!cp (135.21);
1586 wakaba 1.6 }
1587    
1588 wakaba 1.1 $self->{ct} = {type => CHARACTER_TOKEN,
1589     data => '',
1590     line => $self->{line_prev},
1591     column => $self->{column_prev} - 7};
1592     $self->{state} = CDATA_SECTION_STATE;
1593     !!!next-input-character;
1594     redo A;
1595     } else {
1596     !!!cp (135.3);
1597     !!!parse-error (type => 'bogus comment',
1598     line => $self->{line_prev},
1599     column => $self->{column_prev} - 1 - length $self->{s_kwd});
1600     $self->{state} = BOGUS_COMMENT_STATE;
1601     ## Reconsume.
1602     $self->{ct} = {type => COMMENT_TOKEN,
1603     data => $self->{s_kwd},
1604     line => $self->{line_prev},
1605     column => $self->{column_prev} - 1 - length $self->{s_kwd},
1606     };
1607     redo A;
1608     }
1609     } elsif ($self->{state} == COMMENT_START_STATE) {
1610     if ($self->{nc} == 0x002D) { # -
1611     !!!cp (137);
1612     $self->{state} = COMMENT_START_DASH_STATE;
1613     !!!next-input-character;
1614     redo A;
1615     } elsif ($self->{nc} == 0x003E) { # >
1616     !!!cp (138);
1617     !!!parse-error (type => 'bogus comment');
1618     $self->{state} = DATA_STATE;
1619 wakaba 1.5 $self->{s_kwd} = '';
1620 wakaba 1.1 !!!next-input-character;
1621    
1622     !!!emit ($self->{ct}); # comment
1623    
1624     redo A;
1625     } elsif ($self->{nc} == -1) {
1626     !!!cp (139);
1627     !!!parse-error (type => 'unclosed comment');
1628     $self->{state} = DATA_STATE;
1629 wakaba 1.5 $self->{s_kwd} = '';
1630 wakaba 1.1 ## reconsume
1631    
1632     !!!emit ($self->{ct}); # comment
1633    
1634     redo A;
1635     } else {
1636     !!!cp (140);
1637     $self->{ct}->{data} # comment
1638     .= chr ($self->{nc});
1639     $self->{state} = COMMENT_STATE;
1640     !!!next-input-character;
1641     redo A;
1642     }
1643     } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1644     if ($self->{nc} == 0x002D) { # -
1645     !!!cp (141);
1646     $self->{state} = COMMENT_END_STATE;
1647     !!!next-input-character;
1648     redo A;
1649     } elsif ($self->{nc} == 0x003E) { # >
1650     !!!cp (142);
1651     !!!parse-error (type => 'bogus comment');
1652     $self->{state} = DATA_STATE;
1653 wakaba 1.5 $self->{s_kwd} = '';
1654 wakaba 1.1 !!!next-input-character;
1655    
1656     !!!emit ($self->{ct}); # comment
1657    
1658     redo A;
1659     } elsif ($self->{nc} == -1) {
1660     !!!cp (143);
1661     !!!parse-error (type => 'unclosed comment');
1662     $self->{state} = DATA_STATE;
1663 wakaba 1.5 $self->{s_kwd} = '';
1664 wakaba 1.1 ## reconsume
1665    
1666     !!!emit ($self->{ct}); # comment
1667    
1668     redo A;
1669     } else {
1670     !!!cp (144);
1671     $self->{ct}->{data} # comment
1672     .= '-' . chr ($self->{nc});
1673     $self->{state} = COMMENT_STATE;
1674     !!!next-input-character;
1675     redo A;
1676     }
1677     } elsif ($self->{state} == COMMENT_STATE) {
1678     if ($self->{nc} == 0x002D) { # -
1679     !!!cp (145);
1680     $self->{state} = COMMENT_END_DASH_STATE;
1681     !!!next-input-character;
1682     redo A;
1683     } elsif ($self->{nc} == -1) {
1684     !!!cp (146);
1685     !!!parse-error (type => 'unclosed comment');
1686     $self->{state} = DATA_STATE;
1687 wakaba 1.5 $self->{s_kwd} = '';
1688 wakaba 1.1 ## reconsume
1689    
1690     !!!emit ($self->{ct}); # comment
1691    
1692     redo A;
1693     } else {
1694     !!!cp (147);
1695     $self->{ct}->{data} .= chr ($self->{nc}); # comment
1696     $self->{read_until}->($self->{ct}->{data},
1697     q[-],
1698     length $self->{ct}->{data});
1699    
1700     ## Stay in the state
1701     !!!next-input-character;
1702     redo A;
1703     }
1704     } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1705     if ($self->{nc} == 0x002D) { # -
1706     !!!cp (148);
1707     $self->{state} = COMMENT_END_STATE;
1708     !!!next-input-character;
1709     redo A;
1710     } elsif ($self->{nc} == -1) {
1711     !!!cp (149);
1712     !!!parse-error (type => 'unclosed comment');
1713 wakaba 1.5 $self->{s_kwd} = '';
1714 wakaba 1.1 $self->{state} = DATA_STATE;
1715 wakaba 1.5 $self->{s_kwd} = '';
1716 wakaba 1.1 ## reconsume
1717    
1718     !!!emit ($self->{ct}); # comment
1719    
1720     redo A;
1721     } else {
1722     !!!cp (150);
1723     $self->{ct}->{data} .= '-' . chr ($self->{nc}); # comment
1724     $self->{state} = COMMENT_STATE;
1725     !!!next-input-character;
1726     redo A;
1727     }
1728     } elsif ($self->{state} == COMMENT_END_STATE) {
1729     if ($self->{nc} == 0x003E) { # >
1730     !!!cp (151);
1731     $self->{state} = DATA_STATE;
1732 wakaba 1.5 $self->{s_kwd} = '';
1733 wakaba 1.1 !!!next-input-character;
1734    
1735     !!!emit ($self->{ct}); # comment
1736    
1737     redo A;
1738     } elsif ($self->{nc} == 0x002D) { # -
1739     !!!cp (152);
1740     !!!parse-error (type => 'dash in comment',
1741     line => $self->{line_prev},
1742     column => $self->{column_prev});
1743     $self->{ct}->{data} .= '-'; # comment
1744     ## Stay in the state
1745     !!!next-input-character;
1746     redo A;
1747     } elsif ($self->{nc} == -1) {
1748     !!!cp (153);
1749     !!!parse-error (type => 'unclosed comment');
1750     $self->{state} = DATA_STATE;
1751 wakaba 1.5 $self->{s_kwd} = '';
1752 wakaba 1.1 ## reconsume
1753    
1754     !!!emit ($self->{ct}); # comment
1755    
1756     redo A;
1757     } else {
1758     !!!cp (154);
1759     !!!parse-error (type => 'dash in comment',
1760     line => $self->{line_prev},
1761     column => $self->{column_prev});
1762     $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
1763     $self->{state} = COMMENT_STATE;
1764     !!!next-input-character;
1765     redo A;
1766     }
1767     } elsif ($self->{state} == DOCTYPE_STATE) {
1768     if ($is_space->{$self->{nc}}) {
1769     !!!cp (155);
1770     $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1771     !!!next-input-character;
1772     redo A;
1773     } else {
1774     !!!cp (156);
1775     !!!parse-error (type => 'no space before DOCTYPE name');
1776     $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1777     ## reconsume
1778     redo A;
1779     }
1780     } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1781     if ($is_space->{$self->{nc}}) {
1782     !!!cp (157);
1783     ## Stay in the state
1784     !!!next-input-character;
1785     redo A;
1786     } elsif ($self->{nc} == 0x003E) { # >
1787     !!!cp (158);
1788     !!!parse-error (type => 'no DOCTYPE name');
1789     $self->{state} = DATA_STATE;
1790 wakaba 1.5 $self->{s_kwd} = '';
1791 wakaba 1.1 !!!next-input-character;
1792    
1793     !!!emit ($self->{ct}); # DOCTYPE (quirks)
1794    
1795     redo A;
1796     } elsif ($self->{nc} == -1) {
1797     !!!cp (159);
1798     !!!parse-error (type => 'no DOCTYPE name');
1799     $self->{state} = DATA_STATE;
1800 wakaba 1.5 $self->{s_kwd} = '';
1801 wakaba 1.1 ## reconsume
1802    
1803     !!!emit ($self->{ct}); # DOCTYPE (quirks)
1804    
1805     redo A;
1806     } else {
1807     !!!cp (160);
1808     $self->{ct}->{name} = chr $self->{nc};
1809     delete $self->{ct}->{quirks};
1810     $self->{state} = DOCTYPE_NAME_STATE;
1811     !!!next-input-character;
1812     redo A;
1813     }
1814     } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1815     ## ISSUE: Redundant "First," in the spec.
1816     if ($is_space->{$self->{nc}}) {
1817     !!!cp (161);
1818     $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1819     !!!next-input-character;
1820     redo A;
1821     } elsif ($self->{nc} == 0x003E) { # >
1822     !!!cp (162);
1823     $self->{state} = DATA_STATE;
1824 wakaba 1.5 $self->{s_kwd} = '';
1825 wakaba 1.1 !!!next-input-character;
1826    
1827     !!!emit ($self->{ct}); # DOCTYPE
1828    
1829     redo A;
1830     } elsif ($self->{nc} == -1) {
1831     !!!cp (163);
1832     !!!parse-error (type => 'unclosed DOCTYPE');
1833     $self->{state} = DATA_STATE;
1834 wakaba 1.5 $self->{s_kwd} = '';
1835 wakaba 1.1 ## reconsume
1836    
1837     $self->{ct}->{quirks} = 1;
1838     !!!emit ($self->{ct}); # DOCTYPE
1839    
1840     redo A;
1841     } else {
1842     !!!cp (164);
1843     $self->{ct}->{name}
1844     .= chr ($self->{nc}); # DOCTYPE
1845     ## Stay in the state
1846     !!!next-input-character;
1847     redo A;
1848     }
1849     } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1850     if ($is_space->{$self->{nc}}) {
1851     !!!cp (165);
1852     ## Stay in the state
1853     !!!next-input-character;
1854     redo A;
1855     } elsif ($self->{nc} == 0x003E) { # >
1856     !!!cp (166);
1857     $self->{state} = DATA_STATE;
1858 wakaba 1.5 $self->{s_kwd} = '';
1859 wakaba 1.1 !!!next-input-character;
1860    
1861     !!!emit ($self->{ct}); # DOCTYPE
1862    
1863     redo A;
1864     } elsif ($self->{nc} == -1) {
1865     !!!cp (167);
1866     !!!parse-error (type => 'unclosed DOCTYPE');
1867     $self->{state} = DATA_STATE;
1868 wakaba 1.5 $self->{s_kwd} = '';
1869 wakaba 1.1 ## reconsume
1870    
1871     $self->{ct}->{quirks} = 1;
1872     !!!emit ($self->{ct}); # DOCTYPE
1873    
1874     redo A;
1875     } elsif ($self->{nc} == 0x0050 or # P
1876     $self->{nc} == 0x0070) { # p
1877     $self->{state} = PUBLIC_STATE;
1878     $self->{s_kwd} = chr $self->{nc};
1879     !!!next-input-character;
1880     redo A;
1881     } elsif ($self->{nc} == 0x0053 or # S
1882     $self->{nc} == 0x0073) { # s
1883     $self->{state} = SYSTEM_STATE;
1884     $self->{s_kwd} = chr $self->{nc};
1885     !!!next-input-character;
1886     redo A;
1887     } else {
1888     !!!cp (180);
1889     !!!parse-error (type => 'string after DOCTYPE name');
1890     $self->{ct}->{quirks} = 1;
1891    
1892     $self->{state} = BOGUS_DOCTYPE_STATE;
1893     !!!next-input-character;
1894     redo A;
1895     }
1896     } elsif ($self->{state} == PUBLIC_STATE) {
1897     ## ASCII case-insensitive
1898     if ($self->{nc} == [
1899     undef,
1900     0x0055, # U
1901     0x0042, # B
1902     0x004C, # L
1903     0x0049, # I
1904     ]->[length $self->{s_kwd}] or
1905     $self->{nc} == [
1906     undef,
1907     0x0075, # u
1908     0x0062, # b
1909     0x006C, # l
1910     0x0069, # i
1911     ]->[length $self->{s_kwd}]) {
1912     !!!cp (175);
1913     ## Stay in the state.
1914     $self->{s_kwd} .= chr $self->{nc};
1915     !!!next-input-character;
1916     redo A;
1917     } elsif ((length $self->{s_kwd}) == 5 and
1918     ($self->{nc} == 0x0043 or # C
1919     $self->{nc} == 0x0063)) { # c
1920     !!!cp (168);
1921     $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
1922     !!!next-input-character;
1923     redo A;
1924     } else {
1925     !!!cp (169);
1926     !!!parse-error (type => 'string after DOCTYPE name',
1927     line => $self->{line_prev},
1928     column => $self->{column_prev} + 1 - length $self->{s_kwd});
1929     $self->{ct}->{quirks} = 1;
1930    
1931     $self->{state} = BOGUS_DOCTYPE_STATE;
1932     ## Reconsume.
1933     redo A;
1934     }
1935     } elsif ($self->{state} == SYSTEM_STATE) {
1936     ## ASCII case-insensitive
1937     if ($self->{nc} == [
1938     undef,
1939     0x0059, # Y
1940     0x0053, # S
1941     0x0054, # T
1942     0x0045, # E
1943     ]->[length $self->{s_kwd}] or
1944     $self->{nc} == [
1945     undef,
1946     0x0079, # y
1947     0x0073, # s
1948     0x0074, # t
1949     0x0065, # e
1950     ]->[length $self->{s_kwd}]) {
1951     !!!cp (170);
1952     ## Stay in the state.
1953     $self->{s_kwd} .= chr $self->{nc};
1954     !!!next-input-character;
1955     redo A;
1956     } elsif ((length $self->{s_kwd}) == 5 and
1957     ($self->{nc} == 0x004D or # M
1958     $self->{nc} == 0x006D)) { # m
1959     !!!cp (171);
1960     $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
1961     !!!next-input-character;
1962     redo A;
1963     } else {
1964     !!!cp (172);
1965     !!!parse-error (type => 'string after DOCTYPE name',
1966     line => $self->{line_prev},
1967     column => $self->{column_prev} + 1 - length $self->{s_kwd});
1968     $self->{ct}->{quirks} = 1;
1969    
1970     $self->{state} = BOGUS_DOCTYPE_STATE;
1971     ## Reconsume.
1972     redo A;
1973     }
1974     } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
1975     if ($is_space->{$self->{nc}}) {
1976     !!!cp (181);
1977     ## Stay in the state
1978     !!!next-input-character;
1979     redo A;
1980     } elsif ($self->{nc} eq 0x0022) { # "
1981     !!!cp (182);
1982     $self->{ct}->{pubid} = ''; # DOCTYPE
1983     $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
1984     !!!next-input-character;
1985     redo A;
1986     } elsif ($self->{nc} eq 0x0027) { # '
1987     !!!cp (183);
1988     $self->{ct}->{pubid} = ''; # DOCTYPE
1989     $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
1990     !!!next-input-character;
1991     redo A;
1992     } elsif ($self->{nc} eq 0x003E) { # >
1993     !!!cp (184);
1994     !!!parse-error (type => 'no PUBLIC literal');
1995    
1996     $self->{state} = DATA_STATE;
1997 wakaba 1.5 $self->{s_kwd} = '';
1998 wakaba 1.1 !!!next-input-character;
1999    
2000     $self->{ct}->{quirks} = 1;
2001     !!!emit ($self->{ct}); # DOCTYPE
2002    
2003     redo A;
2004     } elsif ($self->{nc} == -1) {
2005     !!!cp (185);
2006     !!!parse-error (type => 'unclosed DOCTYPE');
2007    
2008     $self->{state} = DATA_STATE;
2009 wakaba 1.5 $self->{s_kwd} = '';
2010 wakaba 1.1 ## reconsume
2011    
2012     $self->{ct}->{quirks} = 1;
2013     !!!emit ($self->{ct}); # DOCTYPE
2014    
2015     redo A;
2016     } else {
2017     !!!cp (186);
2018     !!!parse-error (type => 'string after PUBLIC');
2019     $self->{ct}->{quirks} = 1;
2020    
2021     $self->{state} = BOGUS_DOCTYPE_STATE;
2022     !!!next-input-character;
2023     redo A;
2024     }
2025     } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2026     if ($self->{nc} == 0x0022) { # "
2027     !!!cp (187);
2028     $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2029     !!!next-input-character;
2030     redo A;
2031     } elsif ($self->{nc} == 0x003E) { # >
2032     !!!cp (188);
2033     !!!parse-error (type => 'unclosed PUBLIC literal');
2034    
2035     $self->{state} = DATA_STATE;
2036 wakaba 1.5 $self->{s_kwd} = '';
2037 wakaba 1.1 !!!next-input-character;
2038    
2039     $self->{ct}->{quirks} = 1;
2040     !!!emit ($self->{ct}); # DOCTYPE
2041    
2042     redo A;
2043     } elsif ($self->{nc} == -1) {
2044     !!!cp (189);
2045     !!!parse-error (type => 'unclosed PUBLIC literal');
2046    
2047     $self->{state} = DATA_STATE;
2048 wakaba 1.5 $self->{s_kwd} = '';
2049 wakaba 1.1 ## reconsume
2050    
2051     $self->{ct}->{quirks} = 1;
2052     !!!emit ($self->{ct}); # DOCTYPE
2053    
2054     redo A;
2055     } else {
2056     !!!cp (190);
2057     $self->{ct}->{pubid} # DOCTYPE
2058     .= chr $self->{nc};
2059     $self->{read_until}->($self->{ct}->{pubid}, q[">],
2060     length $self->{ct}->{pubid});
2061    
2062     ## Stay in the state
2063     !!!next-input-character;
2064     redo A;
2065     }
2066     } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2067     if ($self->{nc} == 0x0027) { # '
2068     !!!cp (191);
2069     $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2070     !!!next-input-character;
2071     redo A;
2072     } elsif ($self->{nc} == 0x003E) { # >
2073     !!!cp (192);
2074     !!!parse-error (type => 'unclosed PUBLIC literal');
2075    
2076     $self->{state} = DATA_STATE;
2077 wakaba 1.5 $self->{s_kwd} = '';
2078 wakaba 1.1 !!!next-input-character;
2079    
2080     $self->{ct}->{quirks} = 1;
2081     !!!emit ($self->{ct}); # DOCTYPE
2082    
2083     redo A;
2084     } elsif ($self->{nc} == -1) {
2085     !!!cp (193);
2086     !!!parse-error (type => 'unclosed PUBLIC literal');
2087    
2088     $self->{state} = DATA_STATE;
2089 wakaba 1.5 $self->{s_kwd} = '';
2090 wakaba 1.1 ## reconsume
2091    
2092     $self->{ct}->{quirks} = 1;
2093     !!!emit ($self->{ct}); # DOCTYPE
2094    
2095     redo A;
2096     } else {
2097     !!!cp (194);
2098     $self->{ct}->{pubid} # DOCTYPE
2099     .= chr $self->{nc};
2100     $self->{read_until}->($self->{ct}->{pubid}, q['>],
2101     length $self->{ct}->{pubid});
2102    
2103     ## Stay in the state
2104     !!!next-input-character;
2105     redo A;
2106     }
2107     } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2108     if ($is_space->{$self->{nc}}) {
2109     !!!cp (195);
2110     ## Stay in the state
2111     !!!next-input-character;
2112     redo A;
2113     } elsif ($self->{nc} == 0x0022) { # "
2114     !!!cp (196);
2115     $self->{ct}->{sysid} = ''; # DOCTYPE
2116     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2117     !!!next-input-character;
2118     redo A;
2119     } elsif ($self->{nc} == 0x0027) { # '
2120     !!!cp (197);
2121     $self->{ct}->{sysid} = ''; # DOCTYPE
2122     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2123     !!!next-input-character;
2124     redo A;
2125     } elsif ($self->{nc} == 0x003E) { # >
2126     !!!cp (198);
2127     $self->{state} = DATA_STATE;
2128 wakaba 1.5 $self->{s_kwd} = '';
2129 wakaba 1.1 !!!next-input-character;
2130    
2131     !!!emit ($self->{ct}); # DOCTYPE
2132    
2133     redo A;
2134     } elsif ($self->{nc} == -1) {
2135     !!!cp (199);
2136     !!!parse-error (type => 'unclosed DOCTYPE');
2137    
2138     $self->{state} = DATA_STATE;
2139 wakaba 1.5 $self->{s_kwd} = '';
2140 wakaba 1.1 ## reconsume
2141    
2142     $self->{ct}->{quirks} = 1;
2143     !!!emit ($self->{ct}); # DOCTYPE
2144    
2145     redo A;
2146     } else {
2147     !!!cp (200);
2148     !!!parse-error (type => 'string after PUBLIC literal');
2149     $self->{ct}->{quirks} = 1;
2150    
2151     $self->{state} = BOGUS_DOCTYPE_STATE;
2152     !!!next-input-character;
2153     redo A;
2154     }
2155     } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2156     if ($is_space->{$self->{nc}}) {
2157     !!!cp (201);
2158     ## Stay in the state
2159     !!!next-input-character;
2160     redo A;
2161     } elsif ($self->{nc} == 0x0022) { # "
2162     !!!cp (202);
2163     $self->{ct}->{sysid} = ''; # DOCTYPE
2164     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2165     !!!next-input-character;
2166     redo A;
2167     } elsif ($self->{nc} == 0x0027) { # '
2168     !!!cp (203);
2169     $self->{ct}->{sysid} = ''; # DOCTYPE
2170     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2171     !!!next-input-character;
2172     redo A;
2173     } elsif ($self->{nc} == 0x003E) { # >
2174     !!!cp (204);
2175     !!!parse-error (type => 'no SYSTEM literal');
2176     $self->{state} = DATA_STATE;
2177 wakaba 1.5 $self->{s_kwd} = '';
2178 wakaba 1.1 !!!next-input-character;
2179    
2180     $self->{ct}->{quirks} = 1;
2181     !!!emit ($self->{ct}); # DOCTYPE
2182    
2183     redo A;
2184     } elsif ($self->{nc} == -1) {
2185     !!!cp (205);
2186     !!!parse-error (type => 'unclosed DOCTYPE');
2187    
2188     $self->{state} = DATA_STATE;
2189 wakaba 1.5 $self->{s_kwd} = '';
2190 wakaba 1.1 ## reconsume
2191    
2192     $self->{ct}->{quirks} = 1;
2193     !!!emit ($self->{ct}); # DOCTYPE
2194    
2195     redo A;
2196     } else {
2197     !!!cp (206);
2198     !!!parse-error (type => 'string after SYSTEM');
2199     $self->{ct}->{quirks} = 1;
2200    
2201     $self->{state} = BOGUS_DOCTYPE_STATE;
2202     !!!next-input-character;
2203     redo A;
2204     }
2205     } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2206     if ($self->{nc} == 0x0022) { # "
2207     !!!cp (207);
2208     $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2209     !!!next-input-character;
2210     redo A;
2211     } elsif ($self->{nc} == 0x003E) { # >
2212     !!!cp (208);
2213     !!!parse-error (type => 'unclosed SYSTEM literal');
2214    
2215     $self->{state} = DATA_STATE;
2216 wakaba 1.5 $self->{s_kwd} = '';
2217 wakaba 1.1 !!!next-input-character;
2218    
2219     $self->{ct}->{quirks} = 1;
2220     !!!emit ($self->{ct}); # DOCTYPE
2221    
2222     redo A;
2223     } elsif ($self->{nc} == -1) {
2224     !!!cp (209);
2225     !!!parse-error (type => 'unclosed SYSTEM literal');
2226    
2227     $self->{state} = DATA_STATE;
2228 wakaba 1.5 $self->{s_kwd} = '';
2229 wakaba 1.1 ## reconsume
2230    
2231     $self->{ct}->{quirks} = 1;
2232     !!!emit ($self->{ct}); # DOCTYPE
2233    
2234     redo A;
2235     } else {
2236     !!!cp (210);
2237     $self->{ct}->{sysid} # DOCTYPE
2238     .= chr $self->{nc};
2239     $self->{read_until}->($self->{ct}->{sysid}, q[">],
2240     length $self->{ct}->{sysid});
2241    
2242     ## Stay in the state
2243     !!!next-input-character;
2244     redo A;
2245     }
2246     } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2247     if ($self->{nc} == 0x0027) { # '
2248     !!!cp (211);
2249     $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2250     !!!next-input-character;
2251     redo A;
2252     } elsif ($self->{nc} == 0x003E) { # >
2253     !!!cp (212);
2254     !!!parse-error (type => 'unclosed SYSTEM literal');
2255    
2256     $self->{state} = DATA_STATE;
2257 wakaba 1.5 $self->{s_kwd} = '';
2258 wakaba 1.1 !!!next-input-character;
2259    
2260     $self->{ct}->{quirks} = 1;
2261     !!!emit ($self->{ct}); # DOCTYPE
2262    
2263     redo A;
2264     } elsif ($self->{nc} == -1) {
2265     !!!cp (213);
2266     !!!parse-error (type => 'unclosed SYSTEM literal');
2267    
2268     $self->{state} = DATA_STATE;
2269 wakaba 1.5 $self->{s_kwd} = '';
2270 wakaba 1.1 ## reconsume
2271    
2272     $self->{ct}->{quirks} = 1;
2273     !!!emit ($self->{ct}); # DOCTYPE
2274    
2275     redo A;
2276     } else {
2277     !!!cp (214);
2278     $self->{ct}->{sysid} # DOCTYPE
2279     .= chr $self->{nc};
2280     $self->{read_until}->($self->{ct}->{sysid}, q['>],
2281     length $self->{ct}->{sysid});
2282    
2283     ## Stay in the state
2284     !!!next-input-character;
2285     redo A;
2286     }
2287     } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2288     if ($is_space->{$self->{nc}}) {
2289     !!!cp (215);
2290     ## Stay in the state
2291     !!!next-input-character;
2292     redo A;
2293     } elsif ($self->{nc} == 0x003E) { # >
2294     !!!cp (216);
2295     $self->{state} = DATA_STATE;
2296 wakaba 1.5 $self->{s_kwd} = '';
2297 wakaba 1.1 !!!next-input-character;
2298    
2299     !!!emit ($self->{ct}); # DOCTYPE
2300    
2301     redo A;
2302     } elsif ($self->{nc} == -1) {
2303     !!!cp (217);
2304     !!!parse-error (type => 'unclosed DOCTYPE');
2305     $self->{state} = DATA_STATE;
2306 wakaba 1.5 $self->{s_kwd} = '';
2307 wakaba 1.1 ## reconsume
2308    
2309     $self->{ct}->{quirks} = 1;
2310     !!!emit ($self->{ct}); # DOCTYPE
2311    
2312     redo A;
2313     } else {
2314     !!!cp (218);
2315     !!!parse-error (type => 'string after SYSTEM literal');
2316     #$self->{ct}->{quirks} = 1;
2317    
2318     $self->{state} = BOGUS_DOCTYPE_STATE;
2319     !!!next-input-character;
2320     redo A;
2321     }
2322     } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2323     if ($self->{nc} == 0x003E) { # >
2324     !!!cp (219);
2325     $self->{state} = DATA_STATE;
2326 wakaba 1.5 $self->{s_kwd} = '';
2327 wakaba 1.1 !!!next-input-character;
2328    
2329     !!!emit ($self->{ct}); # DOCTYPE
2330    
2331     redo A;
2332     } elsif ($self->{nc} == -1) {
2333     !!!cp (220);
2334     $self->{state} = DATA_STATE;
2335 wakaba 1.5 $self->{s_kwd} = '';
2336 wakaba 1.1 ## reconsume
2337    
2338     !!!emit ($self->{ct}); # DOCTYPE
2339    
2340     redo A;
2341     } else {
2342     !!!cp (221);
2343     my $s = '';
2344     $self->{read_until}->($s, q[>], 0);
2345    
2346     ## Stay in the state
2347     !!!next-input-character;
2348     redo A;
2349     }
2350     } elsif ($self->{state} == CDATA_SECTION_STATE) {
2351     ## NOTE: "CDATA section state" in the spec is jointly implemented
2352     ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
2353     ## and |CDATA_SECTION_MSE2_STATE|.
2354    
2355     if ($self->{nc} == 0x005D) { # ]
2356     !!!cp (221.1);
2357     $self->{state} = CDATA_SECTION_MSE1_STATE;
2358     !!!next-input-character;
2359     redo A;
2360     } elsif ($self->{nc} == -1) {
2361 wakaba 1.6 if ($self->{is_xml}) {
2362 wakaba 1.8 !!!cp (221.11);
2363 wakaba 1.6 !!!parse-error (type => 'no mse'); ## TODO: type
2364 wakaba 1.8 } else {
2365     !!!cp (221.12);
2366 wakaba 1.6 }
2367    
2368 wakaba 1.1 $self->{state} = DATA_STATE;
2369 wakaba 1.5 $self->{s_kwd} = '';
2370 wakaba 1.1 !!!next-input-character;
2371     if (length $self->{ct}->{data}) { # character
2372     !!!cp (221.2);
2373     !!!emit ($self->{ct}); # character
2374     } else {
2375     !!!cp (221.3);
2376     ## No token to emit. $self->{ct} is discarded.
2377     }
2378     redo A;
2379     } else {
2380     !!!cp (221.4);
2381     $self->{ct}->{data} .= chr $self->{nc};
2382     $self->{read_until}->($self->{ct}->{data},
2383     q<]>,
2384     length $self->{ct}->{data});
2385    
2386     ## Stay in the state.
2387     !!!next-input-character;
2388     redo A;
2389     }
2390    
2391     ## ISSUE: "text tokens" in spec.
2392     } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
2393     if ($self->{nc} == 0x005D) { # ]
2394     !!!cp (221.5);
2395     $self->{state} = CDATA_SECTION_MSE2_STATE;
2396     !!!next-input-character;
2397     redo A;
2398     } else {
2399     !!!cp (221.6);
2400     $self->{ct}->{data} .= ']';
2401     $self->{state} = CDATA_SECTION_STATE;
2402     ## Reconsume.
2403     redo A;
2404     }
2405     } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
2406     if ($self->{nc} == 0x003E) { # >
2407     $self->{state} = DATA_STATE;
2408 wakaba 1.5 $self->{s_kwd} = '';
2409 wakaba 1.1 !!!next-input-character;
2410     if (length $self->{ct}->{data}) { # character
2411     !!!cp (221.7);
2412     !!!emit ($self->{ct}); # character
2413     } else {
2414     !!!cp (221.8);
2415     ## No token to emit. $self->{ct} is discarded.
2416     }
2417     redo A;
2418     } elsif ($self->{nc} == 0x005D) { # ]
2419     !!!cp (221.9); # character
2420     $self->{ct}->{data} .= ']'; ## Add first "]" of "]]]".
2421     ## Stay in the state.
2422     !!!next-input-character;
2423     redo A;
2424     } else {
2425     !!!cp (221.11);
2426     $self->{ct}->{data} .= ']]'; # character
2427     $self->{state} = CDATA_SECTION_STATE;
2428     ## Reconsume.
2429     redo A;
2430     }
2431     } elsif ($self->{state} == ENTITY_STATE) {
2432     if ($is_space->{$self->{nc}} or
2433     {
2434     0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
2435     $self->{entity_add} => 1,
2436     }->{$self->{nc}}) {
2437     !!!cp (1001);
2438     ## Don't consume
2439     ## No error
2440     ## Return nothing.
2441     #
2442     } elsif ($self->{nc} == 0x0023) { # #
2443     !!!cp (999);
2444     $self->{state} = ENTITY_HASH_STATE;
2445     $self->{s_kwd} = '#';
2446     !!!next-input-character;
2447     redo A;
2448     } elsif ((0x0041 <= $self->{nc} and
2449     $self->{nc} <= 0x005A) or # A..Z
2450     (0x0061 <= $self->{nc} and
2451     $self->{nc} <= 0x007A)) { # a..z
2452     !!!cp (998);
2453     require Whatpm::_NamedEntityList;
2454     $self->{state} = ENTITY_NAME_STATE;
2455     $self->{s_kwd} = chr $self->{nc};
2456     $self->{entity__value} = $self->{s_kwd};
2457     $self->{entity__match} = 0;
2458     !!!next-input-character;
2459     redo A;
2460     } else {
2461     !!!cp (1027);
2462     !!!parse-error (type => 'bare ero');
2463     ## Return nothing.
2464     #
2465     }
2466    
2467     ## NOTE: No character is consumed by the "consume a character
2468     ## reference" algorithm. In other words, there is an "&" character
2469     ## that does not introduce a character reference, which would be
2470     ## appended to the parent element or the attribute value in later
2471     ## processing by the tokenizer.
2472    
2473     if ($self->{prev_state} == DATA_STATE) {
2474     !!!cp (997);
2475     $self->{state} = $self->{prev_state};
2476 wakaba 1.5 $self->{s_kwd} = '';
2477 wakaba 1.1 ## Reconsume.
2478     !!!emit ({type => CHARACTER_TOKEN, data => '&',
2479     line => $self->{line_prev},
2480     column => $self->{column_prev},
2481     });
2482     redo A;
2483     } else {
2484     !!!cp (996);
2485     $self->{ca}->{value} .= '&';
2486     $self->{state} = $self->{prev_state};
2487 wakaba 1.5 $self->{s_kwd} = '';
2488 wakaba 1.1 ## Reconsume.
2489     redo A;
2490     }
2491     } elsif ($self->{state} == ENTITY_HASH_STATE) {
2492     if ($self->{nc} == 0x0078 or # x
2493     $self->{nc} == 0x0058) { # X
2494     !!!cp (995);
2495     $self->{state} = HEXREF_X_STATE;
2496     $self->{s_kwd} .= chr $self->{nc};
2497     !!!next-input-character;
2498     redo A;
2499     } elsif (0x0030 <= $self->{nc} and
2500     $self->{nc} <= 0x0039) { # 0..9
2501     !!!cp (994);
2502     $self->{state} = NCR_NUM_STATE;
2503     $self->{s_kwd} = $self->{nc} - 0x0030;
2504     !!!next-input-character;
2505     redo A;
2506     } else {
2507     !!!parse-error (type => 'bare nero',
2508     line => $self->{line_prev},
2509     column => $self->{column_prev} - 1);
2510    
2511     ## NOTE: According to the spec algorithm, nothing is returned,
2512     ## and then "&#" is appended to the parent element or the attribute
2513     ## value in the later processing.
2514    
2515     if ($self->{prev_state} == DATA_STATE) {
2516     !!!cp (1019);
2517     $self->{state} = $self->{prev_state};
2518 wakaba 1.5 $self->{s_kwd} = '';
2519 wakaba 1.1 ## Reconsume.
2520     !!!emit ({type => CHARACTER_TOKEN,
2521     data => '&#',
2522     line => $self->{line_prev},
2523     column => $self->{column_prev} - 1,
2524     });
2525     redo A;
2526     } else {
2527     !!!cp (993);
2528     $self->{ca}->{value} .= '&#';
2529     $self->{state} = $self->{prev_state};
2530 wakaba 1.5 $self->{s_kwd} = '';
2531 wakaba 1.1 ## Reconsume.
2532     redo A;
2533     }
2534     }
2535     } elsif ($self->{state} == NCR_NUM_STATE) {
2536     if (0x0030 <= $self->{nc} and
2537     $self->{nc} <= 0x0039) { # 0..9
2538     !!!cp (1012);
2539     $self->{s_kwd} *= 10;
2540     $self->{s_kwd} += $self->{nc} - 0x0030;
2541    
2542     ## Stay in the state.
2543     !!!next-input-character;
2544     redo A;
2545     } elsif ($self->{nc} == 0x003B) { # ;
2546     !!!cp (1013);
2547     !!!next-input-character;
2548     #
2549     } else {
2550     !!!cp (1014);
2551     !!!parse-error (type => 'no refc');
2552     ## Reconsume.
2553     #
2554     }
2555    
2556     my $code = $self->{s_kwd};
2557     my $l = $self->{line_prev};
2558     my $c = $self->{column_prev};
2559     if ($charref_map->{$code}) {
2560     !!!cp (1015);
2561     !!!parse-error (type => 'invalid character reference',
2562     text => (sprintf 'U+%04X', $code),
2563     line => $l, column => $c);
2564     $code = $charref_map->{$code};
2565     } elsif ($code > 0x10FFFF) {
2566     !!!cp (1016);
2567     !!!parse-error (type => 'invalid character reference',
2568     text => (sprintf 'U-%08X', $code),
2569     line => $l, column => $c);
2570     $code = 0xFFFD;
2571     }
2572    
2573     if ($self->{prev_state} == DATA_STATE) {
2574     !!!cp (992);
2575     $self->{state} = $self->{prev_state};
2576 wakaba 1.5 $self->{s_kwd} = '';
2577 wakaba 1.1 ## Reconsume.
2578     !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
2579 wakaba 1.7 has_reference => 1,
2580 wakaba 1.1 line => $l, column => $c,
2581     });
2582     redo A;
2583     } else {
2584     !!!cp (991);
2585     $self->{ca}->{value} .= chr $code;
2586     $self->{ca}->{has_reference} = 1;
2587     $self->{state} = $self->{prev_state};
2588 wakaba 1.5 $self->{s_kwd} = '';
2589 wakaba 1.1 ## Reconsume.
2590     redo A;
2591     }
2592     } elsif ($self->{state} == HEXREF_X_STATE) {
2593     if ((0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) or
2594     (0x0041 <= $self->{nc} and $self->{nc} <= 0x0046) or
2595     (0x0061 <= $self->{nc} and $self->{nc} <= 0x0066)) {
2596     # 0..9, A..F, a..f
2597     !!!cp (990);
2598     $self->{state} = HEXREF_HEX_STATE;
2599     $self->{s_kwd} = 0;
2600     ## Reconsume.
2601     redo A;
2602     } else {
2603     !!!parse-error (type => 'bare hcro',
2604     line => $self->{line_prev},
2605     column => $self->{column_prev} - 2);
2606    
2607     ## NOTE: According to the spec algorithm, nothing is returned,
2608     ## and then "&#" followed by "X" or "x" is appended to the parent
2609     ## element or the attribute value in the later processing.
2610    
2611     if ($self->{prev_state} == DATA_STATE) {
2612     !!!cp (1005);
2613     $self->{state} = $self->{prev_state};
2614 wakaba 1.5 $self->{s_kwd} = '';
2615 wakaba 1.1 ## Reconsume.
2616     !!!emit ({type => CHARACTER_TOKEN,
2617     data => '&' . $self->{s_kwd},
2618     line => $self->{line_prev},
2619     column => $self->{column_prev} - length $self->{s_kwd},
2620     });
2621     redo A;
2622     } else {
2623     !!!cp (989);
2624     $self->{ca}->{value} .= '&' . $self->{s_kwd};
2625     $self->{state} = $self->{prev_state};
2626 wakaba 1.5 $self->{s_kwd} = '';
2627 wakaba 1.1 ## Reconsume.
2628     redo A;
2629     }
2630     }
2631     } elsif ($self->{state} == HEXREF_HEX_STATE) {
2632     if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
2633     # 0..9
2634     !!!cp (1002);
2635     $self->{s_kwd} *= 0x10;
2636     $self->{s_kwd} += $self->{nc} - 0x0030;
2637     ## Stay in the state.
2638     !!!next-input-character;
2639     redo A;
2640     } elsif (0x0061 <= $self->{nc} and
2641     $self->{nc} <= 0x0066) { # a..f
2642     !!!cp (1003);
2643     $self->{s_kwd} *= 0x10;
2644     $self->{s_kwd} += $self->{nc} - 0x0060 + 9;
2645     ## Stay in the state.
2646     !!!next-input-character;
2647     redo A;
2648     } elsif (0x0041 <= $self->{nc} and
2649     $self->{nc} <= 0x0046) { # A..F
2650     !!!cp (1004);
2651     $self->{s_kwd} *= 0x10;
2652     $self->{s_kwd} += $self->{nc} - 0x0040 + 9;
2653     ## Stay in the state.
2654     !!!next-input-character;
2655     redo A;
2656     } elsif ($self->{nc} == 0x003B) { # ;
2657     !!!cp (1006);
2658     !!!next-input-character;
2659     #
2660     } else {
2661     !!!cp (1007);
2662     !!!parse-error (type => 'no refc',
2663     line => $self->{line},
2664     column => $self->{column});
2665     ## Reconsume.
2666     #
2667     }
2668    
2669     my $code = $self->{s_kwd};
2670     my $l = $self->{line_prev};
2671     my $c = $self->{column_prev};
2672     if ($charref_map->{$code}) {
2673     !!!cp (1008);
2674     !!!parse-error (type => 'invalid character reference',
2675     text => (sprintf 'U+%04X', $code),
2676     line => $l, column => $c);
2677     $code = $charref_map->{$code};
2678     } elsif ($code > 0x10FFFF) {
2679     !!!cp (1009);
2680     !!!parse-error (type => 'invalid character reference',
2681     text => (sprintf 'U-%08X', $code),
2682     line => $l, column => $c);
2683     $code = 0xFFFD;
2684     }
2685    
2686     if ($self->{prev_state} == DATA_STATE) {
2687     !!!cp (988);
2688     $self->{state} = $self->{prev_state};
2689 wakaba 1.5 $self->{s_kwd} = '';
2690 wakaba 1.1 ## Reconsume.
2691     !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
2692 wakaba 1.7 has_reference => 1,
2693 wakaba 1.1 line => $l, column => $c,
2694     });
2695     redo A;
2696     } else {
2697     !!!cp (987);
2698     $self->{ca}->{value} .= chr $code;
2699     $self->{ca}->{has_reference} = 1;
2700     $self->{state} = $self->{prev_state};
2701 wakaba 1.5 $self->{s_kwd} = '';
2702 wakaba 1.1 ## Reconsume.
2703     redo A;
2704     }
2705     } elsif ($self->{state} == ENTITY_NAME_STATE) {
2706     if (length $self->{s_kwd} < 30 and
2707     ## NOTE: Some number greater than the maximum length of entity name
2708     ((0x0041 <= $self->{nc} and # A
2709     $self->{nc} <= 0x005A) or # Z
2710     (0x0061 <= $self->{nc} and # a
2711     $self->{nc} <= 0x007A) or # z
2712     (0x0030 <= $self->{nc} and # 0
2713     $self->{nc} <= 0x0039) or # 9
2714     $self->{nc} == 0x003B)) { # ;
2715     our $EntityChar;
2716     $self->{s_kwd} .= chr $self->{nc};
2717     if (defined $EntityChar->{$self->{s_kwd}}) {
2718     if ($self->{nc} == 0x003B) { # ;
2719     !!!cp (1020);
2720     $self->{entity__value} = $EntityChar->{$self->{s_kwd}};
2721     $self->{entity__match} = 1;
2722     !!!next-input-character;
2723     #
2724     } else {
2725     !!!cp (1021);
2726     $self->{entity__value} = $EntityChar->{$self->{s_kwd}};
2727     $self->{entity__match} = -1;
2728     ## Stay in the state.
2729     !!!next-input-character;
2730     redo A;
2731     }
2732     } else {
2733     !!!cp (1022);
2734     $self->{entity__value} .= chr $self->{nc};
2735     $self->{entity__match} *= 2;
2736     ## Stay in the state.
2737     !!!next-input-character;
2738     redo A;
2739     }
2740     }
2741    
2742     my $data;
2743     my $has_ref;
2744     if ($self->{entity__match} > 0) {
2745     !!!cp (1023);
2746     $data = $self->{entity__value};
2747     $has_ref = 1;
2748     #
2749     } elsif ($self->{entity__match} < 0) {
2750     !!!parse-error (type => 'no refc');
2751     if ($self->{prev_state} != DATA_STATE and # in attribute
2752     $self->{entity__match} < -1) {
2753     !!!cp (1024);
2754     $data = '&' . $self->{s_kwd};
2755     #
2756     } else {
2757     !!!cp (1025);
2758     $data = $self->{entity__value};
2759     $has_ref = 1;
2760     #
2761     }
2762     } else {
2763     !!!cp (1026);
2764     !!!parse-error (type => 'bare ero',
2765     line => $self->{line_prev},
2766     column => $self->{column_prev} - length $self->{s_kwd});
2767     $data = '&' . $self->{s_kwd};
2768     #
2769     }
2770    
2771     ## NOTE: According to the spec algorithm, when a character reference
2772     ## is found, it is consumed and a character token is returned;
2773     ## otherwise, nothing is consumed and nothing is returned.
2774     ## In this implementation, anything that has been examined by the
2775     ## tokenizer is appended to the parent element or the attribute value
2776     ## at this stage as a string (the literal string when there is no
2777     ## character reference, the entity-replaced string otherwise), since
2778     ## any characters that would not be consumed are appended in the data
2779     ## state or in an appropriate attribute value state anyway.
2780    
2781     if ($self->{prev_state} == DATA_STATE) {
2782     !!!cp (986);
2783     $self->{state} = $self->{prev_state};
2784 wakaba 1.5 $self->{s_kwd} = '';
2785 wakaba 1.1 ## Reconsume.
2786     !!!emit ({type => CHARACTER_TOKEN,
2787     data => $data,
2788 wakaba 1.7 has_reference => $has_ref,
2789 wakaba 1.1 line => $self->{line_prev},
2790     column => $self->{column_prev} + 1 - length $self->{s_kwd},
2791     });
2792     redo A;
2793     } else {
2794     !!!cp (985);
2795     $self->{ca}->{value} .= $data;
2796     $self->{ca}->{has_reference} = 1 if $has_ref;
2797     $self->{state} = $self->{prev_state};
2798 wakaba 1.5 $self->{s_kwd} = '';
2799 wakaba 1.1 ## Reconsume.
2800     redo A;
2801     }
2802 wakaba 1.8
2803     ## XML-only states
2804    
2805     } elsif ($self->{state} == PI_STATE) {
2806     if ($is_space->{$self->{nc}} or
2807     $self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else"
2808     $self->{nc} == -1) {
2809     !!!parse-error (type => 'bare pio', ## TODO: type
2810     line => $self->{line_prev},
2811     column => $self->{column_prev}
2812     - 1 * ($self->{nc} != -1));
2813     $self->{state} = BOGUS_COMMENT_STATE;
2814     ## Reconsume.
2815     $self->{ct} = {type => COMMENT_TOKEN,
2816     data => '?',
2817     line => $self->{line_prev},
2818     column => $self->{column_prev}
2819     - 1 * ($self->{nc} != -1),
2820     };
2821     redo A;
2822     } else {
2823     $self->{ct} = {type => PI_TOKEN,
2824     target => chr $self->{nc},
2825     data => '',
2826     line => $self->{line_prev},
2827     column => $self->{column_prev} - 1,
2828     };
2829     $self->{state} = PI_TARGET_STATE;
2830     !!!next-input-character;
2831     redo A;
2832     }
2833     } elsif ($self->{state} == PI_TARGET_STATE) {
2834     if ($is_space->{$self->{nc}}) {
2835     $self->{state} = PI_TARGET_AFTER_STATE;
2836     !!!next-input-character;
2837     redo A;
2838     } elsif ($self->{nc} == -1) {
2839     !!!parse-error (type => 'no pic'); ## TODO: type
2840     $self->{state} = DATA_STATE;
2841     $self->{s_kwd} = '';
2842     ## Reconsume.
2843     !!!emit ($self->{ct}); # pi
2844     redo A;
2845     } elsif ($self->{nc} == 0x003F) { # ?
2846     $self->{state} = PI_AFTER_STATE;
2847     !!!next-input-character;
2848     redo A;
2849     } else {
2850     ## XML5: typo ("tag name" -> "target")
2851     $self->{ct}->{target} .= chr $self->{nc}; # pi
2852     !!!next-input-character;
2853     redo A;
2854     }
2855     } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
2856     if ($is_space->{$self->{nc}}) {
2857     ## Stay in the state.
2858     !!!next-input-character;
2859     redo A;
2860     } else {
2861     $self->{state} = PI_DATA_STATE;
2862     ## Reprocess.
2863     redo A;
2864     }
2865     } elsif ($self->{state} == PI_DATA_STATE) {
2866     if ($self->{nc} == 0x003F) { # ?
2867     $self->{state} = PI_DATA_AFTER_STATE;
2868     !!!next-input-character;
2869     redo A;
2870     } elsif ($self->{nc} == -1) {
2871     !!!parse-error (type => 'no pic'); ## TODO: type
2872     $self->{state} = DATA_STATE;
2873     $self->{s_kwd} = '';
2874     ## Reprocess.
2875     !!!emit ($self->{ct}); # pi
2876     redo A;
2877     } else {
2878     $self->{ct}->{data} .= chr $self->{nc}; # pi
2879     $self->{read_until}->($self->{ct}->{data}, q[?],
2880     length $self->{ct}->{data});
2881     ## Stay in the state.
2882     !!!next-input-character;
2883     ## The next character has been consumed (not a reprocess).
2884     redo A;
2885     }
2886     } elsif ($self->{state} == PI_AFTER_STATE) {
2887     if ($self->{nc} == 0x003E) { # >
2888     $self->{state} = DATA_STATE;
2889     $self->{s_kwd} = '';
2890     !!!next-input-character;
2891     !!!emit ($self->{ct}); # pi
2892     redo A;
2893     } elsif ($self->{nc} == 0x003F) { # ?
2894     !!!parse-error (type => 'no s after target', ## TODO: type
2895     line => $self->{line_prev},
2896     column => $self->{column_prev}); ## XML5: no error
2897     $self->{ct}->{data} .= '?';
2898     $self->{state} = PI_DATA_AFTER_STATE;
2899     !!!next-input-character;
2900     redo A;
2901     } else {
2902     !!!parse-error (type => 'no s after target', ## TODO: type
2903     line => $self->{line_prev},
2904     column => $self->{column_prev}
2905     + 1 * ($self->{nc} == -1)); ## XML5: no error
2906     $self->{ct}->{data} .= '?'; ## XML5: not appended
2907     $self->{state} = PI_DATA_STATE;
2908     ## Reprocess.
2909     redo A;
2910     }
2911     } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
2912     ## XML5: Same as "pi after state" in XML5
2913     if ($self->{nc} == 0x003E) { # >
2914     $self->{state} = DATA_STATE;
2915     $self->{s_kwd} = '';
2916     !!!next-input-character;
2917     !!!emit ($self->{ct}); # pi
2918     redo A;
2919     } elsif ($self->{nc} == 0x003F) { # ?
2920     $self->{ct}->{data} .= '?';
2921     ## Stay in the state.
2922     !!!next-input-character;
2923     redo A;
2924     } else {
2925     $self->{ct}->{data} .= '?'; ## XML5: not appended
2926     $self->{state} = PI_DATA_STATE;
2927     ## Reprocess.
2928     redo A;
2929     }
2930    
2931 wakaba 1.1 } else {
2932     die "$0: $self->{state}: Unknown state";
2933     }
2934     } # A
2935    
2936     die "$0: _get_next_token: unexpected case";
2937     } # _get_next_token
2938    
2939     1;
2940 wakaba 1.8 ## $Date: 2008/10/14 15:25:50 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24