/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.15 - (hide annotations) (download) (as text)
Sat Oct 18 08:05:29 2008 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.14: +606 -46 lines
File MIME type: application/x-wais-source
++ whatpm/t/xml/ChangeLog	18 Oct 2008 08:05:22 -0000
2008-10-18  Wakaba  <wakaba@suika.fam.cx>

	* attlists-1.dat: Tests added.

++ whatpm/Whatpm/ChangeLog	18 Oct 2008 08:03:10 -0000
2008-10-18  Wakaba  <wakaba@suika.fam.cx>

	* NanoDOM.pm (text_content): Moved to Node from Element.  Setter
	implemented.
	(allowed_tokens, default_type, declared_type): Implemented.

++ whatpm/Whatpm/HTML/ChangeLog	18 Oct 2008 08:04:10 -0000
2008-10-18  Wakaba  <wakaba@suika.fam.cx>

	* Tokenizer.pm.src: <!ATTLIST> in the internal subset of an XML
	document, is now fully implemented.

	* Dumper.pm (dumptree): Output allowed tokens and default value
	always.

++ whatpm/Whatpm/XML/ChangeLog	18 Oct 2008 08:05:03 -0000
2008-10-18  Wakaba  <wakaba@suika.fam.cx>

	* Parser.pm.src (_tree_in_subset): <!ATTLIST> node construction
	implemented.

1 wakaba 1.1 package Whatpm::HTML::Tokenizer;
2     use strict;
3 wakaba 1.15 our $VERSION=do{my @r=(q$Revision: 1.14 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.2
## Export setup.  This is done inside a |BEGIN| block so that |@EXPORT_OK|
## and |%EXPORT_TAGS| are already populated when the later
## |BEGIN { Whatpm::HTML::Tokenizer->import (':token') }| statement runs at
## compile time.  Nothing is exported by default; callers ask for individual
## names or for the |:token| tag.
5     BEGIN {
6     require Exporter;
7     push our @ISA, 'Exporter';
8    
9     our @EXPORT_OK = qw(
10     DOCTYPE_TOKEN
11     COMMENT_TOKEN
12     START_TAG_TOKEN
13     END_TAG_TOKEN
14     END_OF_FILE_TOKEN
15     CHARACTER_TOKEN
16     PI_TOKEN
17     ABORT_TOKEN
18 wakaba 1.13 END_OF_DOCTYPE_TOKEN
19 wakaba 1.14 ATTLIST_TOKEN
20     ELEMENT_TOKEN
21     GENERAL_ENTITY_TOKEN
22     PARAMETER_ENTITY_TOKEN
23     NOTATION_TOKEN
24 wakaba 1.2 );
25    
## The |:token| tag lists the same names as |@EXPORT_OK| above; keep the two
## lists in sync when a token type is added.
26     our %EXPORT_TAGS = (
27     token => [qw(
28     DOCTYPE_TOKEN
29     COMMENT_TOKEN
30     START_TAG_TOKEN
31     END_TAG_TOKEN
32     END_OF_FILE_TOKEN
33     CHARACTER_TOKEN
34     PI_TOKEN
35     ABORT_TOKEN
36 wakaba 1.13 END_OF_DOCTYPE_TOKEN
37 wakaba 1.14 ATTLIST_TOKEN
38     ELEMENT_TOKEN
39     GENERAL_ENTITY_TOKEN
40     PARAMETER_ENTITY_TOKEN
41     NOTATION_TOKEN
42 wakaba 1.2 )],
43     );
44     }
45    
46 wakaba 1.12 ## NOTE: Differences from the XML5 draft are marked as "XML5:".
47    
48 wakaba 1.2 ## Token types
49    
## Each token type is a constant sub with an empty prototype returning a
## distinct small integer (1..14).  These values are what a token's
## |->{type}| field is compared against (e.g. |$self->{ct}->{type} ==
## START_TAG_TOKEN| in the tokenizer below).
50 wakaba 1.12 sub DOCTYPE_TOKEN () { 1 } ## XML5: No DOCTYPE token.
51 wakaba 1.2 sub COMMENT_TOKEN () { 2 }
52     sub START_TAG_TOKEN () { 3 }
53     sub END_TAG_TOKEN () { 4 }
54     sub END_OF_FILE_TOKEN () { 5 }
55     sub CHARACTER_TOKEN () { 6 }
56 wakaba 1.12 sub PI_TOKEN () { 7 } ## NOTE: XML only.
57     sub ABORT_TOKEN () { 8 } ## NOTE: For internal processing.
58 wakaba 1.14 sub END_OF_DOCTYPE_TOKEN () { 9 } ## NOTE: XML only.
59     sub ATTLIST_TOKEN () { 10 } ## NOTE: XML only.
60     sub ELEMENT_TOKEN () { 11 } ## NOTE: XML only.
61     sub GENERAL_ENTITY_TOKEN () { 12 } ## NOTE: XML only.
62     sub PARAMETER_ENTITY_TOKEN () { 13 } ## NOTE: XML only.
63     sub NOTATION_TOKEN () { 14 } ## NOTE: XML only.
64 wakaba 1.12
65     ## XML5: XML5 has "empty tag token". In this implementation, it is
66     ## represented as a start tag token with $self->{self_closing} flag
67     ## set to true.
68    
69     ## XML5: XML5 has "short end tag token". In this implementation, it
70     ## is represented as an end tag token with $token->{tag_name} set to
71     ## an empty string.
72 wakaba 1.1
73     package Whatpm::HTML;
74    
75 wakaba 1.2 BEGIN { Whatpm::HTML::Tokenizer->import (':token') }
76    
77 wakaba 1.1 ## Content model flags
78    
## The three |CM_*| constants are single-bit flags; each |*_CONTENT_MODEL|
## constant is a bitwise OR of them.  The tokenizer tests the current
## |$self->{content_model}| with bitwise AND (e.g.
## |$self->{content_model} & CM_ENTITY|) to decide which markup is
## recognized in the data state.
79     sub CM_ENTITY () { 0b001 } # & markup in data
80     sub CM_LIMITED_MARKUP () { 0b010 } # < markup in data (limited)
81     sub CM_FULL_MARKUP () { 0b100 } # < markup in data (any)
82    
## PLAINTEXT recognizes no markup at all (no flag set); PCDATA is the only
## model with |CM_FULL_MARKUP|.
83     sub PLAINTEXT_CONTENT_MODEL () { 0 }
84     sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
85     sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP }
86     sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP }
87    
88     ## Tokenizer states
89    
## Values of |$self->{state}|.  Each state is a distinct integer constant;
## the tokenizer dispatches on it with |$self->{state} == ..._STATE|.
## Numbers 1 and 12 are intentionally unused: the corresponding spec states
## ("entity data state" and "entity in attribute value state") are
## implemented by the |ENTITY_STATE| family (44..49) instead, as noted
## further down in this list.
90     sub DATA_STATE () { 0 }
91     #sub ENTITY_DATA_STATE () { 1 }
92     sub TAG_OPEN_STATE () { 2 }
93     sub CLOSE_TAG_OPEN_STATE () { 3 }
94     sub TAG_NAME_STATE () { 4 }
95     sub BEFORE_ATTRIBUTE_NAME_STATE () { 5 }
96     sub ATTRIBUTE_NAME_STATE () { 6 }
97     sub AFTER_ATTRIBUTE_NAME_STATE () { 7 }
98     sub BEFORE_ATTRIBUTE_VALUE_STATE () { 8 }
99     sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
100     sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
101     sub ATTRIBUTE_VALUE_UNQUOTED_STATE () { 11 }
102     #sub ENTITY_IN_ATTRIBUTE_VALUE_STATE () { 12 }
103     sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
104     sub COMMENT_START_STATE () { 14 }
105     sub COMMENT_START_DASH_STATE () { 15 }
106     sub COMMENT_STATE () { 16 }
107     sub COMMENT_END_STATE () { 17 }
108     sub COMMENT_END_DASH_STATE () { 18 }
109     sub BOGUS_COMMENT_STATE () { 19 }
110     sub DOCTYPE_STATE () { 20 }
111     sub BEFORE_DOCTYPE_NAME_STATE () { 21 }
112     sub DOCTYPE_NAME_STATE () { 22 }
113     sub AFTER_DOCTYPE_NAME_STATE () { 23 }
114     sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 24 }
115     sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
116     sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
117     sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 27 }
118     sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 28 }
119     sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
120     sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
121     sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
122     sub BOGUS_DOCTYPE_STATE () { 32 }
123     sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
124     sub SELF_CLOSING_START_TAG_STATE () { 34 }
125     sub CDATA_SECTION_STATE () { 35 }
## States 36..43 split a single spec state into several implementation
## states; the trailing comment on each line names the spec state it
## belongs to.
126     sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
127     sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
128     sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
129     sub CDATA_RCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
130     sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec
131     sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec
132     sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec
133     sub SYSTEM_STATE () { 43 } # "after DOCTYPE name state" in the spec
134     ## NOTE: "Entity data state", "entity in attribute value state", and
135     ## "consume a character reference" algorithm are jointly implemented
136     ## using the following six states:
137     sub ENTITY_STATE () { 44 }
138     sub ENTITY_HASH_STATE () { 45 }
139     sub NCR_NUM_STATE () { 46 }
140     sub HEXREF_X_STATE () { 47 }
141     sub HEXREF_HEX_STATE () { 48 }
142     sub ENTITY_NAME_STATE () { 49 }
## |PCDATA_STATE| is not one of the six entity states above; the tokenizer
## treats it as a variant of |DATA_STATE| used only for the |PCDATA|
## content model.
143     sub PCDATA_STATE () { 50 } # "data state" in the spec
144    
145 wakaba 1.12 ## XML-only states
## Additional |$self->{state}| values used only when tokenizing XML.  They
## continue the numbering of the HTML states (51..84): processing
## instruction states, DOCTYPE internal subset states, markup declaration
## keyword states (|MD_*|), and the |<!ATTLIST>| declaration states.
146 wakaba 1.8 sub PI_STATE () { 51 }
147     sub PI_TARGET_STATE () { 52 }
148     sub PI_TARGET_AFTER_STATE () { 53 }
149     sub PI_DATA_STATE () { 54 }
150     sub PI_AFTER_STATE () { 55 }
151     sub PI_DATA_AFTER_STATE () { 56 }
152 wakaba 1.12 sub DOCTYPE_INTERNAL_SUBSET_STATE () { 57 }
153     sub DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 58 }
154 wakaba 1.14 sub BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE () { 59 }
155     sub DOCTYPE_TAG_STATE () { 60 }
156     sub DOCTYPE_MARKUP_DECLARATION_OPEN_STATE () { 61 }
157     sub MD_ATTLIST_STATE () { 62 }
158     sub MD_E_STATE () { 63 }
159     sub MD_ELEMENT_STATE () { 64 }
160     sub MD_ENTITY_STATE () { 65 }
161     sub MD_NOTATION_STATE () { 66 }
162     sub DOCTYPE_MD_STATE () { 67 }
163     sub BEFORE_MD_NAME_STATE () { 68 }
164     sub MD_NAME_STATE () { 69 }
165     sub DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE () { 70 }
166     sub DOCTYPE_ATTLIST_NAME_AFTER_STATE () { 71 }
167 wakaba 1.15 sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE () { 72 }
168     sub DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE () { 73 }
169     sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE () { 74 }
170     sub DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE () { 75 }
171     sub BEFORE_ALLOWED_TOKEN_STATE () { 76 }
172     sub ALLOWED_TOKEN_STATE () { 77 }
173     sub AFTER_ALLOWED_TOKEN_STATE () { 78 }
174     sub AFTER_ALLOWED_TOKENS_STATE () { 79 }
175     sub BEFORE_ATTR_DEFAULT_STATE () { 80 }
176     sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE () { 81 }
177     sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE () { 82 }
178     sub DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE () { 83 }
179     sub AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE () { 84 }
180 wakaba 1.8
181 wakaba 1.1 ## Tree constructor state constants (see Whatpm::HTML for the full
182     ## list and descriptions)
183    
## Both constants below evaluate to the same integer, 0b100000000000 (the
## underscore in the second literal is only a digit separator).
## NOTE(review): they are neither exported nor used in the visible part of
## this file; confirm they still match the definitions in Whatpm::HTML.
184     sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 }
185     sub FOREIGN_EL () { 0b1_00000000000 }
186    
187     ## Character reference mappings
188    
## |$charref_map| maps a code point to the code point that replaces it.
## Presumably it is consulted when a numeric character reference is
## resolved (the lookup site is not in this part of the file -- confirm).
##
## The literal entries map CR (0x0D) to LF and remap 0x80..0x9F; the
## replacement values match the Windows-1252 assignments for those bytes,
## with U+FFFD for the slots Windows-1252 leaves unassigned.
189     my $charref_map = {
190     0x0D => 0x000A,
191     0x80 => 0x20AC,
192     0x81 => 0xFFFD,
193     0x82 => 0x201A,
194     0x83 => 0x0192,
195     0x84 => 0x201E,
196     0x85 => 0x2026,
197     0x86 => 0x2020,
198     0x87 => 0x2021,
199     0x88 => 0x02C6,
200     0x89 => 0x2030,
201     0x8A => 0x0160,
202     0x8B => 0x2039,
203     0x8C => 0x0152,
204     0x8D => 0xFFFD,
205     0x8E => 0x017D,
206     0x8F => 0xFFFD,
207     0x90 => 0xFFFD,
208     0x91 => 0x2018,
209     0x92 => 0x2019,
210     0x93 => 0x201C,
211     0x94 => 0x201D,
212     0x95 => 0x2022,
213     0x96 => 0x2013,
214     0x97 => 0x2014,
215     0x98 => 0x02DC,
216     0x99 => 0x2122,
217     0x9A => 0x0161,
218     0x9B => 0x203A,
219     0x9C => 0x0153,
220     0x9D => 0xFFFD,
221     0x9E => 0x017E,
222     0x9F => 0x0178,
223     }; # $charref_map
## Everything in the list below maps to U+FFFD: C0 controls other than
## HT/LF/FF/CR, DEL, the surrogate range, and the noncharacters at the end
## of every plane.  The 0xFDD0.. range stops at 0xFDDF here; the |ISSUE|
## marker records that the end point 0xFDEF is in question.
224     $charref_map->{$_} = 0xFFFD
225     for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F,
226     0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: 0xFDEF
227     0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,
228     0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
229     0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
230     0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE,
231     0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF;
232    
233     ## Implementations MUST act as if they used the state machine in the spec.
234    
## Resets the tokenizer-related fields of |$self| to their initial values.
##
## Takes the parser object as its only argument.  Puts the tokenizer into
## |DATA_STATE| with the |PCDATA| content model, clears the current token,
## current attribute, last start tag name and self-closing flag, empties the
## character buffer and the queue of pending tokens, and then runs the
## |next-input-character| macro once.
235     sub _initialize_tokenizer ($) {
236     my $self = shift;
237    
238     ## NOTE: Fields set by |new| constructor:
239     #$self->{level}
240     #$self->{set_nc}
241     #$self->{parse_error}
242 wakaba 1.3 #$self->{is_xml} (if XML)
243 wakaba 1.1
244     $self->{state} = DATA_STATE; # MUST
245 wakaba 1.12 $self->{s_kwd} = ''; # Data state keyword
246     #$self->{kwd} = ''; # State-dependent keyword; initialized when used
247 wakaba 1.1 #$self->{entity__value}; # initialized when used
248     #$self->{entity__match}; # initialized when used
249     $self->{content_model} = PCDATA_CONTENT_MODEL; # be
250     undef $self->{ct}; # current token
251     undef $self->{ca}; # current attribute
252     undef $self->{last_stag_name}; # last emitted start tag name
253     #$self->{prev_state}; # initialized when used
254     delete $self->{self_closing};
255     $self->{char_buffer} = '';
256     $self->{char_buffer_pos} = 0;
## -1 is the end-of-file sentinel for |nc|: the tokenizer emits an
## |END_OF_FILE_TOKEN| when it sees |$self->{nc} == -1|.
257     $self->{nc} = -1; # next input character
258     #$self->{next_nc}
## |!!!name| lines are preprocessor macros of this |.pm.src| file; their
## expansion is not visible here.  Presumably this one loads the first
## input character into |$self->{nc}| -- confirm against the macro
## definition.
259     !!!next-input-character;
## Queue of tokens waiting to be returned; |_get_next_token| shifts from it
## before running the state machine.
260     $self->{token} = [];
261     # $self->{escape}
262     } # _initialize_tokenizer
263    
264     ## A token has:
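265     ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
266 wakaba 1.11 ## CHARACTER_TOKEN, END_OF_FILE_TOKEN, PI_TOKEN, ABORT_TOKEN, or one of
## the XML-only token types (END_OF_DOCTYPE_TOKEN, ATTLIST_TOKEN,
## ELEMENT_TOKEN, GENERAL_ENTITY_TOKEN, PARAMETER_ENTITY_TOKEN, NOTATION_TOKEN)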
267 wakaba 1.1 ## ->{name} (DOCTYPE_TOKEN)
268     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
269 wakaba 1.11 ## ->{target} (PI_TOKEN)
270 wakaba 1.1 ## ->{pubid} (DOCTYPE_TOKEN)
271     ## ->{sysid} (DOCTYPE_TOKEN)
272     ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
273     ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
274     ## ->{name}
275     ## ->{value}
276     ## ->{has_reference} == 1 or 0
277 wakaba 1.11 ## ->{index}: Index of the attribute in a tag.
278     ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN, PI_TOKEN)
279 wakaba 1.7 ## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN)
280 wakaba 1.11 ## ->{last_index} (ELEMENT_TOKEN): Next attribute's index - 1.
281 wakaba 1.12 ## ->{has_internal_subset} = 1 or 0 (DOCTYPE_TOKEN)
282    
283 wakaba 1.1 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
284     ## The token's |->{self_closing}| field is used to save the value of
285     ## |$self->{self_closing}| while the token is pushed back to the stack.
286    
287     ## Emitted token MUST immediately be handled by the tree construction state.
288    
289     ## Before each step, UA MAY check to see if either one of the scripts in
290     ## "list of scripts that will execute as soon as possible" or the first
291     ## script in the "list of scripts that will execute asynchronously",
292     ## has completed loading. If one has, then it MUST be executed
293     ## and removed from the list.
294    
295     ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
296     ## (This requirement was dropped from HTML5 spec, unfortunately.)
297    
## Lookup table of the code points the tokenizer treats as space characters.
## It is indexed with the next input character, as in
## |$is_space->{$self->{nc}}|; a missing key means "not a space".  The VT
## and CR entries are commented out, so neither is treated as a space here.
298     my $is_space = {
299     0x0009 => 1, # CHARACTER TABULATION (HT)
300     0x000A => 1, # LINE FEED (LF)
301     #0x000B => 0, # LINE TABULATION (VT)
302 wakaba 1.12 0x000C => 1, # FORM FEED (FF) ## XML5: Not a space character.
303 wakaba 1.1 #0x000D => 1, # CARRIAGE RETURN (CR)
304     0x0020 => 1, # SPACE (SP)
305     };
306    
307     sub _get_next_token ($) {
308     my $self = shift;
309    
310     if ($self->{self_closing}) {
311     !!!parse-error (type => 'nestc', token => $self->{ct});
312     ## NOTE: The |self_closing| flag is only set by start tag token.
313     ## In addition, when a start tag token is emitted, it is always set to
314     ## |ct|.
315     delete $self->{self_closing};
316     }
317    
318     if (@{$self->{token}}) {
319     $self->{self_closing} = $self->{token}->[0]->{self_closing};
320     return shift @{$self->{token}};
321     }
322    
323     A: {
324     if ($self->{state} == PCDATA_STATE) {
325     ## NOTE: Same as |DATA_STATE|, but only for |PCDATA| content model.
326    
327     if ($self->{nc} == 0x0026) { # &
328     !!!cp (0.1);
329     ## NOTE: In the spec, the tokenizer is switched to the
330     ## "entity data state". In this implementation, the tokenizer
331     ## is switched to the |ENTITY_STATE|, which is an implementation
332     ## of the "consume a character reference" algorithm.
333     $self->{entity_add} = -1;
334     $self->{prev_state} = DATA_STATE;
335     $self->{state} = ENTITY_STATE;
336     !!!next-input-character;
337     redo A;
338     } elsif ($self->{nc} == 0x003C) { # <
339     !!!cp (0.2);
340     $self->{state} = TAG_OPEN_STATE;
341     !!!next-input-character;
342     redo A;
343     } elsif ($self->{nc} == -1) {
344     !!!cp (0.3);
345     !!!emit ({type => END_OF_FILE_TOKEN,
346     line => $self->{line}, column => $self->{column}});
347     last A; ## TODO: ok?
348     } else {
349     !!!cp (0.4);
350     #
351     }
352    
353     # Anything else
354     my $token = {type => CHARACTER_TOKEN,
355     data => chr $self->{nc},
356     line => $self->{line}, column => $self->{column},
357     };
358     $self->{read_until}->($token->{data}, q[<&], length $token->{data});
359    
360     ## Stay in the state.
361     !!!next-input-character;
362     !!!emit ($token);
363     redo A;
364     } elsif ($self->{state} == DATA_STATE) {
365     $self->{s_kwd} = '' unless defined $self->{s_kwd};
366     if ($self->{nc} == 0x0026) { # &
367     $self->{s_kwd} = '';
368     if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
369     not $self->{escape}) {
370     !!!cp (1);
371     ## NOTE: In the spec, the tokenizer is switched to the
372     ## "entity data state". In this implementation, the tokenizer
373     ## is switched to the |ENTITY_STATE|, which is an implementation
374     ## of the "consume a character reference" algorithm.
375     $self->{entity_add} = -1;
376     $self->{prev_state} = DATA_STATE;
377     $self->{state} = ENTITY_STATE;
378     !!!next-input-character;
379     redo A;
380     } else {
381     !!!cp (2);
382     #
383     }
384     } elsif ($self->{nc} == 0x002D) { # -
385     if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
386 wakaba 1.5 if ($self->{s_kwd} eq '<!-') {
387 wakaba 1.1 !!!cp (3);
388     $self->{escape} = 1; # unless $self->{escape};
389     $self->{s_kwd} = '--';
390     #
391 wakaba 1.5 } elsif ($self->{s_kwd} eq '-') {
392 wakaba 1.1 !!!cp (4);
393     $self->{s_kwd} = '--';
394     #
395 wakaba 1.5 } elsif ($self->{s_kwd} eq '<!' or $self->{s_kwd} eq '-') {
396     !!!cp (4.1);
397     $self->{s_kwd} .= '-';
398     #
399 wakaba 1.1 } else {
400     !!!cp (5);
401 wakaba 1.5 $self->{s_kwd} = '-';
402 wakaba 1.1 #
403     }
404     }
405    
406     #
407     } elsif ($self->{nc} == 0x0021) { # !
408     if (length $self->{s_kwd}) {
409     !!!cp (5.1);
410     $self->{s_kwd} .= '!';
411     #
412     } else {
413     !!!cp (5.2);
414     #$self->{s_kwd} = '';
415     #
416     }
417     #
418     } elsif ($self->{nc} == 0x003C) { # <
419     if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
420     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
421     not $self->{escape})) {
422     !!!cp (6);
423     $self->{state} = TAG_OPEN_STATE;
424     !!!next-input-character;
425     redo A;
426     } else {
427     !!!cp (7);
428     $self->{s_kwd} = '';
429     #
430     }
431     } elsif ($self->{nc} == 0x003E) { # >
432     if ($self->{escape} and
433     ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
434     if ($self->{s_kwd} eq '--') {
435     !!!cp (8);
436     delete $self->{escape};
437 wakaba 1.5 #
438 wakaba 1.1 } else {
439     !!!cp (9);
440 wakaba 1.5 #
441 wakaba 1.1 }
442 wakaba 1.5 } elsif ($self->{is_xml} and $self->{s_kwd} eq ']]') {
443     !!!cp (9.1);
444     !!!parse-error (type => 'unmatched mse', ## TODO: type
445     line => $self->{line_prev},
446     column => $self->{column_prev} - 1);
447     #
448 wakaba 1.1 } else {
449     !!!cp (10);
450 wakaba 1.5 #
451 wakaba 1.1 }
452    
453     $self->{s_kwd} = '';
454     #
455 wakaba 1.5 } elsif ($self->{nc} == 0x005D) { # ]
456     if ($self->{s_kwd} eq ']' or $self->{s_kwd} eq '') {
457     !!!cp (10.1);
458     $self->{s_kwd} .= ']';
459     } elsif ($self->{s_kwd} eq ']]') {
460     !!!cp (10.2);
461     #
462     } else {
463     !!!cp (10.3);
464     $self->{s_kwd} = '';
465     }
466     #
467 wakaba 1.1 } elsif ($self->{nc} == -1) {
468     !!!cp (11);
469     $self->{s_kwd} = '';
470     !!!emit ({type => END_OF_FILE_TOKEN,
471     line => $self->{line}, column => $self->{column}});
472     last A; ## TODO: ok?
473     } else {
474     !!!cp (12);
475     $self->{s_kwd} = '';
476     #
477     }
478    
479     # Anything else
480     my $token = {type => CHARACTER_TOKEN,
481     data => chr $self->{nc},
482     line => $self->{line}, column => $self->{column},
483     };
484 wakaba 1.5 if ($self->{read_until}->($token->{data}, q{-!<>&\]},
485 wakaba 1.1 length $token->{data})) {
486     $self->{s_kwd} = '';
487     }
488    
489     ## Stay in the data state.
490 wakaba 1.5 if (not $self->{is_xml} and
491     $self->{content_model} == PCDATA_CONTENT_MODEL) {
492 wakaba 1.1 !!!cp (13);
493     $self->{state} = PCDATA_STATE;
494     } else {
495     !!!cp (14);
496     ## Stay in the state.
497     }
498     !!!next-input-character;
499     !!!emit ($token);
500     redo A;
501     } elsif ($self->{state} == TAG_OPEN_STATE) {
502 wakaba 1.10 ## XML5: "tag state".
503    
504 wakaba 1.1 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
505     if ($self->{nc} == 0x002F) { # /
506     !!!cp (15);
507     !!!next-input-character;
508     $self->{state} = CLOSE_TAG_OPEN_STATE;
509     redo A;
510     } elsif ($self->{nc} == 0x0021) { # !
511     !!!cp (15.1);
512 wakaba 1.12 $self->{s_kwd} = $self->{escaped} ? '' : '<';
513 wakaba 1.1 #
514     } else {
515     !!!cp (16);
516 wakaba 1.12 $self->{s_kwd} = '';
517 wakaba 1.1 #
518     }
519    
520     ## reconsume
521     $self->{state} = DATA_STATE;
522     !!!emit ({type => CHARACTER_TOKEN, data => '<',
523     line => $self->{line_prev},
524     column => $self->{column_prev},
525     });
526     redo A;
527     } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
528     if ($self->{nc} == 0x0021) { # !
529     !!!cp (17);
530     $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
531     !!!next-input-character;
532     redo A;
533     } elsif ($self->{nc} == 0x002F) { # /
534     !!!cp (18);
535     $self->{state} = CLOSE_TAG_OPEN_STATE;
536     !!!next-input-character;
537     redo A;
538     } elsif (0x0041 <= $self->{nc} and
539     $self->{nc} <= 0x005A) { # A..Z
540     !!!cp (19);
541     $self->{ct}
542     = {type => START_TAG_TOKEN,
543 wakaba 1.4 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
544 wakaba 1.1 line => $self->{line_prev},
545     column => $self->{column_prev}};
546     $self->{state} = TAG_NAME_STATE;
547     !!!next-input-character;
548     redo A;
549     } elsif (0x0061 <= $self->{nc} and
550     $self->{nc} <= 0x007A) { # a..z
551     !!!cp (20);
552     $self->{ct} = {type => START_TAG_TOKEN,
553     tag_name => chr ($self->{nc}),
554     line => $self->{line_prev},
555     column => $self->{column_prev}};
556     $self->{state} = TAG_NAME_STATE;
557     !!!next-input-character;
558     redo A;
559     } elsif ($self->{nc} == 0x003E) { # >
560     !!!cp (21);
561     !!!parse-error (type => 'empty start tag',
562     line => $self->{line_prev},
563     column => $self->{column_prev});
564     $self->{state} = DATA_STATE;
565 wakaba 1.5 $self->{s_kwd} = '';
566 wakaba 1.1 !!!next-input-character;
567    
568     !!!emit ({type => CHARACTER_TOKEN, data => '<>',
569     line => $self->{line_prev},
570     column => $self->{column_prev},
571     });
572    
573     redo A;
574     } elsif ($self->{nc} == 0x003F) { # ?
575 wakaba 1.8 if ($self->{is_xml}) {
576     !!!cp (22.1);
577     $self->{state} = PI_STATE;
578     !!!next-input-character;
579     redo A;
580     } else {
581     !!!cp (22);
582     !!!parse-error (type => 'pio',
583     line => $self->{line_prev},
584     column => $self->{column_prev});
585     $self->{state} = BOGUS_COMMENT_STATE;
586     $self->{ct} = {type => COMMENT_TOKEN, data => '',
587     line => $self->{line_prev},
588     column => $self->{column_prev},
589     };
590     ## $self->{nc} is intentionally left as is
591     redo A;
592     }
593 wakaba 1.9 } elsif (not $self->{is_xml} or $is_space->{$self->{nc}}) {
594 wakaba 1.1 !!!cp (23);
595     !!!parse-error (type => 'bare stago',
596     line => $self->{line_prev},
597     column => $self->{column_prev});
598     $self->{state} = DATA_STATE;
599 wakaba 1.5 $self->{s_kwd} = '';
600 wakaba 1.1 ## reconsume
601    
602     !!!emit ({type => CHARACTER_TOKEN, data => '<',
603     line => $self->{line_prev},
604     column => $self->{column_prev},
605     });
606    
607     redo A;
608 wakaba 1.9 } else {
609     ## XML5: "<:" is a parse error.
610     !!!cp (23.1);
611     $self->{ct} = {type => START_TAG_TOKEN,
612     tag_name => chr ($self->{nc}),
613     line => $self->{line_prev},
614     column => $self->{column_prev}};
615     $self->{state} = TAG_NAME_STATE;
616     !!!next-input-character;
617     redo A;
618 wakaba 1.1 }
619     } else {
620     die "$0: $self->{content_model} in tag open";
621     }
622     } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
623     ## NOTE: The "close tag open state" in the spec is implemented as
624     ## |CLOSE_TAG_OPEN_STATE| and |CDATA_RCDATA_CLOSE_TAG_STATE|.
625    
626 wakaba 1.10 ## XML5: "end tag state".
627    
628 wakaba 1.1 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
629     if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
630     if (defined $self->{last_stag_name}) {
631     $self->{state} = CDATA_RCDATA_CLOSE_TAG_STATE;
632 wakaba 1.12 $self->{kwd} = '';
633 wakaba 1.1 ## Reconsume.
634     redo A;
635     } else {
636     ## No start tag token has ever been emitted
637     ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
638     !!!cp (28);
639     $self->{state} = DATA_STATE;
640 wakaba 1.5 $self->{s_kwd} = '';
641 wakaba 1.1 ## Reconsume.
642     !!!emit ({type => CHARACTER_TOKEN, data => '</',
643     line => $l, column => $c,
644     });
645     redo A;
646     }
647     }
648    
649     if (0x0041 <= $self->{nc} and
650     $self->{nc} <= 0x005A) { # A..Z
651     !!!cp (29);
652     $self->{ct}
653     = {type => END_TAG_TOKEN,
654 wakaba 1.4 tag_name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
655 wakaba 1.1 line => $l, column => $c};
656     $self->{state} = TAG_NAME_STATE;
657     !!!next-input-character;
658     redo A;
659     } elsif (0x0061 <= $self->{nc} and
660     $self->{nc} <= 0x007A) { # a..z
661     !!!cp (30);
662     $self->{ct} = {type => END_TAG_TOKEN,
663     tag_name => chr ($self->{nc}),
664     line => $l, column => $c};
665     $self->{state} = TAG_NAME_STATE;
666     !!!next-input-character;
667     redo A;
668     } elsif ($self->{nc} == 0x003E) { # >
669     !!!parse-error (type => 'empty end tag',
670     line => $self->{line_prev}, ## "<" in "</>"
671     column => $self->{column_prev} - 1);
672     $self->{state} = DATA_STATE;
673 wakaba 1.5 $self->{s_kwd} = '';
674 wakaba 1.10 if ($self->{is_xml}) {
675     !!!cp (31);
676     ## XML5: No parse error.
677    
678     ## NOTE: This parser raises a parse error, since it supports
679     ## XML1, not XML5.
680    
681     ## NOTE: A short end tag token.
682     my $ct = {type => END_TAG_TOKEN,
683     tag_name => '',
684     line => $self->{line_prev},
685     column => $self->{column_prev} - 1,
686     };
687     !!!next-input-character;
688     !!!emit ($ct);
689     } else {
690     !!!cp (31.1);
691     !!!next-input-character;
692     }
693 wakaba 1.1 redo A;
694     } elsif ($self->{nc} == -1) {
695     !!!cp (32);
696     !!!parse-error (type => 'bare etago');
697 wakaba 1.5 $self->{s_kwd} = '';
698 wakaba 1.1 $self->{state} = DATA_STATE;
699     # reconsume
700    
701     !!!emit ({type => CHARACTER_TOKEN, data => '</',
702     line => $l, column => $c,
703     });
704    
705     redo A;
706 wakaba 1.10 } elsif (not $self->{is_xml} or
707     $is_space->{$self->{nc}}) {
708 wakaba 1.1 !!!cp (33);
709 wakaba 1.10 !!!parse-error (type => 'bogus end tag',
710     line => $self->{line_prev}, # "<" of "</"
711     column => $self->{column_prev} - 1);
712 wakaba 1.1 $self->{state} = BOGUS_COMMENT_STATE;
713     $self->{ct} = {type => COMMENT_TOKEN, data => '',
714     line => $self->{line_prev}, # "<" of "</"
715     column => $self->{column_prev} - 1,
716     };
717     ## NOTE: $self->{nc} is intentionally left as is.
718     ## Although the "anything else" case of the spec not explicitly
719     ## states that the next input character is to be reconsumed,
720     ## it will be included to the |data| of the comment token
721     ## generated from the bogus end tag, as defined in the
722     ## "bogus comment state" entry.
723     redo A;
724 wakaba 1.10 } else {
725     ## XML5: "</:" is a parse error.
726     !!!cp (30.1);
727     $self->{ct} = {type => END_TAG_TOKEN,
728     tag_name => chr ($self->{nc}),
729     line => $l, column => $c};
730     $self->{state} = TAG_NAME_STATE; ## XML5: "end tag name state".
731     !!!next-input-character;
732     redo A;
733 wakaba 1.1 }
734     } elsif ($self->{state} == CDATA_RCDATA_CLOSE_TAG_STATE) {
735 wakaba 1.12 my $ch = substr $self->{last_stag_name}, length $self->{kwd}, 1;
736 wakaba 1.1 if (length $ch) {
737     my $CH = $ch;
738     $ch =~ tr/a-z/A-Z/;
739     my $nch = chr $self->{nc};
740     if ($nch eq $ch or $nch eq $CH) {
741     !!!cp (24);
742     ## Stay in the state.
743 wakaba 1.12 $self->{kwd} .= $nch;
744 wakaba 1.1 !!!next-input-character;
745     redo A;
746     } else {
747     !!!cp (25);
748     $self->{state} = DATA_STATE;
749 wakaba 1.5 $self->{s_kwd} = '';
750 wakaba 1.1 ## Reconsume.
751     !!!emit ({type => CHARACTER_TOKEN,
752 wakaba 1.12 data => '</' . $self->{kwd},
753 wakaba 1.1 line => $self->{line_prev},
754 wakaba 1.12 column => $self->{column_prev} - 1 - length $self->{kwd},
755 wakaba 1.1 });
756     redo A;
757     }
758     } else { # after "<{tag-name}"
759     unless ($is_space->{$self->{nc}} or
760     {
761     0x003E => 1, # >
762     0x002F => 1, # /
763     -1 => 1, # EOF
764     }->{$self->{nc}}) {
765     !!!cp (26);
766     ## Reconsume.
767     $self->{state} = DATA_STATE;
768 wakaba 1.5 $self->{s_kwd} = '';
769 wakaba 1.1 !!!emit ({type => CHARACTER_TOKEN,
770 wakaba 1.12 data => '</' . $self->{kwd},
771 wakaba 1.1 line => $self->{line_prev},
772 wakaba 1.12 column => $self->{column_prev} - 1 - length $self->{kwd},
773 wakaba 1.1 });
774     redo A;
775     } else {
776     !!!cp (27);
777     $self->{ct}
778     = {type => END_TAG_TOKEN,
779     tag_name => $self->{last_stag_name},
780     line => $self->{line_prev},
781 wakaba 1.12 column => $self->{column_prev} - 1 - length $self->{kwd}};
782 wakaba 1.1 $self->{state} = TAG_NAME_STATE;
783     ## Reconsume.
784     redo A;
785     }
786     }
787     } elsif ($self->{state} == TAG_NAME_STATE) {
788     if ($is_space->{$self->{nc}}) {
789     !!!cp (34);
790     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
791     !!!next-input-character;
792     redo A;
793     } elsif ($self->{nc} == 0x003E) { # >
794     if ($self->{ct}->{type} == START_TAG_TOKEN) {
795     !!!cp (35);
796     $self->{last_stag_name} = $self->{ct}->{tag_name};
797     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
798     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
799     #if ($self->{ct}->{attributes}) {
800     # ## NOTE: This should never be reached.
801     # !!! cp (36);
802     # !!! parse-error (type => 'end tag attribute');
803     #} else {
804     !!!cp (37);
805     #}
806     } else {
807     die "$0: $self->{ct}->{type}: Unknown token type";
808     }
809     $self->{state} = DATA_STATE;
810 wakaba 1.5 $self->{s_kwd} = '';
811 wakaba 1.1 !!!next-input-character;
812    
813     !!!emit ($self->{ct}); # start tag or end tag
814    
815     redo A;
816     } elsif (0x0041 <= $self->{nc} and
817     $self->{nc} <= 0x005A) { # A..Z
818     !!!cp (38);
819 wakaba 1.4 $self->{ct}->{tag_name}
820     .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
821 wakaba 1.1 # start tag or end tag
822     ## Stay in this state
823     !!!next-input-character;
824     redo A;
825     } elsif ($self->{nc} == -1) {
826     !!!parse-error (type => 'unclosed tag');
827     if ($self->{ct}->{type} == START_TAG_TOKEN) {
828     !!!cp (39);
829     $self->{last_stag_name} = $self->{ct}->{tag_name};
830     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
831     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
832     #if ($self->{ct}->{attributes}) {
833     # ## NOTE: This state should never be reached.
834     # !!! cp (40);
835     # !!! parse-error (type => 'end tag attribute');
836     #} else {
837     !!!cp (41);
838     #}
839     } else {
840     die "$0: $self->{ct}->{type}: Unknown token type";
841     }
842     $self->{state} = DATA_STATE;
843 wakaba 1.5 $self->{s_kwd} = '';
844 wakaba 1.1 # reconsume
845    
846     !!!emit ($self->{ct}); # start tag or end tag
847    
848     redo A;
849     } elsif ($self->{nc} == 0x002F) { # /
850     !!!cp (42);
851     $self->{state} = SELF_CLOSING_START_TAG_STATE;
852     !!!next-input-character;
853     redo A;
854     } else {
855     !!!cp (44);
856     $self->{ct}->{tag_name} .= chr $self->{nc};
857     # start tag or end tag
858     ## Stay in the state
859     !!!next-input-character;
860     redo A;
861     }
862     } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
863 wakaba 1.11 ## XML5: "Tag attribute name before state".
864    
865 wakaba 1.1 if ($is_space->{$self->{nc}}) {
866     !!!cp (45);
867     ## Stay in the state
868     !!!next-input-character;
869     redo A;
870     } elsif ($self->{nc} == 0x003E) { # >
871     if ($self->{ct}->{type} == START_TAG_TOKEN) {
872     !!!cp (46);
873     $self->{last_stag_name} = $self->{ct}->{tag_name};
874     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
875     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
876     if ($self->{ct}->{attributes}) {
877     !!!cp (47);
878     !!!parse-error (type => 'end tag attribute');
879     } else {
880     !!!cp (48);
881     }
882     } else {
883     die "$0: $self->{ct}->{type}: Unknown token type";
884     }
885     $self->{state} = DATA_STATE;
886 wakaba 1.5 $self->{s_kwd} = '';
887 wakaba 1.1 !!!next-input-character;
888    
889     !!!emit ($self->{ct}); # start tag or end tag
890    
891     redo A;
892     } elsif (0x0041 <= $self->{nc} and
893     $self->{nc} <= 0x005A) { # A..Z
894     !!!cp (49);
895     $self->{ca}
896 wakaba 1.4 = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
897 wakaba 1.1 value => '',
898     line => $self->{line}, column => $self->{column}};
899     $self->{state} = ATTRIBUTE_NAME_STATE;
900     !!!next-input-character;
901     redo A;
902     } elsif ($self->{nc} == 0x002F) { # /
903     !!!cp (50);
904     $self->{state} = SELF_CLOSING_START_TAG_STATE;
905     !!!next-input-character;
906     redo A;
907     } elsif ($self->{nc} == -1) {
908     !!!parse-error (type => 'unclosed tag');
909     if ($self->{ct}->{type} == START_TAG_TOKEN) {
910     !!!cp (52);
911     $self->{last_stag_name} = $self->{ct}->{tag_name};
912     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
913     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
914     if ($self->{ct}->{attributes}) {
915     !!!cp (53);
916     !!!parse-error (type => 'end tag attribute');
917     } else {
918     !!!cp (54);
919     }
920     } else {
921     die "$0: $self->{ct}->{type}: Unknown token type";
922     }
923     $self->{state} = DATA_STATE;
924 wakaba 1.5 $self->{s_kwd} = '';
925 wakaba 1.1 # reconsume
926    
927     !!!emit ($self->{ct}); # start tag or end tag
928    
929     redo A;
930     } else {
931     if ({
932     0x0022 => 1, # "
933     0x0027 => 1, # '
934     0x003D => 1, # =
935     }->{$self->{nc}}) {
936     !!!cp (55);
937 wakaba 1.11 ## XML5: Not a parse error.
938 wakaba 1.1 !!!parse-error (type => 'bad attribute name');
939     } else {
940     !!!cp (56);
941 wakaba 1.11 ## XML5: ":" raises a parse error and is ignored.
942 wakaba 1.1 }
943     $self->{ca}
944     = {name => chr ($self->{nc}),
945     value => '',
946     line => $self->{line}, column => $self->{column}};
947     $self->{state} = ATTRIBUTE_NAME_STATE;
948     !!!next-input-character;
949     redo A;
950     }
951     } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
952 wakaba 1.11 ## XML5: "Tag attribute name state".
953    
954 wakaba 1.1 my $before_leave = sub {
      ## Commit the attribute under construction, |$self->{ca}|, to the
      ## current tag token |$self->{ct}|.  Invoked by the branches of
      ## this state that stop reading the attribute name (white space,
      ## "=", ">", "/" and EOF) before they switch to another state.
      ##
      ## NOTE: The nested |exists| test below autovivifies
      ## |$self->{ct}->{attributes}| as an empty hash if it did not
      ## exist yet, so that member is always defined once this has run.
955     if (exists $self->{ct}->{attributes} # start tag or end tag
956     ->{$self->{ca}->{name}}) { # MUST
957     !!!cp (57);
      ## An attribute with this name is already on the token: report
      ## the error at the position recorded in |$self->{ca}| and leave
      ## the token's attribute table untouched.
958     !!!parse-error (type => 'duplicate attribute', text => $self->{ca}->{name}, line => $self->{ca}->{line}, column => $self->{ca}->{column});
959     ## Discard $self->{ca} # MUST
960     } else {
961     !!!cp (58);
      ## First attribute with this name: register it on the token,
      ## keyed by name.
962     $self->{ct}->{attributes}->{$self->{ca}->{name}}
963     = $self->{ca};
      ## Number the attribute using the token's |last_index| counter,
      ## which is incremented before being assigned.
964 wakaba 1.11 $self->{ca}->{index} = ++$self->{ct}->{last_index};
965 wakaba 1.1 }
966     }; # $before_leave
967    
968     if ($is_space->{$self->{nc}}) {
969     !!!cp (59);
970     $before_leave->();
971     $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
972     !!!next-input-character;
973     redo A;
974     } elsif ($self->{nc} == 0x003D) { # =
975     !!!cp (60);
976     $before_leave->();
977     $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
978     !!!next-input-character;
979     redo A;
980     } elsif ($self->{nc} == 0x003E) { # >
981 wakaba 1.11 if ($self->{is_xml}) {
982     !!!cp (60.1);
983     ## XML5: Not a parse error.
984     !!!parse-error (type => 'no attr value'); ## TODO: type
985     } else {
986     !!!cp (60.2);
987     }
988    
989 wakaba 1.1 $before_leave->();
990     if ($self->{ct}->{type} == START_TAG_TOKEN) {
991     !!!cp (61);
992     $self->{last_stag_name} = $self->{ct}->{tag_name};
993     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
994     !!!cp (62);
995     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
996     if ($self->{ct}->{attributes}) {
997     !!!parse-error (type => 'end tag attribute');
998     }
999     } else {
1000     die "$0: $self->{ct}->{type}: Unknown token type";
1001     }
1002     $self->{state} = DATA_STATE;
1003 wakaba 1.5 $self->{s_kwd} = '';
1004 wakaba 1.1 !!!next-input-character;
1005    
1006     !!!emit ($self->{ct}); # start tag or end tag
1007    
1008     redo A;
1009     } elsif (0x0041 <= $self->{nc} and
1010     $self->{nc} <= 0x005A) { # A..Z
1011     !!!cp (63);
1012 wakaba 1.4 $self->{ca}->{name}
1013     .= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020));
1014 wakaba 1.1 ## Stay in the state
1015     !!!next-input-character;
1016     redo A;
1017     } elsif ($self->{nc} == 0x002F) { # /
1018 wakaba 1.11 if ($self->{is_xml}) {
1019     !!!cp (64);
1020     ## XML5: Not a parse error.
1021     !!!parse-error (type => 'no attr value'); ## TODO: type
1022     } else {
1023     !!!cp (64.1);
1024     }
1025    
1026 wakaba 1.1 $before_leave->();
1027     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1028     !!!next-input-character;
1029     redo A;
1030     } elsif ($self->{nc} == -1) {
1031     !!!parse-error (type => 'unclosed tag');
1032     $before_leave->();
1033     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1034     !!!cp (66);
1035     $self->{last_stag_name} = $self->{ct}->{tag_name};
1036     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1037     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1038     if ($self->{ct}->{attributes}) {
1039     !!!cp (67);
1040     !!!parse-error (type => 'end tag attribute');
1041     } else {
1042     ## NOTE: This state should never be reached.
1043     !!!cp (68);
1044     }
1045     } else {
1046     die "$0: $self->{ct}->{type}: Unknown token type";
1047     }
1048     $self->{state} = DATA_STATE;
1049 wakaba 1.5 $self->{s_kwd} = '';
1050 wakaba 1.1 # reconsume
1051    
1052     !!!emit ($self->{ct}); # start tag or end tag
1053    
1054     redo A;
1055     } else {
1056     if ($self->{nc} == 0x0022 or # "
1057     $self->{nc} == 0x0027) { # '
1058     !!!cp (69);
1059 wakaba 1.11 ## XML5: Not a parse error.
1060 wakaba 1.1 !!!parse-error (type => 'bad attribute name');
1061     } else {
1062     !!!cp (70);
1063     }
1064     $self->{ca}->{name} .= chr ($self->{nc});
1065     ## Stay in the state
1066     !!!next-input-character;
1067     redo A;
1068     }
1069     } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1070 wakaba 1.11 ## XML5: "Tag attribute name after state".
1071    
1072 wakaba 1.1 if ($is_space->{$self->{nc}}) {
1073     !!!cp (71);
1074     ## Stay in the state
1075     !!!next-input-character;
1076     redo A;
1077     } elsif ($self->{nc} == 0x003D) { # =
1078     !!!cp (72);
1079     $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1080     !!!next-input-character;
1081     redo A;
1082     } elsif ($self->{nc} == 0x003E) { # >
1083 wakaba 1.11 if ($self->{is_xml}) {
1084     !!!cp (72.1);
1085     ## XML5: Not a parse error.
1086     !!!parse-error (type => 'no attr value'); ## TODO: type
1087     } else {
1088     !!!cp (72.2);
1089     }
1090    
1091 wakaba 1.1 if ($self->{ct}->{type} == START_TAG_TOKEN) {
1092     !!!cp (73);
1093     $self->{last_stag_name} = $self->{ct}->{tag_name};
1094     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1095     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1096     if ($self->{ct}->{attributes}) {
1097     !!!cp (74);
1098     !!!parse-error (type => 'end tag attribute');
1099     } else {
1100     ## NOTE: This state should never be reached.
1101     !!!cp (75);
1102     }
1103     } else {
1104     die "$0: $self->{ct}->{type}: Unknown token type";
1105     }
1106     $self->{state} = DATA_STATE;
1107 wakaba 1.5 $self->{s_kwd} = '';
1108 wakaba 1.1 !!!next-input-character;
1109    
1110     !!!emit ($self->{ct}); # start tag or end tag
1111    
1112     redo A;
1113     } elsif (0x0041 <= $self->{nc} and
1114     $self->{nc} <= 0x005A) { # A..Z
1115     !!!cp (76);
1116     $self->{ca}
1117 wakaba 1.4 = {name => chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)),
1118 wakaba 1.1 value => '',
1119     line => $self->{line}, column => $self->{column}};
1120     $self->{state} = ATTRIBUTE_NAME_STATE;
1121     !!!next-input-character;
1122     redo A;
1123     } elsif ($self->{nc} == 0x002F) { # /
1124 wakaba 1.11 if ($self->{is_xml}) {
1125     !!!cp (77);
1126     ## XML5: Not a parse error.
1127     !!!parse-error (type => 'no attr value'); ## TODO: type
1128     } else {
1129     !!!cp (77.1);
1130     }
1131    
1132 wakaba 1.1 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1133     !!!next-input-character;
1134     redo A;
1135     } elsif ($self->{nc} == -1) {
1136     !!!parse-error (type => 'unclosed tag');
1137     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1138     !!!cp (79);
1139     $self->{last_stag_name} = $self->{ct}->{tag_name};
1140     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1141     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1142     if ($self->{ct}->{attributes}) {
1143     !!!cp (80);
1144     !!!parse-error (type => 'end tag attribute');
1145     } else {
1146     ## NOTE: This state should never be reached.
1147     !!!cp (81);
1148     }
1149     } else {
1150     die "$0: $self->{ct}->{type}: Unknown token type";
1151     }
1152 wakaba 1.5 $self->{s_kwd} = '';
1153 wakaba 1.1 $self->{state} = DATA_STATE;
1154     # reconsume
1155    
1156     !!!emit ($self->{ct}); # start tag or end tag
1157    
1158     redo A;
1159     } else {
1160 wakaba 1.11 if ($self->{is_xml}) {
1161     !!!cp (78.1);
1162     ## XML5: Not a parse error.
1163     !!!parse-error (type => 'no attr value'); ## TODO: type
1164     } else {
1165     !!!cp (78.2);
1166     }
1167    
1168 wakaba 1.1 if ($self->{nc} == 0x0022 or # "
1169     $self->{nc} == 0x0027) { # '
1170     !!!cp (78);
1171 wakaba 1.11 ## XML5: Not a parse error.
1172 wakaba 1.1 !!!parse-error (type => 'bad attribute name');
1173     } else {
1174     !!!cp (82);
1175     }
1176     $self->{ca}
1177     = {name => chr ($self->{nc}),
1178     value => '',
1179     line => $self->{line}, column => $self->{column}};
1180     $self->{state} = ATTRIBUTE_NAME_STATE;
1181     !!!next-input-character;
1182     redo A;
1183     }
1184     } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1185 wakaba 1.11 ## XML5: "Tag attribute value before state".
1186    
1187 wakaba 1.1 if ($is_space->{$self->{nc}}) {
1188     !!!cp (83);
1189     ## Stay in the state
1190     !!!next-input-character;
1191     redo A;
1192     } elsif ($self->{nc} == 0x0022) { # "
1193     !!!cp (84);
1194     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1195     !!!next-input-character;
1196     redo A;
1197     } elsif ($self->{nc} == 0x0026) { # &
1198     !!!cp (85);
1199     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1200     ## reconsume
1201     redo A;
1202     } elsif ($self->{nc} == 0x0027) { # '
1203     !!!cp (86);
1204     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1205     !!!next-input-character;
1206     redo A;
1207     } elsif ($self->{nc} == 0x003E) { # >
1208     !!!parse-error (type => 'empty unquoted attribute value');
1209     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1210     !!!cp (87);
1211     $self->{last_stag_name} = $self->{ct}->{tag_name};
1212     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1213     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1214     if ($self->{ct}->{attributes}) {
1215     !!!cp (88);
1216     !!!parse-error (type => 'end tag attribute');
1217     } else {
1218     ## NOTE: This state should never be reached.
1219     !!!cp (89);
1220     }
1221     } else {
1222     die "$0: $self->{ct}->{type}: Unknown token type";
1223     }
1224     $self->{state} = DATA_STATE;
1225 wakaba 1.5 $self->{s_kwd} = '';
1226 wakaba 1.1 !!!next-input-character;
1227    
1228     !!!emit ($self->{ct}); # start tag or end tag
1229    
1230     redo A;
1231     } elsif ($self->{nc} == -1) {
1232     !!!parse-error (type => 'unclosed tag');
1233     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1234     !!!cp (90);
1235     $self->{last_stag_name} = $self->{ct}->{tag_name};
1236     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1237     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1238     if ($self->{ct}->{attributes}) {
1239     !!!cp (91);
1240     !!!parse-error (type => 'end tag attribute');
1241     } else {
1242     ## NOTE: This state should never be reached.
1243     !!!cp (92);
1244     }
1245     } else {
1246     die "$0: $self->{ct}->{type}: Unknown token type";
1247     }
1248     $self->{state} = DATA_STATE;
1249 wakaba 1.5 $self->{s_kwd} = '';
1250 wakaba 1.1 ## reconsume
1251    
1252     !!!emit ($self->{ct}); # start tag or end tag
1253    
1254     redo A;
1255     } else {
1256     if ($self->{nc} == 0x003D) { # =
1257     !!!cp (93);
1258 wakaba 1.11 ## XML5: Not a parse error.
1259 wakaba 1.1 !!!parse-error (type => 'bad attribute value');
1260 wakaba 1.11 } elsif ($self->{is_xml}) {
1261     !!!cp (93.1);
1262     ## XML5: No parse error.
1263     !!!parse-error (type => 'unquoted attr value'); ## TODO
1264 wakaba 1.1 } else {
1265     !!!cp (94);
1266     }
1267     $self->{ca}->{value} .= chr ($self->{nc});
1268     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1269     !!!next-input-character;
1270     redo A;
1271     }
1272     } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1273 wakaba 1.15 ## XML5: "Tag attribute value double quoted state" and "DOCTYPE
1274     ## ATTLIST attribute value double quoted state".
1275 wakaba 1.11
1276 wakaba 1.1 if ($self->{nc} == 0x0022) { # "
1277 wakaba 1.15 if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1278     !!!cp (95.1);
1279     ## XML5: "DOCTYPE ATTLIST name after state".
1280     push @{$self->{ct}->{attrdefs}}, $self->{ca};
1281     $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1282     } else {
1283     !!!cp (95);
1284     ## XML5: "Tag attribute name before state".
1285     $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1286     }
1287 wakaba 1.1 !!!next-input-character;
1288     redo A;
1289     } elsif ($self->{nc} == 0x0026) { # &
1290     !!!cp (96);
1291 wakaba 1.11 ## XML5: Not defined yet.
1292    
1293 wakaba 1.1 ## NOTE: In the spec, the tokenizer is switched to the
1294     ## "entity in attribute value state". In this implementation, the
1295     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1296     ## implementation of the "consume a character reference" algorithm.
1297     $self->{prev_state} = $self->{state};
1298     $self->{entity_add} = 0x0022; # "
1299     $self->{state} = ENTITY_STATE;
1300     !!!next-input-character;
1301     redo A;
1302     } elsif ($self->{nc} == -1) {
1303     !!!parse-error (type => 'unclosed attribute value');
1304     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1305     !!!cp (97);
1306     $self->{last_stag_name} = $self->{ct}->{tag_name};
1307 wakaba 1.15
1308     $self->{state} = DATA_STATE;
1309     $self->{s_kwd} = '';
1310     ## reconsume
1311     !!!emit ($self->{ct}); # start tag
1312     redo A;
1313 wakaba 1.1 } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1314     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1315     if ($self->{ct}->{attributes}) {
1316     !!!cp (98);
1317     !!!parse-error (type => 'end tag attribute');
1318     } else {
1319     ## NOTE: This state should never be reached.
1320     !!!cp (99);
1321     }
1322 wakaba 1.15
1323     $self->{state} = DATA_STATE;
1324     $self->{s_kwd} = '';
1325     ## reconsume
1326     !!!emit ($self->{ct}); # end tag
1327     redo A;
1328     } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1329     ## XML5: No parse error above; not defined yet.
1330     push @{$self->{ct}->{attrdefs}}, $self->{ca};
1331     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1332     ## Reconsume.
1333     !!!emit ($self->{ct}); # ATTLIST
1334     redo A;
1335 wakaba 1.1 } else {
1336     die "$0: $self->{ct}->{type}: Unknown token type";
1337     }
1338     } else {
1339 wakaba 1.15 ## XML5 [ATTLIST]: Not defined yet.
1340 wakaba 1.11 if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1341     !!!cp (100);
1342     ## XML5: Not a parse error.
1343     !!!parse-error (type => 'lt in attr value'); ## TODO: type
1344     } else {
1345     !!!cp (100.1);
1346     }
1347 wakaba 1.1 $self->{ca}->{value} .= chr ($self->{nc});
1348     $self->{read_until}->($self->{ca}->{value},
1349 wakaba 1.11 q["&<],
1350 wakaba 1.1 length $self->{ca}->{value});
1351    
1352     ## Stay in the state
1353     !!!next-input-character;
1354     redo A;
1355     }
1356     } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1357 wakaba 1.15 ## XML5: "Tag attribute value single quoted state" and "DOCTYPE
1358     ## ATTLIST attribute value single quoted state".
1359 wakaba 1.11
1360 wakaba 1.1 if ($self->{nc} == 0x0027) { # '
1361 wakaba 1.15 if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1362     !!!cp (101.1);
1363     ## XML5: "DOCTYPE ATTLIST name after state".
1364     push @{$self->{ct}->{attrdefs}}, $self->{ca};
1365     $self->{state} = AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE;
1366     } else {
1367     !!!cp (101);
1368     ## XML5: "Before attribute name state" (sic).
1369     $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1370     }
1371 wakaba 1.1 !!!next-input-character;
1372     redo A;
1373     } elsif ($self->{nc} == 0x0026) { # &
1374     !!!cp (102);
1375 wakaba 1.11 ## XML5: Not defined yet.
1376    
1377 wakaba 1.1 ## NOTE: In the spec, the tokenizer is switched to the
1378     ## "entity in attribute value state". In this implementation, the
1379     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1380     ## implementation of the "consume a character reference" algorithm.
1381     $self->{entity_add} = 0x0027; # '
1382     $self->{prev_state} = $self->{state};
1383     $self->{state} = ENTITY_STATE;
1384     !!!next-input-character;
1385     redo A;
1386     } elsif ($self->{nc} == -1) {
1387     !!!parse-error (type => 'unclosed attribute value');
1388     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1389     !!!cp (103);
1390     $self->{last_stag_name} = $self->{ct}->{tag_name};
1391 wakaba 1.15
1392     $self->{state} = DATA_STATE;
1393     $self->{s_kwd} = '';
1394     ## reconsume
1395     !!!emit ($self->{ct}); # start tag
1396     redo A;
1397 wakaba 1.1 } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1398     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1399     if ($self->{ct}->{attributes}) {
1400     !!!cp (104);
1401     !!!parse-error (type => 'end tag attribute');
1402     } else {
1403     ## NOTE: This state should never be reached.
1404     !!!cp (105);
1405     }
1406 wakaba 1.15
1407     $self->{state} = DATA_STATE;
1408     $self->{s_kwd} = '';
1409     ## reconsume
1410     !!!emit ($self->{ct}); # end tag
1411     redo A;
1412     } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1413     ## XML5: No parse error above; not defined yet.
1414     push @{$self->{ct}->{attrdefs}}, $self->{ca};
1415     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1416     ## Reconsume.
1417     !!!emit ($self->{ct}); # ATTLIST
1418     redo A;
1419 wakaba 1.1 } else {
1420     die "$0: $self->{ct}->{type}: Unknown token type";
1421     }
1422     } else {
1423 wakaba 1.15 ## XML5 [ATTLIST]: Not defined yet.
1424 wakaba 1.11 if ($self->{is_xml} and $self->{nc} == 0x003C) { # <
1425     !!!cp (106);
1426     ## XML5: Not a parse error.
1427     !!!parse-error (type => 'lt in attr value'); ## TODO: type
1428     } else {
1429     !!!cp (106.1);
1430     }
1431 wakaba 1.1 $self->{ca}->{value} .= chr ($self->{nc});
1432     $self->{read_until}->($self->{ca}->{value},
1433 wakaba 1.11 q['&<],
1434 wakaba 1.1 length $self->{ca}->{value});
1435    
1436     ## Stay in the state
1437     !!!next-input-character;
1438     redo A;
1439     }
1440     } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1441 wakaba 1.11 ## XML5: "Tag attribute value unquoted state".
1442    
1443 wakaba 1.1 if ($is_space->{$self->{nc}}) {
1444 wakaba 1.15 if ($self->{ct}->{type} == ATTLIST_TOKEN) {
1445     !!!cp (107.1);
1446     push @{$self->{ct}->{attrdefs}}, $self->{ca};
1447     $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
1448     } else {
1449     !!!cp (107);
1450     ## XML5: "Tag attribute name before state".
1451     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1452     }
1453 wakaba 1.1 !!!next-input-character;
1454     redo A;
1455     } elsif ($self->{nc} == 0x0026) { # &
1456     !!!cp (108);
1457 wakaba 1.11
1458     ## XML5: Not defined yet.
1459    
1460 wakaba 1.1 ## NOTE: In the spec, the tokenizer is switched to the
1461     ## "entity in attribute value state". In this implementation, the
1462     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1463     ## implementation of the "consume a character reference" algorithm.
1464     $self->{entity_add} = -1;
1465     $self->{prev_state} = $self->{state};
1466     $self->{state} = ENTITY_STATE;
1467     !!!next-input-character;
1468     redo A;
1469     } elsif ($self->{nc} == 0x003E) { # >
1470     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1471     !!!cp (109);
1472     $self->{last_stag_name} = $self->{ct}->{tag_name};
1473 wakaba 1.15
1474     $self->{state} = DATA_STATE;
1475     $self->{s_kwd} = '';
1476     !!!next-input-character;
1477     !!!emit ($self->{ct}); # start tag
1478     redo A;
1479 wakaba 1.1 } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1480     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1481     if ($self->{ct}->{attributes}) {
1482     !!!cp (110);
1483     !!!parse-error (type => 'end tag attribute');
1484     } else {
1485     ## NOTE: This state should never be reached.
1486     !!!cp (111);
1487     }
1488 wakaba 1.15
1489     $self->{state} = DATA_STATE;
1490     $self->{s_kwd} = '';
1491     !!!next-input-character;
1492     !!!emit ($self->{ct}); # end tag
1493     redo A;
1494     } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1495     push @{$self->{ct}->{attrdefs}}, $self->{ca};
1496     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1497     !!!next-input-character;
1498     !!!emit ($self->{ct}); # ATTLIST
1499     redo A;
1500 wakaba 1.1 } else {
1501     die "$0: $self->{ct}->{type}: Unknown token type";
1502     }
1503     } elsif ($self->{nc} == -1) {
1504     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1505     !!!cp (112);
1506 wakaba 1.15 !!!parse-error (type => 'unclosed tag');
1507 wakaba 1.1 $self->{last_stag_name} = $self->{ct}->{tag_name};
1508 wakaba 1.15
1509     $self->{state} = DATA_STATE;
1510     $self->{s_kwd} = '';
1511     ## reconsume
1512     !!!emit ($self->{ct}); # start tag
1513     redo A;
1514 wakaba 1.1 } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1515 wakaba 1.15 !!!parse-error (type => 'unclosed tag');
1516 wakaba 1.1 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1517     if ($self->{ct}->{attributes}) {
1518     !!!cp (113);
1519     !!!parse-error (type => 'end tag attribute');
1520     } else {
1521     ## NOTE: This state should never be reached.
1522     !!!cp (114);
1523     }
1524 wakaba 1.15
1525     $self->{state} = DATA_STATE;
1526     $self->{s_kwd} = '';
1527     ## reconsume
1528     !!!emit ($self->{ct}); # end tag
1529     redo A;
1530     } elsif ($self->{ct}->{type} == ATTLIST_TOKEN) {
1531     !!!parse-error (type => 'unclosed md'); ## TODO: type
1532     push @{$self->{ct}->{attrdefs}}, $self->{ca};
1533     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1534     ## Reconsume.
1535     !!!emit ($self->{ct}); # ATTLIST
1536     redo A;
1537 wakaba 1.1 } else {
1538     die "$0: $self->{ct}->{type}: Unknown token type";
1539     }
1540     } else {
1541     if ({
1542     0x0022 => 1, # "
1543     0x0027 => 1, # '
1544     0x003D => 1, # =
1545     }->{$self->{nc}}) {
1546     !!!cp (115);
1547 wakaba 1.11 ## XML5: Not a parse error.
1548 wakaba 1.1 !!!parse-error (type => 'bad attribute value');
1549     } else {
1550     !!!cp (116);
1551     }
1552     $self->{ca}->{value} .= chr ($self->{nc});
1553     $self->{read_until}->($self->{ca}->{value},
1554     q["'=& >],
1555     length $self->{ca}->{value});
1556    
1557     ## Stay in the state
1558     !!!next-input-character;
1559     redo A;
1560     }
1561     } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1562     if ($is_space->{$self->{nc}}) {
1563     !!!cp (118);
1564     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1565     !!!next-input-character;
1566     redo A;
1567     } elsif ($self->{nc} == 0x003E) { # >
1568     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1569     !!!cp (119);
1570     $self->{last_stag_name} = $self->{ct}->{tag_name};
1571     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1572     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1573     if ($self->{ct}->{attributes}) {
1574     !!!cp (120);
1575     !!!parse-error (type => 'end tag attribute');
1576     } else {
1577     ## NOTE: This state should never be reached.
1578     !!!cp (121);
1579     }
1580     } else {
1581     die "$0: $self->{ct}->{type}: Unknown token type";
1582     }
1583     $self->{state} = DATA_STATE;
1584 wakaba 1.5 $self->{s_kwd} = '';
1585 wakaba 1.1 !!!next-input-character;
1586    
1587     !!!emit ($self->{ct}); # start tag or end tag
1588    
1589     redo A;
1590     } elsif ($self->{nc} == 0x002F) { # /
1591     !!!cp (122);
1592     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1593     !!!next-input-character;
1594     redo A;
1595     } elsif ($self->{nc} == -1) {
1596     !!!parse-error (type => 'unclosed tag');
1597     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1598     !!!cp (122.3);
1599     $self->{last_stag_name} = $self->{ct}->{tag_name};
1600     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1601     if ($self->{ct}->{attributes}) {
1602     !!!cp (122.1);
1603     !!!parse-error (type => 'end tag attribute');
1604     } else {
1605     ## NOTE: This state should never be reached.
1606     !!!cp (122.2);
1607     }
1608     } else {
1609     die "$0: $self->{ct}->{type}: Unknown token type";
1610     }
1611     $self->{state} = DATA_STATE;
1612 wakaba 1.5 $self->{s_kwd} = '';
1613 wakaba 1.1 ## Reconsume.
1614     !!!emit ($self->{ct}); # start tag or end tag
1615     redo A;
1616     } else {
1617     !!!cp ('124.1');
1618     !!!parse-error (type => 'no space between attributes');
1619     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1620     ## reconsume
1621     redo A;
1622     }
1623     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1624 wakaba 1.11 ## XML5: "Empty tag state".
1625    
1626 wakaba 1.1 if ($self->{nc} == 0x003E) { # >
1627     if ($self->{ct}->{type} == END_TAG_TOKEN) {
1628     !!!cp ('124.2');
1629     !!!parse-error (type => 'nestc', token => $self->{ct});
1630     ## TODO: Different type than slash in start tag
1631     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1632     if ($self->{ct}->{attributes}) {
1633     !!!cp ('124.4');
1634     !!!parse-error (type => 'end tag attribute');
1635     } else {
1636     !!!cp ('124.5');
1637     }
1638     ## TODO: Test |<title></title/>|
1639     } else {
1640     !!!cp ('124.3');
1641     $self->{self_closing} = 1;
1642     }
1643    
1644     $self->{state} = DATA_STATE;
1645 wakaba 1.5 $self->{s_kwd} = '';
1646 wakaba 1.1 !!!next-input-character;
1647    
1648     !!!emit ($self->{ct}); # start tag or end tag
1649    
1650     redo A;
1651     } elsif ($self->{nc} == -1) {
1652     !!!parse-error (type => 'unclosed tag');
1653     if ($self->{ct}->{type} == START_TAG_TOKEN) {
1654     !!!cp (124.7);
1655     $self->{last_stag_name} = $self->{ct}->{tag_name};
1656     } elsif ($self->{ct}->{type} == END_TAG_TOKEN) {
1657     if ($self->{ct}->{attributes}) {
1658     !!!cp (124.5);
1659     !!!parse-error (type => 'end tag attribute');
1660     } else {
1661     ## NOTE: This state should never be reached.
1662     !!!cp (124.6);
1663     }
1664     } else {
1665     die "$0: $self->{ct}->{type}: Unknown token type";
1666     }
1667 wakaba 1.11 ## XML5: "Tag attribute name before state".
1668 wakaba 1.1 $self->{state} = DATA_STATE;
1669 wakaba 1.5 $self->{s_kwd} = '';
1670 wakaba 1.1 ## Reconsume.
1671     !!!emit ($self->{ct}); # start tag or end tag
1672     redo A;
1673     } else {
1674     !!!cp ('124.4');
1675     !!!parse-error (type => 'nestc');
1676     ## TODO: This error type is wrong.
1677     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1678     ## Reconsume.
1679     redo A;
1680     }
1681     } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1682 wakaba 1.14 ## XML5: "Bogus comment state" and "DOCTYPE bogus comment state".
1683    
1684 wakaba 1.1 ## NOTE: Unlike the spec's "bogus comment state", this implementation
1685     ## consumes characters one at a time.
1686    
1687     if ($self->{nc} == 0x003E) { # >
1688 wakaba 1.13 if ($self->{in_subset}) {
1689     !!!cp (123);
1690     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1691     } else {
1692     !!!cp (124);
1693     $self->{state} = DATA_STATE;
1694     $self->{s_kwd} = '';
1695     }
1696 wakaba 1.1 !!!next-input-character;
1697    
1698     !!!emit ($self->{ct}); # comment
1699     redo A;
1700     } elsif ($self->{nc} == -1) {
1701 wakaba 1.13 if ($self->{in_subset}) {
1702     !!!cp (125.1);
1703     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1704     } else {
1705     !!!cp (125);
1706     $self->{state} = DATA_STATE;
1707     $self->{s_kwd} = '';
1708     }
1709 wakaba 1.1 ## reconsume
1710    
1711     !!!emit ($self->{ct}); # comment
1712     redo A;
1713     } else {
1714     !!!cp (126);
1715     $self->{ct}->{data} .= chr ($self->{nc}); # comment
1716     $self->{read_until}->($self->{ct}->{data},
1717     q[>],
1718     length $self->{ct}->{data});
1719    
1720     ## Stay in the state.
1721     !!!next-input-character;
1722     redo A;
1723     }
1724     } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1725 wakaba 1.14 ## XML5: "Markup declaration state".
1726 wakaba 1.1
1727     if ($self->{nc} == 0x002D) { # -
1728     !!!cp (133);
1729     $self->{state} = MD_HYPHEN_STATE;
1730     !!!next-input-character;
1731     redo A;
1732     } elsif ($self->{nc} == 0x0044 or # D
1733     $self->{nc} == 0x0064) { # d
1734     ## ASCII case-insensitive.
1735     !!!cp (130);
1736     $self->{state} = MD_DOCTYPE_STATE;
1737 wakaba 1.12 $self->{kwd} = chr $self->{nc};
1738 wakaba 1.1 !!!next-input-character;
1739     redo A;
1740 wakaba 1.3 } elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1741     $self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1742     $self->{is_xml}) and
1743 wakaba 1.1 $self->{nc} == 0x005B) { # [
1744     !!!cp (135.4);
1745     $self->{state} = MD_CDATA_STATE;
1746 wakaba 1.12 $self->{kwd} = '[';
1747 wakaba 1.1 !!!next-input-character;
1748     redo A;
1749     } else {
1750     !!!cp (136);
1751     }
1752    
1753     !!!parse-error (type => 'bogus comment',
1754     line => $self->{line_prev},
1755     column => $self->{column_prev} - 1);
1756     ## Reconsume.
1757     $self->{state} = BOGUS_COMMENT_STATE;
1758     $self->{ct} = {type => COMMENT_TOKEN, data => '',
1759     line => $self->{line_prev},
1760     column => $self->{column_prev} - 1,
1761     };
1762     redo A;
1763     } elsif ($self->{state} == MD_HYPHEN_STATE) {
1764     if ($self->{nc} == 0x002D) { # -
1765     !!!cp (127);
1766     $self->{ct} = {type => COMMENT_TOKEN, data => '',
1767     line => $self->{line_prev},
1768     column => $self->{column_prev} - 2,
1769     };
1770 wakaba 1.10 $self->{state} = COMMENT_START_STATE; ## XML5: "comment state".
1771 wakaba 1.1 !!!next-input-character;
1772     redo A;
1773     } else {
1774     !!!cp (128);
1775     !!!parse-error (type => 'bogus comment',
1776     line => $self->{line_prev},
1777     column => $self->{column_prev} - 2);
1778     $self->{state} = BOGUS_COMMENT_STATE;
1779     ## Reconsume.
1780     $self->{ct} = {type => COMMENT_TOKEN,
1781     data => '-',
1782     line => $self->{line_prev},
1783     column => $self->{column_prev} - 2,
1784     };
1785     redo A;
1786     }
1787     } elsif ($self->{state} == MD_DOCTYPE_STATE) {
1788     ## ASCII case-insensitive.
1789     if ($self->{nc} == [
1790     undef,
1791     0x004F, # O
1792     0x0043, # C
1793     0x0054, # T
1794     0x0059, # Y
1795     0x0050, # P
1796 wakaba 1.12 ]->[length $self->{kwd}] or
1797 wakaba 1.1 $self->{nc} == [
1798     undef,
1799     0x006F, # o
1800     0x0063, # c
1801     0x0074, # t
1802     0x0079, # y
1803     0x0070, # p
1804 wakaba 1.12 ]->[length $self->{kwd}]) {
1805 wakaba 1.1 !!!cp (131);
1806     ## Stay in the state.
1807 wakaba 1.12 $self->{kwd} .= chr $self->{nc};
1808 wakaba 1.1 !!!next-input-character;
1809     redo A;
1810 wakaba 1.12 } elsif ((length $self->{kwd}) == 6 and
1811 wakaba 1.1 ($self->{nc} == 0x0045 or # E
1812     $self->{nc} == 0x0065)) { # e
1813 wakaba 1.12 if ($self->{is_xml} and
1814     ($self->{kwd} ne 'DOCTYP' or $self->{nc} == 0x0065)) {
1815 wakaba 1.10 !!!cp (129);
1816     ## XML5: case-sensitive.
1817     !!!parse-error (type => 'lowercase keyword', ## TODO
1818     text => 'DOCTYPE',
1819     line => $self->{line_prev},
1820     column => $self->{column_prev} - 5);
1821     } else {
1822     !!!cp (129.1);
1823     }
1824 wakaba 1.1 $self->{state} = DOCTYPE_STATE;
1825     $self->{ct} = {type => DOCTYPE_TOKEN,
1826     quirks => 1,
1827     line => $self->{line_prev},
1828     column => $self->{column_prev} - 7,
1829     };
1830     !!!next-input-character;
1831     redo A;
1832     } else {
1833     !!!cp (132);
1834     !!!parse-error (type => 'bogus comment',
1835     line => $self->{line_prev},
1836 wakaba 1.12 column => $self->{column_prev} - 1 - length $self->{kwd});
1837 wakaba 1.1 $self->{state} = BOGUS_COMMENT_STATE;
1838     ## Reconsume.
1839     $self->{ct} = {type => COMMENT_TOKEN,
1840 wakaba 1.12 data => $self->{kwd},
1841 wakaba 1.1 line => $self->{line_prev},
1842 wakaba 1.12 column => $self->{column_prev} - 1 - length $self->{kwd},
1843 wakaba 1.1 };
1844     redo A;
1845     }
1846     } elsif ($self->{state} == MD_CDATA_STATE) {
1847     if ($self->{nc} == {
1848     '[' => 0x0043, # C
1849     '[C' => 0x0044, # D
1850     '[CD' => 0x0041, # A
1851     '[CDA' => 0x0054, # T
1852     '[CDAT' => 0x0041, # A
1853 wakaba 1.12 }->{$self->{kwd}}) {
1854 wakaba 1.1 !!!cp (135.1);
1855     ## Stay in the state.
1856 wakaba 1.12 $self->{kwd} .= chr $self->{nc};
1857 wakaba 1.1 !!!next-input-character;
1858     redo A;
1859 wakaba 1.12 } elsif ($self->{kwd} eq '[CDATA' and
1860 wakaba 1.1 $self->{nc} == 0x005B) { # [
1861 wakaba 1.6 if ($self->{is_xml} and
1862     not $self->{tainted} and
1863     @{$self->{open_elements} or []} == 0) {
1864 wakaba 1.8 !!!cp (135.2);
1865 wakaba 1.6 !!!parse-error (type => 'cdata outside of root element',
1866     line => $self->{line_prev},
1867     column => $self->{column_prev} - 7);
1868     $self->{tainted} = 1;
1869 wakaba 1.8 } else {
1870     !!!cp (135.21);
1871 wakaba 1.6 }
1872    
1873 wakaba 1.1 $self->{ct} = {type => CHARACTER_TOKEN,
1874     data => '',
1875     line => $self->{line_prev},
1876     column => $self->{column_prev} - 7};
1877     $self->{state} = CDATA_SECTION_STATE;
1878     !!!next-input-character;
1879     redo A;
1880     } else {
1881     !!!cp (135.3);
1882     !!!parse-error (type => 'bogus comment',
1883     line => $self->{line_prev},
1884 wakaba 1.12 column => $self->{column_prev} - 1 - length $self->{kwd});
1885 wakaba 1.1 $self->{state} = BOGUS_COMMENT_STATE;
1886     ## Reconsume.
1887     $self->{ct} = {type => COMMENT_TOKEN,
1888 wakaba 1.12 data => $self->{kwd},
1889 wakaba 1.1 line => $self->{line_prev},
1890 wakaba 1.12 column => $self->{column_prev} - 1 - length $self->{kwd},
1891 wakaba 1.1 };
1892     redo A;
1893     }
1894     } elsif ($self->{state} == COMMENT_START_STATE) {
1895     if ($self->{nc} == 0x002D) { # -
1896     !!!cp (137);
1897     $self->{state} = COMMENT_START_DASH_STATE;
1898     !!!next-input-character;
1899     redo A;
1900     } elsif ($self->{nc} == 0x003E) { # >
1901     !!!parse-error (type => 'bogus comment');
1902 wakaba 1.13 if ($self->{in_subset}) {
1903     !!!cp (138.1);
1904     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1905     } else {
1906     !!!cp (138);
1907     $self->{state} = DATA_STATE;
1908     $self->{s_kwd} = '';
1909     }
1910 wakaba 1.1 !!!next-input-character;
1911    
1912     !!!emit ($self->{ct}); # comment
1913    
1914     redo A;
1915     } elsif ($self->{nc} == -1) {
1916     !!!parse-error (type => 'unclosed comment');
1917 wakaba 1.13 if ($self->{in_subset}) {
1918     !!!cp (139.1);
1919     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1920     } else {
1921     !!!cp (139);
1922     $self->{state} = DATA_STATE;
1923     $self->{s_kwd} = '';
1924     }
1925 wakaba 1.1 ## reconsume
1926    
1927     !!!emit ($self->{ct}); # comment
1928    
1929     redo A;
1930     } else {
1931     !!!cp (140);
1932     $self->{ct}->{data} # comment
1933     .= chr ($self->{nc});
1934     $self->{state} = COMMENT_STATE;
1935     !!!next-input-character;
1936     redo A;
1937     }
1938     } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1939     if ($self->{nc} == 0x002D) { # -
1940     !!!cp (141);
1941     $self->{state} = COMMENT_END_STATE;
1942     !!!next-input-character;
1943     redo A;
1944     } elsif ($self->{nc} == 0x003E) { # >
1945     !!!parse-error (type => 'bogus comment');
1946 wakaba 1.13 if ($self->{in_subset}) {
1947     !!!cp (142.1);
1948     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1949     } else {
1950     !!!cp (142);
1951     $self->{state} = DATA_STATE;
1952     $self->{s_kwd} = '';
1953     }
1954 wakaba 1.1 !!!next-input-character;
1955    
1956     !!!emit ($self->{ct}); # comment
1957    
1958     redo A;
1959     } elsif ($self->{nc} == -1) {
1960     !!!parse-error (type => 'unclosed comment');
1961 wakaba 1.13 if ($self->{in_subset}) {
1962     !!!cp (143.1);
1963     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1964     } else {
1965     !!!cp (143);
1966     $self->{state} = DATA_STATE;
1967     $self->{s_kwd} = '';
1968     }
1969 wakaba 1.1 ## reconsume
1970    
1971     !!!emit ($self->{ct}); # comment
1972    
1973     redo A;
1974     } else {
1975     !!!cp (144);
1976     $self->{ct}->{data} # comment
1977     .= '-' . chr ($self->{nc});
1978     $self->{state} = COMMENT_STATE;
1979     !!!next-input-character;
1980     redo A;
1981     }
1982     } elsif ($self->{state} == COMMENT_STATE) {
1983 wakaba 1.14 ## XML5: "Comment state" and "DOCTYPE comment state".
1984    
1985 wakaba 1.1 if ($self->{nc} == 0x002D) { # -
1986     !!!cp (145);
1987     $self->{state} = COMMENT_END_DASH_STATE;
1988     !!!next-input-character;
1989     redo A;
1990     } elsif ($self->{nc} == -1) {
1991     !!!parse-error (type => 'unclosed comment');
1992 wakaba 1.13 if ($self->{in_subset}) {
1993     !!!cp (146.1);
1994     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
1995     } else {
1996     !!!cp (146);
1997     $self->{state} = DATA_STATE;
1998     $self->{s_kwd} = '';
1999     }
2000 wakaba 1.1 ## reconsume
2001    
2002     !!!emit ($self->{ct}); # comment
2003    
2004     redo A;
2005     } else {
2006     !!!cp (147);
2007     $self->{ct}->{data} .= chr ($self->{nc}); # comment
2008     $self->{read_until}->($self->{ct}->{data},
2009     q[-],
2010     length $self->{ct}->{data});
2011    
2012     ## Stay in the state
2013     !!!next-input-character;
2014     redo A;
2015     }
2016     } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2017 wakaba 1.14 ## XML5: "Comment dash state" and "DOCTYPE comment dash state".
2018 wakaba 1.10
2019 wakaba 1.1 if ($self->{nc} == 0x002D) { # -
2020     !!!cp (148);
2021     $self->{state} = COMMENT_END_STATE;
2022     !!!next-input-character;
2023     redo A;
2024     } elsif ($self->{nc} == -1) {
2025     !!!parse-error (type => 'unclosed comment');
2026 wakaba 1.13 if ($self->{in_subset}) {
2027     !!!cp (149.1);
2028     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2029     } else {
2030     !!!cp (149);
2031     $self->{state} = DATA_STATE;
2032     $self->{s_kwd} = '';
2033     }
2034 wakaba 1.1 ## reconsume
2035    
2036     !!!emit ($self->{ct}); # comment
2037    
2038     redo A;
2039     } else {
2040     !!!cp (150);
2041     $self->{ct}->{data} .= '-' . chr ($self->{nc}); # comment
2042     $self->{state} = COMMENT_STATE;
2043     !!!next-input-character;
2044     redo A;
2045     }
2046     } elsif ($self->{state} == COMMENT_END_STATE) {
2047 wakaba 1.14 ## XML5: "Comment end state" and "DOCTYPE comment end state".
2048    
2049 wakaba 1.1 if ($self->{nc} == 0x003E) { # >
2050 wakaba 1.13 if ($self->{in_subset}) {
2051     !!!cp (151.1);
2052     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2053     } else {
2054     !!!cp (151);
2055     $self->{state} = DATA_STATE;
2056     $self->{s_kwd} = '';
2057     }
2058 wakaba 1.1 !!!next-input-character;
2059    
2060     !!!emit ($self->{ct}); # comment
2061    
2062     redo A;
2063     } elsif ($self->{nc} == 0x002D) { # -
2064     !!!cp (152);
2065 wakaba 1.10 ## XML5: Not a parse error.
2066 wakaba 1.1 !!!parse-error (type => 'dash in comment',
2067     line => $self->{line_prev},
2068     column => $self->{column_prev});
2069     $self->{ct}->{data} .= '-'; # comment
2070     ## Stay in the state
2071     !!!next-input-character;
2072     redo A;
2073     } elsif ($self->{nc} == -1) {
2074     !!!parse-error (type => 'unclosed comment');
2075 wakaba 1.13 if ($self->{in_subset}) {
2076     !!!cp (153.1);
2077     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2078     } else {
2079     !!!cp (153);
2080     $self->{state} = DATA_STATE;
2081     $self->{s_kwd} = '';
2082     }
2083 wakaba 1.1 ## reconsume
2084    
2085     !!!emit ($self->{ct}); # comment
2086    
2087     redo A;
2088     } else {
2089     !!!cp (154);
2090 wakaba 1.10 ## XML5: Not a parse error.
2091 wakaba 1.1 !!!parse-error (type => 'dash in comment',
2092     line => $self->{line_prev},
2093     column => $self->{column_prev});
2094     $self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment
2095     $self->{state} = COMMENT_STATE;
2096     !!!next-input-character;
2097     redo A;
2098     }
2099     } elsif ($self->{state} == DOCTYPE_STATE) {
2100     if ($is_space->{$self->{nc}}) {
2101     !!!cp (155);
2102     $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2103     !!!next-input-character;
2104     redo A;
2105     } else {
2106     !!!cp (156);
2107 wakaba 1.12 ## XML5: Unless EOF, switch to the bogus comment state.
2108 wakaba 1.1 !!!parse-error (type => 'no space before DOCTYPE name');
2109     $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2110     ## reconsume
2111     redo A;
2112     }
2113     } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2114 wakaba 1.12 ## XML5: "DOCTYPE root name before state".
2115    
2116 wakaba 1.1 if ($is_space->{$self->{nc}}) {
2117     !!!cp (157);
2118     ## Stay in the state
2119     !!!next-input-character;
2120     redo A;
2121     } elsif ($self->{nc} == 0x003E) { # >
2122     !!!cp (158);
2123 wakaba 1.12 ## XML5: No parse error.
2124 wakaba 1.1 !!!parse-error (type => 'no DOCTYPE name');
2125     $self->{state} = DATA_STATE;
2126 wakaba 1.5 $self->{s_kwd} = '';
2127 wakaba 1.1 !!!next-input-character;
2128    
2129     !!!emit ($self->{ct}); # DOCTYPE (quirks)
2130    
2131     redo A;
2132     } elsif ($self->{nc} == -1) {
2133     !!!cp (159);
2134     !!!parse-error (type => 'no DOCTYPE name');
2135     $self->{state} = DATA_STATE;
2136 wakaba 1.5 $self->{s_kwd} = '';
2137 wakaba 1.1 ## reconsume
2138    
2139     !!!emit ($self->{ct}); # DOCTYPE (quirks)
2140    
2141     redo A;
2142 wakaba 1.12 } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2143     !!!cp (159.1);
2144     !!!parse-error (type => 'no DOCTYPE name');
2145     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2146 wakaba 1.13 $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2147     $self->{in_subset} = 1;
2148 wakaba 1.12 !!!next-input-character;
2149 wakaba 1.13 !!!emit ($self->{ct}); # DOCTYPE
2150 wakaba 1.12 redo A;
2151 wakaba 1.1 } else {
2152     !!!cp (160);
2153     $self->{ct}->{name} = chr $self->{nc};
2154     delete $self->{ct}->{quirks};
2155     $self->{state} = DOCTYPE_NAME_STATE;
2156     !!!next-input-character;
2157     redo A;
2158     }
2159     } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2160 wakaba 1.12 ## XML5: "DOCTYPE root name state".
2161    
2162     ## ISSUE: Redundant "First," in the spec.
2163    
2164 wakaba 1.1 if ($is_space->{$self->{nc}}) {
2165     !!!cp (161);
2166     $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2167     !!!next-input-character;
2168     redo A;
2169     } elsif ($self->{nc} == 0x003E) { # >
2170     !!!cp (162);
2171     $self->{state} = DATA_STATE;
2172 wakaba 1.5 $self->{s_kwd} = '';
2173 wakaba 1.1 !!!next-input-character;
2174    
2175     !!!emit ($self->{ct}); # DOCTYPE
2176    
2177     redo A;
2178     } elsif ($self->{nc} == -1) {
2179     !!!cp (163);
2180     !!!parse-error (type => 'unclosed DOCTYPE');
2181     $self->{state} = DATA_STATE;
2182 wakaba 1.5 $self->{s_kwd} = '';
2183 wakaba 1.1 ## reconsume
2184    
2185     $self->{ct}->{quirks} = 1;
2186     !!!emit ($self->{ct}); # DOCTYPE
2187    
2188     redo A;
2189 wakaba 1.12 } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2190     !!!cp (163.1);
2191     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2192 wakaba 1.13 $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2193     $self->{in_subset} = 1;
2194 wakaba 1.12 !!!next-input-character;
2195 wakaba 1.13 !!!emit ($self->{ct}); # DOCTYPE
2196 wakaba 1.12 redo A;
2197 wakaba 1.1 } else {
2198     !!!cp (164);
2199     $self->{ct}->{name}
2200     .= chr ($self->{nc}); # DOCTYPE
2201     ## Stay in the state
2202     !!!next-input-character;
2203     redo A;
2204     }
2205     } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2206 wakaba 1.12 ## XML5: Corresponding to XML5's "DOCTYPE root name after
2207     ## state", but implemented differently.
2208    
2209 wakaba 1.1 if ($is_space->{$self->{nc}}) {
2210     !!!cp (165);
2211     ## Stay in the state
2212     !!!next-input-character;
2213     redo A;
2214     } elsif ($self->{nc} == 0x003E) { # >
2215     !!!cp (166);
2216     $self->{state} = DATA_STATE;
2217 wakaba 1.5 $self->{s_kwd} = '';
2218 wakaba 1.1 !!!next-input-character;
2219    
2220     !!!emit ($self->{ct}); # DOCTYPE
2221    
2222     redo A;
2223     } elsif ($self->{nc} == -1) {
2224     !!!cp (167);
2225     !!!parse-error (type => 'unclosed DOCTYPE');
2226     $self->{state} = DATA_STATE;
2227 wakaba 1.5 $self->{s_kwd} = '';
2228 wakaba 1.1 ## reconsume
2229    
2230     $self->{ct}->{quirks} = 1;
2231     !!!emit ($self->{ct}); # DOCTYPE
2232    
2233     redo A;
2234     } elsif ($self->{nc} == 0x0050 or # P
2235     $self->{nc} == 0x0070) { # p
2236 wakaba 1.12 !!!cp (167.1);
2237 wakaba 1.1 $self->{state} = PUBLIC_STATE;
2238 wakaba 1.12 $self->{kwd} = chr $self->{nc};
2239 wakaba 1.1 !!!next-input-character;
2240     redo A;
2241     } elsif ($self->{nc} == 0x0053 or # S
2242     $self->{nc} == 0x0073) { # s
2243 wakaba 1.12 !!!cp (167.2);
2244 wakaba 1.1 $self->{state} = SYSTEM_STATE;
2245 wakaba 1.12 $self->{kwd} = chr $self->{nc};
2246     !!!next-input-character;
2247     redo A;
2248     } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2249     !!!cp (167.3);
2250     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2251     $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2252 wakaba 1.13 $self->{in_subset} = 1;
2253 wakaba 1.1 !!!next-input-character;
2254 wakaba 1.13 !!!emit ($self->{ct}); # DOCTYPE
2255 wakaba 1.1 redo A;
2256     } else {
2257     !!!cp (180);
2258     !!!parse-error (type => 'string after DOCTYPE name');
2259     $self->{ct}->{quirks} = 1;
2260    
2261     $self->{state} = BOGUS_DOCTYPE_STATE;
2262     !!!next-input-character;
2263     redo A;
2264     }
2265     } elsif ($self->{state} == PUBLIC_STATE) {
2266     ## ASCII case-insensitive
2267     if ($self->{nc} == [
2268     undef,
2269     0x0055, # U
2270     0x0042, # B
2271     0x004C, # L
2272     0x0049, # I
2273 wakaba 1.12 ]->[length $self->{kwd}] or
2274 wakaba 1.1 $self->{nc} == [
2275     undef,
2276     0x0075, # u
2277     0x0062, # b
2278     0x006C, # l
2279     0x0069, # i
2280 wakaba 1.12 ]->[length $self->{kwd}]) {
2281 wakaba 1.1 !!!cp (175);
2282     ## Stay in the state.
2283 wakaba 1.12 $self->{kwd} .= chr $self->{nc};
2284 wakaba 1.1 !!!next-input-character;
2285     redo A;
2286 wakaba 1.12 } elsif ((length $self->{kwd}) == 5 and
2287 wakaba 1.1 ($self->{nc} == 0x0043 or # C
2288     $self->{nc} == 0x0063)) { # c
2289 wakaba 1.12 if ($self->{is_xml} and
2290     ($self->{kwd} ne 'PUBLI' or $self->{nc} == 0x0063)) { # c
2291     !!!cp (168.1);
2292     !!!parse-error (type => 'lowercase keyword', ## TODO: type
2293     text => 'PUBLIC',
2294     line => $self->{line_prev},
2295     column => $self->{column_prev} - 4);
2296     } else {
2297     !!!cp (168);
2298     }
2299 wakaba 1.1 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2300     !!!next-input-character;
2301     redo A;
2302     } else {
2303     !!!cp (169);
2304     !!!parse-error (type => 'string after DOCTYPE name',
2305     line => $self->{line_prev},
2306 wakaba 1.12 column => $self->{column_prev} + 1 - length $self->{kwd});
2307 wakaba 1.1 $self->{ct}->{quirks} = 1;
2308    
2309     $self->{state} = BOGUS_DOCTYPE_STATE;
2310     ## Reconsume.
2311     redo A;
2312     }
2313     } elsif ($self->{state} == SYSTEM_STATE) {
2314     ## ASCII case-insensitive
2315     if ($self->{nc} == [
2316     undef,
2317     0x0059, # Y
2318     0x0053, # S
2319     0x0054, # T
2320     0x0045, # E
2321 wakaba 1.12 ]->[length $self->{kwd}] or
2322 wakaba 1.1 $self->{nc} == [
2323     undef,
2324     0x0079, # y
2325     0x0073, # s
2326     0x0074, # t
2327     0x0065, # e
2328 wakaba 1.12 ]->[length $self->{kwd}]) {
2329 wakaba 1.1 !!!cp (170);
2330     ## Stay in the state.
2331 wakaba 1.12 $self->{kwd} .= chr $self->{nc};
2332 wakaba 1.1 !!!next-input-character;
2333     redo A;
2334 wakaba 1.12 } elsif ((length $self->{kwd}) == 5 and
2335 wakaba 1.1 ($self->{nc} == 0x004D or # M
2336     $self->{nc} == 0x006D)) { # m
2337 wakaba 1.12 if ($self->{is_xml} and
2338     ($self->{kwd} ne 'SYSTE' or $self->{nc} == 0x006D)) { # m
2339     !!!cp (171.1);
2340     !!!parse-error (type => 'lowercase keyword', ## TODO: type
2341     text => 'SYSTEM',
2342     line => $self->{line_prev},
2343     column => $self->{column_prev} - 4);
2344     } else {
2345     !!!cp (171);
2346     }
2347 wakaba 1.1 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2348     !!!next-input-character;
2349     redo A;
2350     } else {
2351     !!!cp (172);
2352     !!!parse-error (type => 'string after DOCTYPE name',
2353     line => $self->{line_prev},
2354 wakaba 1.12 column => $self->{column_prev} + 1 - length $self->{kwd});
2355 wakaba 1.1 $self->{ct}->{quirks} = 1;
2356    
2357     $self->{state} = BOGUS_DOCTYPE_STATE;
2358     ## Reconsume.
2359     redo A;
2360     }
2361     } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2362     if ($is_space->{$self->{nc}}) {
2363     !!!cp (181);
2364     ## Stay in the state
2365     !!!next-input-character;
2366     redo A;
2367     } elsif ($self->{nc} eq 0x0022) { # "
2368     !!!cp (182);
2369     $self->{ct}->{pubid} = ''; # DOCTYPE
2370     $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2371     !!!next-input-character;
2372     redo A;
2373     } elsif ($self->{nc} eq 0x0027) { # '
2374     !!!cp (183);
2375     $self->{ct}->{pubid} = ''; # DOCTYPE
2376     $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2377     !!!next-input-character;
2378     redo A;
2379     } elsif ($self->{nc} eq 0x003E) { # >
2380     !!!cp (184);
2381     !!!parse-error (type => 'no PUBLIC literal');
2382    
2383     $self->{state} = DATA_STATE;
2384 wakaba 1.5 $self->{s_kwd} = '';
2385 wakaba 1.1 !!!next-input-character;
2386    
2387     $self->{ct}->{quirks} = 1;
2388     !!!emit ($self->{ct}); # DOCTYPE
2389    
2390     redo A;
2391     } elsif ($self->{nc} == -1) {
2392     !!!cp (185);
2393     !!!parse-error (type => 'unclosed DOCTYPE');
2394    
2395     $self->{state} = DATA_STATE;
2396 wakaba 1.5 $self->{s_kwd} = '';
2397 wakaba 1.1 ## reconsume
2398    
2399     $self->{ct}->{quirks} = 1;
2400     !!!emit ($self->{ct}); # DOCTYPE
2401    
2402     redo A;
2403 wakaba 1.12 } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2404     !!!cp (186.1);
2405     !!!parse-error (type => 'no PUBLIC literal');
2406     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2407     $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2408 wakaba 1.13 $self->{in_subset} = 1;
2409 wakaba 1.12 !!!next-input-character;
2410 wakaba 1.13 !!!emit ($self->{ct}); # DOCTYPE
2411 wakaba 1.12 redo A;
2412 wakaba 1.1 } else {
2413     !!!cp (186);
2414     !!!parse-error (type => 'string after PUBLIC');
2415     $self->{ct}->{quirks} = 1;
2416    
2417     $self->{state} = BOGUS_DOCTYPE_STATE;
2418     !!!next-input-character;
2419     redo A;
2420     }
2421     } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2422     if ($self->{nc} == 0x0022) { # "
2423     !!!cp (187);
2424     $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2425     !!!next-input-character;
2426     redo A;
2427     } elsif ($self->{nc} == 0x003E) { # >
2428     !!!cp (188);
2429     !!!parse-error (type => 'unclosed PUBLIC literal');
2430    
2431     $self->{state} = DATA_STATE;
2432 wakaba 1.5 $self->{s_kwd} = '';
2433 wakaba 1.1 !!!next-input-character;
2434    
2435     $self->{ct}->{quirks} = 1;
2436     !!!emit ($self->{ct}); # DOCTYPE
2437    
2438     redo A;
2439     } elsif ($self->{nc} == -1) {
2440     !!!cp (189);
2441     !!!parse-error (type => 'unclosed PUBLIC literal');
2442    
2443     $self->{state} = DATA_STATE;
2444 wakaba 1.5 $self->{s_kwd} = '';
2445 wakaba 1.1 ## reconsume
2446    
2447     $self->{ct}->{quirks} = 1;
2448     !!!emit ($self->{ct}); # DOCTYPE
2449    
2450     redo A;
2451     } else {
2452     !!!cp (190);
2453     $self->{ct}->{pubid} # DOCTYPE
2454     .= chr $self->{nc};
2455     $self->{read_until}->($self->{ct}->{pubid}, q[">],
2456     length $self->{ct}->{pubid});
2457    
2458     ## Stay in the state
2459     !!!next-input-character;
2460     redo A;
2461     }
2462     } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2463     if ($self->{nc} == 0x0027) { # '
2464     !!!cp (191);
2465     $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2466     !!!next-input-character;
2467     redo A;
2468     } elsif ($self->{nc} == 0x003E) { # >
2469     !!!cp (192);
2470     !!!parse-error (type => 'unclosed PUBLIC literal');
2471    
2472     $self->{state} = DATA_STATE;
2473 wakaba 1.5 $self->{s_kwd} = '';
2474 wakaba 1.1 !!!next-input-character;
2475    
2476     $self->{ct}->{quirks} = 1;
2477     !!!emit ($self->{ct}); # DOCTYPE
2478    
2479     redo A;
2480     } elsif ($self->{nc} == -1) {
2481     !!!cp (193);
2482     !!!parse-error (type => 'unclosed PUBLIC literal');
2483    
2484     $self->{state} = DATA_STATE;
2485 wakaba 1.5 $self->{s_kwd} = '';
2486 wakaba 1.1 ## reconsume
2487    
2488     $self->{ct}->{quirks} = 1;
2489     !!!emit ($self->{ct}); # DOCTYPE
2490    
2491     redo A;
2492     } else {
2493     !!!cp (194);
2494     $self->{ct}->{pubid} # DOCTYPE
2495     .= chr $self->{nc};
2496     $self->{read_until}->($self->{ct}->{pubid}, q['>],
2497     length $self->{ct}->{pubid});
2498    
2499     ## Stay in the state
2500     !!!next-input-character;
2501     redo A;
2502     }
2503     } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2504     if ($is_space->{$self->{nc}}) {
2505     !!!cp (195);
2506     ## Stay in the state
2507     !!!next-input-character;
2508     redo A;
2509     } elsif ($self->{nc} == 0x0022) { # "
2510     !!!cp (196);
2511     $self->{ct}->{sysid} = ''; # DOCTYPE
2512     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2513     !!!next-input-character;
2514     redo A;
2515     } elsif ($self->{nc} == 0x0027) { # '
2516     !!!cp (197);
2517     $self->{ct}->{sysid} = ''; # DOCTYPE
2518     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2519     !!!next-input-character;
2520     redo A;
2521     } elsif ($self->{nc} == 0x003E) { # >
2522 wakaba 1.12 if ($self->{is_xml}) {
2523     !!!cp (198.1);
2524     !!!parse-error (type => 'no SYSTEM literal');
2525     } else {
2526     !!!cp (198);
2527     }
2528 wakaba 1.1 $self->{state} = DATA_STATE;
2529 wakaba 1.5 $self->{s_kwd} = '';
2530 wakaba 1.1 !!!next-input-character;
2531    
2532     !!!emit ($self->{ct}); # DOCTYPE
2533    
2534     redo A;
2535     } elsif ($self->{nc} == -1) {
2536     !!!cp (199);
2537     !!!parse-error (type => 'unclosed DOCTYPE');
2538    
2539     $self->{state} = DATA_STATE;
2540 wakaba 1.5 $self->{s_kwd} = '';
2541 wakaba 1.1 ## reconsume
2542    
2543     $self->{ct}->{quirks} = 1;
2544     !!!emit ($self->{ct}); # DOCTYPE
2545    
2546     redo A;
2547 wakaba 1.12 } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2548     !!!cp (200.1);
2549     !!!parse-error (type => 'no SYSTEM literal');
2550     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2551     $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2552 wakaba 1.13 $self->{in_subset} = 1;
2553 wakaba 1.12 !!!next-input-character;
2554 wakaba 1.13 !!!emit ($self->{ct}); # DOCTYPE
2555 wakaba 1.12 redo A;
2556 wakaba 1.1 } else {
2557     !!!cp (200);
2558     !!!parse-error (type => 'string after PUBLIC literal');
2559     $self->{ct}->{quirks} = 1;
2560    
2561     $self->{state} = BOGUS_DOCTYPE_STATE;
2562     !!!next-input-character;
2563     redo A;
2564     }
2565     } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2566     if ($is_space->{$self->{nc}}) {
2567     !!!cp (201);
2568     ## Stay in the state
2569     !!!next-input-character;
2570     redo A;
2571     } elsif ($self->{nc} == 0x0022) { # "
2572     !!!cp (202);
2573     $self->{ct}->{sysid} = ''; # DOCTYPE
2574     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2575     !!!next-input-character;
2576     redo A;
2577     } elsif ($self->{nc} == 0x0027) { # '
2578     !!!cp (203);
2579     $self->{ct}->{sysid} = ''; # DOCTYPE
2580     $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2581     !!!next-input-character;
2582     redo A;
2583     } elsif ($self->{nc} == 0x003E) { # >
2584     !!!cp (204);
2585     !!!parse-error (type => 'no SYSTEM literal');
2586     $self->{state} = DATA_STATE;
2587 wakaba 1.5 $self->{s_kwd} = '';
2588 wakaba 1.1 !!!next-input-character;
2589    
2590     $self->{ct}->{quirks} = 1;
2591     !!!emit ($self->{ct}); # DOCTYPE
2592    
2593     redo A;
2594     } elsif ($self->{nc} == -1) {
2595     !!!cp (205);
2596     !!!parse-error (type => 'unclosed DOCTYPE');
2597    
2598     $self->{state} = DATA_STATE;
2599 wakaba 1.5 $self->{s_kwd} = '';
2600 wakaba 1.1 ## reconsume
2601    
2602     $self->{ct}->{quirks} = 1;
2603     !!!emit ($self->{ct}); # DOCTYPE
2604    
2605     redo A;
2606 wakaba 1.12 } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2607     !!!cp (206.1);
2608     !!!parse-error (type => 'no SYSTEM literal');
2609    
2610     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2611     $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2612 wakaba 1.13 $self->{in_subset} = 1;
2613 wakaba 1.12 !!!next-input-character;
2614 wakaba 1.13 !!!emit ($self->{ct}); # DOCTYPE
2615 wakaba 1.12 redo A;
2616 wakaba 1.1 } else {
2617     !!!cp (206);
2618     !!!parse-error (type => 'string after SYSTEM');
2619     $self->{ct}->{quirks} = 1;
2620    
2621     $self->{state} = BOGUS_DOCTYPE_STATE;
2622     !!!next-input-character;
2623     redo A;
2624     }
2625     } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2626     if ($self->{nc} == 0x0022) { # "
2627     !!!cp (207);
2628     $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2629     !!!next-input-character;
2630     redo A;
2631 wakaba 1.12 } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
2632 wakaba 1.1 !!!cp (208);
2633     !!!parse-error (type => 'unclosed SYSTEM literal');
2634    
2635     $self->{state} = DATA_STATE;
2636 wakaba 1.5 $self->{s_kwd} = '';
2637 wakaba 1.1 !!!next-input-character;
2638    
2639     $self->{ct}->{quirks} = 1;
2640     !!!emit ($self->{ct}); # DOCTYPE
2641    
2642     redo A;
2643     } elsif ($self->{nc} == -1) {
2644     !!!cp (209);
2645     !!!parse-error (type => 'unclosed SYSTEM literal');
2646    
2647     $self->{state} = DATA_STATE;
2648 wakaba 1.5 $self->{s_kwd} = '';
2649 wakaba 1.1 ## reconsume
2650    
2651     $self->{ct}->{quirks} = 1;
2652     !!!emit ($self->{ct}); # DOCTYPE
2653    
2654     redo A;
2655     } else {
2656     !!!cp (210);
2657     $self->{ct}->{sysid} # DOCTYPE
2658     .= chr $self->{nc};
2659     $self->{read_until}->($self->{ct}->{sysid}, q[">],
2660     length $self->{ct}->{sysid});
2661    
2662     ## Stay in the state
2663     !!!next-input-character;
2664     redo A;
2665     }
2666     } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2667     if ($self->{nc} == 0x0027) { # '
2668     !!!cp (211);
2669     $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2670     !!!next-input-character;
2671     redo A;
2672 wakaba 1.12 } elsif (not $self->{is_xml} and $self->{nc} == 0x003E) { # >
2673 wakaba 1.1 !!!cp (212);
2674     !!!parse-error (type => 'unclosed SYSTEM literal');
2675    
2676     $self->{state} = DATA_STATE;
2677 wakaba 1.5 $self->{s_kwd} = '';
2678 wakaba 1.1 !!!next-input-character;
2679    
2680     $self->{ct}->{quirks} = 1;
2681     !!!emit ($self->{ct}); # DOCTYPE
2682    
2683     redo A;
2684     } elsif ($self->{nc} == -1) {
2685     !!!cp (213);
2686     !!!parse-error (type => 'unclosed SYSTEM literal');
2687    
2688     $self->{state} = DATA_STATE;
2689 wakaba 1.5 $self->{s_kwd} = '';
2690 wakaba 1.1 ## reconsume
2691    
2692     $self->{ct}->{quirks} = 1;
2693     !!!emit ($self->{ct}); # DOCTYPE
2694    
2695     redo A;
2696     } else {
2697     !!!cp (214);
2698     $self->{ct}->{sysid} # DOCTYPE
2699     .= chr $self->{nc};
2700     $self->{read_until}->($self->{ct}->{sysid}, q['>],
2701     length $self->{ct}->{sysid});
2702    
2703     ## Stay in the state
2704     !!!next-input-character;
2705     redo A;
2706     }
2707     } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2708     if ($is_space->{$self->{nc}}) {
2709     !!!cp (215);
2710     ## Stay in the state
2711     !!!next-input-character;
2712     redo A;
2713     } elsif ($self->{nc} == 0x003E) { # >
2714     !!!cp (216);
2715     $self->{state} = DATA_STATE;
2716 wakaba 1.5 $self->{s_kwd} = '';
2717 wakaba 1.1 !!!next-input-character;
2718    
2719     !!!emit ($self->{ct}); # DOCTYPE
2720    
2721     redo A;
2722     } elsif ($self->{nc} == -1) {
2723     !!!cp (217);
2724     !!!parse-error (type => 'unclosed DOCTYPE');
2725     $self->{state} = DATA_STATE;
2726 wakaba 1.5 $self->{s_kwd} = '';
2727 wakaba 1.1 ## reconsume
2728    
2729     $self->{ct}->{quirks} = 1;
2730     !!!emit ($self->{ct}); # DOCTYPE
2731    
2732     redo A;
2733 wakaba 1.12 } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2734     !!!cp (218.1);
2735     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2736     $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2737 wakaba 1.13 $self->{in_subset} = 1;
2738 wakaba 1.12 !!!next-input-character;
2739 wakaba 1.13 !!!emit ($self->{ct}); # DOCTYPE
2740 wakaba 1.12 redo A;
2741 wakaba 1.1 } else {
2742     !!!cp (218);
2743     !!!parse-error (type => 'string after SYSTEM literal');
2744     #$self->{ct}->{quirks} = 1;
2745    
2746     $self->{state} = BOGUS_DOCTYPE_STATE;
2747     !!!next-input-character;
2748     redo A;
2749     }
2750     } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2751     if ($self->{nc} == 0x003E) { # >
2752     !!!cp (219);
2753     $self->{state} = DATA_STATE;
2754 wakaba 1.5 $self->{s_kwd} = '';
2755 wakaba 1.1 !!!next-input-character;
2756    
2757     !!!emit ($self->{ct}); # DOCTYPE
2758    
2759     redo A;
2760 wakaba 1.12 } elsif ($self->{is_xml} and $self->{nc} == 0x005B) { # [
2761 wakaba 1.13 !!!cp (220.1);
2762     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
2763     $self->{ct}->{has_internal_subset} = 1; # DOCTYPE
2764     $self->{in_subset} = 1;
2765     !!!next-input-character;
2766     !!!emit ($self->{ct}); # DOCTYPE
2767     redo A;
2768 wakaba 1.1 } elsif ($self->{nc} == -1) {
2769     !!!cp (220);
2770     $self->{state} = DATA_STATE;
2771 wakaba 1.5 $self->{s_kwd} = '';
2772 wakaba 1.1 ## reconsume
2773    
2774     !!!emit ($self->{ct}); # DOCTYPE
2775    
2776     redo A;
2777     } else {
2778     !!!cp (221);
2779     my $s = '';
2780 wakaba 1.12 $self->{read_until}->($s, q{>[}, 0);
2781 wakaba 1.1
2782     ## Stay in the state
2783     !!!next-input-character;
2784     redo A;
2785     }
2786     } elsif ($self->{state} == CDATA_SECTION_STATE) {
2787     ## NOTE: "CDATA section state" in the spec is jointly implemented
2788     ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
2789     ## and |CDATA_SECTION_MSE2_STATE|.
2790 wakaba 1.10
2791     ## XML5: "CDATA state".
2792 wakaba 1.1
2793     if ($self->{nc} == 0x005D) { # ]
2794     !!!cp (221.1);
2795     $self->{state} = CDATA_SECTION_MSE1_STATE;
2796     !!!next-input-character;
2797     redo A;
2798     } elsif ($self->{nc} == -1) {
2799 wakaba 1.6 if ($self->{is_xml}) {
2800 wakaba 1.8 !!!cp (221.11);
2801 wakaba 1.6 !!!parse-error (type => 'no mse'); ## TODO: type
2802 wakaba 1.8 } else {
2803     !!!cp (221.12);
2804 wakaba 1.6 }
2805    
2806 wakaba 1.1 $self->{state} = DATA_STATE;
2807 wakaba 1.5 $self->{s_kwd} = '';
2808 wakaba 1.10 ## Reconsume.
2809 wakaba 1.1 if (length $self->{ct}->{data}) { # character
2810     !!!cp (221.2);
2811     !!!emit ($self->{ct}); # character
2812     } else {
2813     !!!cp (221.3);
2814     ## No token to emit. $self->{ct} is discarded.
2815     }
2816     redo A;
2817     } else {
2818     !!!cp (221.4);
2819     $self->{ct}->{data} .= chr $self->{nc};
2820     $self->{read_until}->($self->{ct}->{data},
2821     q<]>,
2822     length $self->{ct}->{data});
2823    
2824     ## Stay in the state.
2825     !!!next-input-character;
2826     redo A;
2827     }
2828    
2829     ## ISSUE: "text tokens" in spec.
2830     } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
2831 wakaba 1.10 ## XML5: "CDATA bracket state".
2832    
2833 wakaba 1.1 if ($self->{nc} == 0x005D) { # ]
2834     !!!cp (221.5);
2835     $self->{state} = CDATA_SECTION_MSE2_STATE;
2836     !!!next-input-character;
2837     redo A;
2838     } else {
2839     !!!cp (221.6);
2840 wakaba 1.10 ## XML5: If EOF, "]" is not appended and changed to the data state.
2841 wakaba 1.1 $self->{ct}->{data} .= ']';
2842 wakaba 1.10 $self->{state} = CDATA_SECTION_STATE; ## XML5: Stay in the state.
2843 wakaba 1.1 ## Reconsume.
2844     redo A;
2845     }
2846     } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
2847 wakaba 1.10 ## XML5: "CDATA end state".
2848    
2849 wakaba 1.1 if ($self->{nc} == 0x003E) { # >
2850     $self->{state} = DATA_STATE;
2851 wakaba 1.5 $self->{s_kwd} = '';
2852 wakaba 1.1 !!!next-input-character;
2853     if (length $self->{ct}->{data}) { # character
2854     !!!cp (221.7);
2855     !!!emit ($self->{ct}); # character
2856     } else {
2857     !!!cp (221.8);
2858     ## No token to emit. $self->{ct} is discarded.
2859     }
2860     redo A;
2861     } elsif ($self->{nc} == 0x005D) { # ]
2862     !!!cp (221.9); # character
2863     $self->{ct}->{data} .= ']'; ## Add first "]" of "]]]".
2864     ## Stay in the state.
2865     !!!next-input-character;
2866     redo A;
2867     } else {
2868     !!!cp (221.11);
2869     $self->{ct}->{data} .= ']]'; # character
2870     $self->{state} = CDATA_SECTION_STATE;
2871 wakaba 1.10 ## Reconsume. ## XML5: Emit.
2872 wakaba 1.1 redo A;
2873     }
2874     } elsif ($self->{state} == ENTITY_STATE) {
2875     if ($is_space->{$self->{nc}} or
2876     {
2877     0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
2878     $self->{entity_add} => 1,
2879     }->{$self->{nc}}) {
2880     !!!cp (1001);
2881     ## Don't consume
2882     ## No error
2883     ## Return nothing.
2884     #
2885     } elsif ($self->{nc} == 0x0023) { # #
2886     !!!cp (999);
2887     $self->{state} = ENTITY_HASH_STATE;
2888 wakaba 1.12 $self->{kwd} = '#';
2889 wakaba 1.1 !!!next-input-character;
2890     redo A;
2891     } elsif ((0x0041 <= $self->{nc} and
2892     $self->{nc} <= 0x005A) or # A..Z
2893     (0x0061 <= $self->{nc} and
2894     $self->{nc} <= 0x007A)) { # a..z
2895     !!!cp (998);
2896     require Whatpm::_NamedEntityList;
2897     $self->{state} = ENTITY_NAME_STATE;
2898 wakaba 1.12 $self->{kwd} = chr $self->{nc};
2899     $self->{entity__value} = $self->{kwd};
2900 wakaba 1.1 $self->{entity__match} = 0;
2901     !!!next-input-character;
2902     redo A;
2903     } else {
2904     !!!cp (1027);
2905     !!!parse-error (type => 'bare ero');
2906     ## Return nothing.
2907     #
2908     }
2909    
2910     ## NOTE: No character is consumed by the "consume a character
2911     ## reference" algorithm. In other words, there is an "&" character
2912     ## that does not introduce a character reference, which would be
2913     ## appended to the parent element or the attribute value in later
2914     ## processing by the tokenizer.
2915    
2916     if ($self->{prev_state} == DATA_STATE) {
2917     !!!cp (997);
2918     $self->{state} = $self->{prev_state};
2919 wakaba 1.5 $self->{s_kwd} = '';
2920 wakaba 1.1 ## Reconsume.
2921     !!!emit ({type => CHARACTER_TOKEN, data => '&',
2922     line => $self->{line_prev},
2923     column => $self->{column_prev},
2924     });
2925     redo A;
2926     } else {
2927     !!!cp (996);
2928     $self->{ca}->{value} .= '&';
2929     $self->{state} = $self->{prev_state};
2930 wakaba 1.5 $self->{s_kwd} = '';
2931 wakaba 1.1 ## Reconsume.
2932     redo A;
2933     }
2934     } elsif ($self->{state} == ENTITY_HASH_STATE) {
2935     if ($self->{nc} == 0x0078 or # x
2936     $self->{nc} == 0x0058) { # X
2937     !!!cp (995);
2938     $self->{state} = HEXREF_X_STATE;
2939 wakaba 1.12 $self->{kwd} .= chr $self->{nc};
2940 wakaba 1.1 !!!next-input-character;
2941     redo A;
2942     } elsif (0x0030 <= $self->{nc} and
2943     $self->{nc} <= 0x0039) { # 0..9
2944     !!!cp (994);
2945     $self->{state} = NCR_NUM_STATE;
2946 wakaba 1.12 $self->{kwd} = $self->{nc} - 0x0030;
2947 wakaba 1.1 !!!next-input-character;
2948     redo A;
2949     } else {
2950     !!!parse-error (type => 'bare nero',
2951     line => $self->{line_prev},
2952     column => $self->{column_prev} - 1);
2953    
2954     ## NOTE: According to the spec algorithm, nothing is returned,
2955     ## and then "&#" is appended to the parent element or the attribute
2956     ## value in the later processing.
2957    
2958     if ($self->{prev_state} == DATA_STATE) {
2959     !!!cp (1019);
2960     $self->{state} = $self->{prev_state};
2961 wakaba 1.5 $self->{s_kwd} = '';
2962 wakaba 1.1 ## Reconsume.
2963     !!!emit ({type => CHARACTER_TOKEN,
2964     data => '&#',
2965     line => $self->{line_prev},
2966     column => $self->{column_prev} - 1,
2967     });
2968     redo A;
2969     } else {
2970     !!!cp (993);
2971     $self->{ca}->{value} .= '&#';
2972     $self->{state} = $self->{prev_state};
2973 wakaba 1.5 $self->{s_kwd} = '';
2974 wakaba 1.1 ## Reconsume.
2975     redo A;
2976     }
2977     }
2978     } elsif ($self->{state} == NCR_NUM_STATE) {
2979     if (0x0030 <= $self->{nc} and
2980     $self->{nc} <= 0x0039) { # 0..9
2981     !!!cp (1012);
2982 wakaba 1.12 $self->{kwd} *= 10;
2983     $self->{kwd} += $self->{nc} - 0x0030;
2984 wakaba 1.1
2985     ## Stay in the state.
2986     !!!next-input-character;
2987     redo A;
2988     } elsif ($self->{nc} == 0x003B) { # ;
2989     !!!cp (1013);
2990     !!!next-input-character;
2991     #
2992     } else {
2993     !!!cp (1014);
2994     !!!parse-error (type => 'no refc');
2995     ## Reconsume.
2996     #
2997     }
2998    
2999 wakaba 1.12 my $code = $self->{kwd};
3000 wakaba 1.1 my $l = $self->{line_prev};
3001     my $c = $self->{column_prev};
3002     if ($charref_map->{$code}) {
3003     !!!cp (1015);
3004     !!!parse-error (type => 'invalid character reference',
3005     text => (sprintf 'U+%04X', $code),
3006     line => $l, column => $c);
3007     $code = $charref_map->{$code};
3008     } elsif ($code > 0x10FFFF) {
3009     !!!cp (1016);
3010     !!!parse-error (type => 'invalid character reference',
3011     text => (sprintf 'U-%08X', $code),
3012     line => $l, column => $c);
3013     $code = 0xFFFD;
3014     }
3015    
3016     if ($self->{prev_state} == DATA_STATE) {
3017     !!!cp (992);
3018     $self->{state} = $self->{prev_state};
3019 wakaba 1.5 $self->{s_kwd} = '';
3020 wakaba 1.1 ## Reconsume.
3021     !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3022 wakaba 1.7 has_reference => 1,
3023 wakaba 1.1 line => $l, column => $c,
3024     });
3025     redo A;
3026     } else {
3027     !!!cp (991);
3028     $self->{ca}->{value} .= chr $code;
3029     $self->{ca}->{has_reference} = 1;
3030     $self->{state} = $self->{prev_state};
3031 wakaba 1.5 $self->{s_kwd} = '';
3032 wakaba 1.1 ## Reconsume.
3033     redo A;
3034     }
3035     } elsif ($self->{state} == HEXREF_X_STATE) {
3036     if ((0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) or
3037     (0x0041 <= $self->{nc} and $self->{nc} <= 0x0046) or
3038     (0x0061 <= $self->{nc} and $self->{nc} <= 0x0066)) {
3039     # 0..9, A..F, a..f
3040     !!!cp (990);
3041     $self->{state} = HEXREF_HEX_STATE;
3042 wakaba 1.12 $self->{kwd} = 0;
3043 wakaba 1.1 ## Reconsume.
3044     redo A;
3045     } else {
3046     !!!parse-error (type => 'bare hcro',
3047     line => $self->{line_prev},
3048     column => $self->{column_prev} - 2);
3049    
3050     ## NOTE: According to the spec algorithm, nothing is returned,
3051     ## and then "&#" followed by "X" or "x" is appended to the parent
3052     ## element or the attribute value in the later processing.
3053    
3054     if ($self->{prev_state} == DATA_STATE) {
3055     !!!cp (1005);
3056     $self->{state} = $self->{prev_state};
3057 wakaba 1.5 $self->{s_kwd} = '';
3058 wakaba 1.1 ## Reconsume.
3059     !!!emit ({type => CHARACTER_TOKEN,
3060 wakaba 1.12 data => '&' . $self->{kwd},
3061 wakaba 1.1 line => $self->{line_prev},
3062 wakaba 1.12 column => $self->{column_prev} - length $self->{kwd},
3063 wakaba 1.1 });
3064     redo A;
3065     } else {
3066     !!!cp (989);
3067 wakaba 1.12 $self->{ca}->{value} .= '&' . $self->{kwd};
3068 wakaba 1.1 $self->{state} = $self->{prev_state};
3069 wakaba 1.5 $self->{s_kwd} = '';
3070 wakaba 1.1 ## Reconsume.
3071     redo A;
3072     }
3073     }
3074     } elsif ($self->{state} == HEXREF_HEX_STATE) {
3075     if (0x0030 <= $self->{nc} and $self->{nc} <= 0x0039) {
3076     # 0..9
3077     !!!cp (1002);
3078 wakaba 1.12 $self->{kwd} *= 0x10;
3079     $self->{kwd} += $self->{nc} - 0x0030;
3080 wakaba 1.1 ## Stay in the state.
3081     !!!next-input-character;
3082     redo A;
3083     } elsif (0x0061 <= $self->{nc} and
3084     $self->{nc} <= 0x0066) { # a..f
3085     !!!cp (1003);
3086 wakaba 1.12 $self->{kwd} *= 0x10;
3087     $self->{kwd} += $self->{nc} - 0x0060 + 9;
3088 wakaba 1.1 ## Stay in the state.
3089     !!!next-input-character;
3090     redo A;
3091     } elsif (0x0041 <= $self->{nc} and
3092     $self->{nc} <= 0x0046) { # A..F
3093     !!!cp (1004);
3094 wakaba 1.12 $self->{kwd} *= 0x10;
3095     $self->{kwd} += $self->{nc} - 0x0040 + 9;
3096 wakaba 1.1 ## Stay in the state.
3097     !!!next-input-character;
3098     redo A;
3099     } elsif ($self->{nc} == 0x003B) { # ;
3100     !!!cp (1006);
3101     !!!next-input-character;
3102     #
3103     } else {
3104     !!!cp (1007);
3105     !!!parse-error (type => 'no refc',
3106     line => $self->{line},
3107     column => $self->{column});
3108     ## Reconsume.
3109     #
3110     }
3111    
3112 wakaba 1.12 my $code = $self->{kwd};
3113 wakaba 1.1 my $l = $self->{line_prev};
3114     my $c = $self->{column_prev};
3115     if ($charref_map->{$code}) {
3116     !!!cp (1008);
3117     !!!parse-error (type => 'invalid character reference',
3118     text => (sprintf 'U+%04X', $code),
3119     line => $l, column => $c);
3120     $code = $charref_map->{$code};
3121     } elsif ($code > 0x10FFFF) {
3122     !!!cp (1009);
3123     !!!parse-error (type => 'invalid character reference',
3124     text => (sprintf 'U-%08X', $code),
3125     line => $l, column => $c);
3126     $code = 0xFFFD;
3127     }
3128    
3129     if ($self->{prev_state} == DATA_STATE) {
3130     !!!cp (988);
3131     $self->{state} = $self->{prev_state};
3132 wakaba 1.5 $self->{s_kwd} = '';
3133 wakaba 1.1 ## Reconsume.
3134     !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3135 wakaba 1.7 has_reference => 1,
3136 wakaba 1.1 line => $l, column => $c,
3137     });
3138     redo A;
3139     } else {
3140     !!!cp (987);
3141     $self->{ca}->{value} .= chr $code;
3142     $self->{ca}->{has_reference} = 1;
3143     $self->{state} = $self->{prev_state};
3144 wakaba 1.5 $self->{s_kwd} = '';
3145 wakaba 1.1 ## Reconsume.
3146     redo A;
3147     }
3148     } elsif ($self->{state} == ENTITY_NAME_STATE) {
3149 wakaba 1.12 if (length $self->{kwd} < 30 and
3150 wakaba 1.1 ## NOTE: Some number greater than the maximum length of entity name
3151     ((0x0041 <= $self->{nc} and # A
3152     $self->{nc} <= 0x005A) or # Z
3153     (0x0061 <= $self->{nc} and # a
3154     $self->{nc} <= 0x007A) or # z
3155     (0x0030 <= $self->{nc} and # 0
3156     $self->{nc} <= 0x0039) or # 9
3157     $self->{nc} == 0x003B)) { # ;
3158     our $EntityChar;
3159 wakaba 1.12 $self->{kwd} .= chr $self->{nc};
3160     if (defined $EntityChar->{$self->{kwd}}) {
3161 wakaba 1.1 if ($self->{nc} == 0x003B) { # ;
3162     !!!cp (1020);
3163 wakaba 1.12 $self->{entity__value} = $EntityChar->{$self->{kwd}};
3164 wakaba 1.1 $self->{entity__match} = 1;
3165     !!!next-input-character;
3166     #
3167     } else {
3168     !!!cp (1021);
3169 wakaba 1.12 $self->{entity__value} = $EntityChar->{$self->{kwd}};
3170 wakaba 1.1 $self->{entity__match} = -1;
3171     ## Stay in the state.
3172     !!!next-input-character;
3173     redo A;
3174     }
3175     } else {
3176     !!!cp (1022);
3177     $self->{entity__value} .= chr $self->{nc};
3178     $self->{entity__match} *= 2;
3179     ## Stay in the state.
3180     !!!next-input-character;
3181     redo A;
3182     }
3183     }
3184    
3185     my $data;
3186     my $has_ref;
3187     if ($self->{entity__match} > 0) {
3188     !!!cp (1023);
3189     $data = $self->{entity__value};
3190     $has_ref = 1;
3191     #
3192     } elsif ($self->{entity__match} < 0) {
3193     !!!parse-error (type => 'no refc');
3194     if ($self->{prev_state} != DATA_STATE and # in attribute
3195     $self->{entity__match} < -1) {
3196     !!!cp (1024);
3197 wakaba 1.12 $data = '&' . $self->{kwd};
3198 wakaba 1.1 #
3199     } else {
3200     !!!cp (1025);
3201     $data = $self->{entity__value};
3202     $has_ref = 1;
3203     #
3204     }
3205     } else {
3206     !!!cp (1026);
3207     !!!parse-error (type => 'bare ero',
3208     line => $self->{line_prev},
3209 wakaba 1.12 column => $self->{column_prev} - length $self->{kwd});
3210     $data = '&' . $self->{kwd};
3211 wakaba 1.1 #
3212     }
3213    
3214     ## NOTE: In these cases, when a character reference is found,
3215     ## it is consumed and a character token is returned, or, otherwise,
3216     ## nothing is consumed and returned, according to the spec algorithm.
3217     ## In this implementation, anything that has been examined by the
3218     ## tokenizer is appended to the parent element or the attribute value
3219     ## as string, either literal string when no character reference or
3220     ## entity-replaced string otherwise, in this stage, since any characters
3221     ## that would not be consumed are appended in the data state or in an
3222     ## appropriate attribute value state anyway.
3223    
3224     if ($self->{prev_state} == DATA_STATE) {
3225     !!!cp (986);
3226     $self->{state} = $self->{prev_state};
3227 wakaba 1.5 $self->{s_kwd} = '';
3228 wakaba 1.1 ## Reconsume.
3229     !!!emit ({type => CHARACTER_TOKEN,
3230     data => $data,
3231 wakaba 1.7 has_reference => $has_ref,
3232 wakaba 1.1 line => $self->{line_prev},
3233 wakaba 1.12 column => $self->{column_prev} + 1 - length $self->{kwd},
3234 wakaba 1.1 });
3235     redo A;
3236     } else {
3237     !!!cp (985);
3238     $self->{ca}->{value} .= $data;
3239     $self->{ca}->{has_reference} = 1 if $has_ref;
3240     $self->{state} = $self->{prev_state};
3241 wakaba 1.5 $self->{s_kwd} = '';
3242 wakaba 1.1 ## Reconsume.
3243     redo A;
3244     }
3245 wakaba 1.8
3246     ## XML-only states
3247    
3248     } elsif ($self->{state} == PI_STATE) {
3249 wakaba 1.14 ## XML5: "Pi state" and "DOCTYPE pi state".
3250    
3251 wakaba 1.8 if ($is_space->{$self->{nc}} or
3252 wakaba 1.14 $self->{nc} == 0x003F or # ?
3253 wakaba 1.8 $self->{nc} == -1) {
3254 wakaba 1.14 ## XML5: U+003F: "pi state": Same as "Anything else"; "DOCTYPE
3255     ## pi state": Switch to the "DOCTYPE pi after state". EOF:
3256     ## "DOCTYPE pi state": Parse error, switch to the "data
3257     ## state".
3258 wakaba 1.8 !!!parse-error (type => 'bare pio', ## TODO: type
3259     line => $self->{line_prev},
3260     column => $self->{column_prev}
3261     - 1 * ($self->{nc} != -1));
3262     $self->{state} = BOGUS_COMMENT_STATE;
3263     ## Reconsume.
3264     $self->{ct} = {type => COMMENT_TOKEN,
3265     data => '?',
3266     line => $self->{line_prev},
3267     column => $self->{column_prev}
3268     - 1 * ($self->{nc} != -1),
3269     };
3270     redo A;
3271     } else {
3272 wakaba 1.14 ## XML5: "DOCTYPE pi state": Stay in the state.
3273 wakaba 1.8 $self->{ct} = {type => PI_TOKEN,
3274     target => chr $self->{nc},
3275     data => '',
3276     line => $self->{line_prev},
3277     column => $self->{column_prev} - 1,
3278     };
3279     $self->{state} = PI_TARGET_STATE;
3280     !!!next-input-character;
3281     redo A;
3282     }
3283     } elsif ($self->{state} == PI_TARGET_STATE) {
3284     if ($is_space->{$self->{nc}}) {
3285     $self->{state} = PI_TARGET_AFTER_STATE;
3286     !!!next-input-character;
3287     redo A;
3288     } elsif ($self->{nc} == -1) {
3289     !!!parse-error (type => 'no pic'); ## TODO: type
3290 wakaba 1.13 if ($self->{in_subset}) {
3291     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3292     } else {
3293     $self->{state} = DATA_STATE;
3294     $self->{s_kwd} = '';
3295     }
3296 wakaba 1.8 ## Reconsume.
3297     !!!emit ($self->{ct}); # pi
3298     redo A;
3299     } elsif ($self->{nc} == 0x003F) { # ?
3300     $self->{state} = PI_AFTER_STATE;
3301     !!!next-input-character;
3302     redo A;
3303     } else {
3304     ## XML5: typo ("tag name" -> "target")
3305     $self->{ct}->{target} .= chr $self->{nc}; # pi
3306     !!!next-input-character;
3307     redo A;
3308     }
3309     } elsif ($self->{state} == PI_TARGET_AFTER_STATE) {
3310     if ($is_space->{$self->{nc}}) {
3311     ## Stay in the state.
3312     !!!next-input-character;
3313     redo A;
3314     } else {
3315     $self->{state} = PI_DATA_STATE;
3316     ## Reprocess.
3317     redo A;
3318     }
3319     } elsif ($self->{state} == PI_DATA_STATE) {
3320     if ($self->{nc} == 0x003F) { # ?
3321     $self->{state} = PI_DATA_AFTER_STATE;
3322     !!!next-input-character;
3323     redo A;
3324     } elsif ($self->{nc} == -1) {
3325     !!!parse-error (type => 'no pic'); ## TODO: type
3326 wakaba 1.13 if ($self->{in_subset}) {
3327 wakaba 1.14 $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state"
3328 wakaba 1.13 } else {
3329     $self->{state} = DATA_STATE;
3330     $self->{s_kwd} = '';
3331     }
3332 wakaba 1.8 ## Reprocess.
3333     !!!emit ($self->{ct}); # pi
3334     redo A;
3335     } else {
3336     $self->{ct}->{data} .= chr $self->{nc}; # pi
3337     $self->{read_until}->($self->{ct}->{data}, q[?],
3338     length $self->{ct}->{data});
3339     ## Stay in the state.
3340     !!!next-input-character;
3341     ## Reprocess.
3342     redo A;
3343     }
3344     } elsif ($self->{state} == PI_AFTER_STATE) {
3345 wakaba 1.14 ## XML5: Part of "Pi after state".
3346    
3347 wakaba 1.8 if ($self->{nc} == 0x003E) { # >
3348 wakaba 1.13 if ($self->{in_subset}) {
3349     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3350     } else {
3351     $self->{state} = DATA_STATE;
3352     $self->{s_kwd} = '';
3353     }
3354 wakaba 1.8 !!!next-input-character;
3355     !!!emit ($self->{ct}); # pi
3356     redo A;
3357     } elsif ($self->{nc} == 0x003F) { # ?
3358     !!!parse-error (type => 'no s after target', ## TODO: type
3359     line => $self->{line_prev},
3360     column => $self->{column_prev}); ## XML5: no error
3361     $self->{ct}->{data} .= '?';
3362     $self->{state} = PI_DATA_AFTER_STATE;
3363     !!!next-input-character;
3364     redo A;
3365     } else {
3366     !!!parse-error (type => 'no s after target', ## TODO: type
3367     line => $self->{line_prev},
3368     column => $self->{column_prev}
3369     + 1 * ($self->{nc} == -1)); ## XML5: no error
3370     $self->{ct}->{data} .= '?'; ## XML5: not appended
3371     $self->{state} = PI_DATA_STATE;
3372     ## Reprocess.
3373     redo A;
3374     }
3375     } elsif ($self->{state} == PI_DATA_AFTER_STATE) {
3376 wakaba 1.14 ## XML5: Same as "pi after state" and "DOCTYPE pi after state".
3377    
3378 wakaba 1.8 if ($self->{nc} == 0x003E) { # >
3379 wakaba 1.13 if ($self->{in_subset}) {
3380     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3381     } else {
3382     $self->{state} = DATA_STATE;
3383     $self->{s_kwd} = '';
3384     }
3385 wakaba 1.8 !!!next-input-character;
3386     !!!emit ($self->{ct}); # pi
3387     redo A;
3388     } elsif ($self->{nc} == 0x003F) { # ?
3389     $self->{ct}->{data} .= '?';
3390     ## Stay in the state.
3391     !!!next-input-character;
3392     redo A;
3393     } else {
3394     $self->{ct}->{data} .= '?'; ## XML5: not appended
3395     $self->{state} = PI_DATA_STATE;
3396     ## Reprocess.
3397     redo A;
3398     }
3399 wakaba 1.12
3400     } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_STATE) {
3401     if ($self->{nc} == 0x003C) { # <
3402 wakaba 1.13 $self->{state} = DOCTYPE_TAG_STATE;
3403 wakaba 1.12 !!!next-input-character;
3404     redo A;
3405     } elsif ($self->{nc} == 0x0025) { # %
3406     ## XML5: Not defined yet.
3407    
3408     ## TODO:
3409     !!!next-input-character;
3410     redo A;
3411     } elsif ($self->{nc} == 0x005D) { # ]
3412 wakaba 1.13 delete $self->{in_subset};
3413 wakaba 1.12 $self->{state} = DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
3414     !!!next-input-character;
3415     redo A;
3416     } elsif ($is_space->{$self->{nc}}) {
3417     ## Stay in the state.
3418     !!!next-input-character;
3419     redo A;
3420     } elsif ($self->{nc} == -1) {
3421     !!!parse-error (type => 'unclosed internal subset'); ## TODO: type
3422 wakaba 1.13 delete $self->{in_subset};
3423 wakaba 1.12 $self->{state} = DATA_STATE;
3424     $self->{s_kwd} = '';
3425     ## Reconsume.
3426 wakaba 1.13 !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3427 wakaba 1.12 redo A;
3428     } else {
3429     unless ($self->{internal_subset_tainted}) {
3430     ## XML5: No parse error.
3431     !!!parse-error (type => 'string in internal subset');
3432     $self->{internal_subset_tainted} = 1;
3433     }
3434     ## Stay in the state.
3435     !!!next-input-character;
3436     redo A;
3437     }
3438     } elsif ($self->{state} == DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
3439     if ($self->{nc} == 0x003E) { # >
3440     $self->{state} = DATA_STATE;
3441     $self->{s_kwd} = '';
3442     !!!next-input-character;
3443 wakaba 1.13 !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3444 wakaba 1.12 redo A;
3445     } elsif ($self->{nc} == -1) {
3446     !!!parse-error (type => 'unclosed DOCTYPE');
3447     $self->{state} = DATA_STATE;
3448     $self->{s_kwd} = '';
3449     ## Reconsume.
3450 wakaba 1.13 !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3451 wakaba 1.12 redo A;
3452     } else {
3453     ## XML5: No parse error and stay in the state.
3454     !!!parse-error (type => 'string after internal subset'); ## TODO: type
3455    
3456 wakaba 1.13 $self->{state} = BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE;
3457     !!!next-input-character;
3458     redo A;
3459     }
3460     } elsif ($self->{state} == BOGUS_DOCTYPE_INTERNAL_SUBSET_AFTER_STATE) {
3461     if ($self->{nc} == 0x003E) { # >
3462     $self->{state} = DATA_STATE;
3463     $self->{s_kwd} = '';
3464     !!!next-input-character;
3465     !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3466     redo A;
3467     } elsif ($self->{nc} == -1) {
3468     $self->{state} = DATA_STATE;
3469     $self->{s_kwd} = '';
3470     ## Reconsume.
3471     !!!emit ({type => END_OF_DOCTYPE_TOKEN});
3472     redo A;
3473     } else {
3474     ## Stay in the state.
3475     !!!next-input-character;
3476     redo A;
3477     }
3478     } elsif ($self->{state} == DOCTYPE_TAG_STATE) {
3479     if ($self->{nc} == 0x0021) { # !
3480 wakaba 1.14 $self->{state} = DOCTYPE_MARKUP_DECLARATION_OPEN_STATE;
3481 wakaba 1.13 !!!next-input-character;
3482     redo A;
3483     } elsif ($self->{nc} == 0x003F) { # ?
3484     $self->{state} = PI_STATE;
3485     !!!next-input-character;
3486     redo A;
3487     } elsif ($self->{nc} == -1) {
3488     !!!parse-error (type => 'bare stago');
3489     $self->{state} = DATA_STATE;
3490     $self->{s_kwd} = '';
3491     ## Reconsume.
3492     redo A;
3493     } else {
3494     !!!parse-error (type => 'bare stago', ## XML5: Not a parse error.
3495     line => $self->{line_prev},
3496     column => $self->{column_prev});
3497     $self->{state} = BOGUS_COMMENT_STATE;
3498     $self->{ct} = {type => COMMENT_TOKEN,
3499     data => '',
3500     }; ## NOTE: Will be discarded.
3501 wakaba 1.12 !!!next-input-character;
3502     redo A;
3503     }
3504 wakaba 1.14 } elsif ($self->{state} == DOCTYPE_MARKUP_DECLARATION_OPEN_STATE) {
3505     ## XML5: "DOCTYPE markup declaration state".
3506    
3507     if ($self->{nc} == 0x002D) { # -
3508     $self->{state} = MD_HYPHEN_STATE;
3509     !!!next-input-character;
3510     redo A;
3511     } elsif ($self->{nc} == 0x0045) { # E
3512     $self->{state} = MD_E_STATE;
3513     $self->{kwd} = chr $self->{nc};
3514     !!!next-input-character;
3515     redo A;
3516     } elsif ($self->{nc} == 0x0041) { # A
3517     $self->{state} = MD_ATTLIST_STATE;
3518     $self->{kwd} = chr $self->{nc};
3519     !!!next-input-character;
3520     redo A;
3521     } elsif ($self->{nc} == 0x004E) { # N
3522     $self->{state} = MD_NOTATION_STATE;
3523     $self->{kwd} = chr $self->{nc};
3524     !!!next-input-character;
3525     redo A;
3526     } else {
3527     #
3528     }
3529    
3530     ## XML5: No parse error.
3531     !!!parse-error (type => 'bogus comment',
3532     line => $self->{line_prev},
3533     column => $self->{column_prev} - 1);
3534     ## Reconsume.
3535     $self->{state} = BOGUS_COMMENT_STATE;
3536     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded.
3537     redo A;
3538     } elsif ($self->{state} == MD_E_STATE) {
3539     if ($self->{nc} == 0x004E) { # N
3540     $self->{state} = MD_ENTITY_STATE;
3541     $self->{kwd} .= chr $self->{nc};
3542     !!!next-input-character;
3543     redo A;
3544     } elsif ($self->{nc} == 0x004C) { # L
3545     ## XML5: <!ELEMENT> not supported.
3546     $self->{state} = MD_ELEMENT_STATE;
3547     $self->{kwd} .= chr $self->{nc};
3548     !!!next-input-character;
3549     redo A;
3550     } else {
3551     ## XML5: No parse error.
3552     !!!parse-error (type => 'bogus comment',
3553     line => $self->{line_prev},
3554     column => $self->{column_prev} - 2
3555     + 1 * ($self->{nc} == -1));
3556     ## Reconsume.
3557     $self->{state} = BOGUS_COMMENT_STATE;
3558     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3559     redo A;
3560     }
3561     } elsif ($self->{state} == MD_ENTITY_STATE) {
3562     if ($self->{nc} == {
3563     'EN' => 0x0054, # T
3564     'ENT' => 0x0049, # I
3565     'ENTI' => 0x0054, # T
3566     }->{$self->{kwd}}) {
3567     ## Stay in the state.
3568     $self->{kwd} .= chr $self->{nc};
3569     !!!next-input-character;
3570     redo A;
3571     } elsif ($self->{kwd} eq 'ENTIT' and
3572     $self->{nc} == 0x0059) { # Y
3573     $self->{ct} = {type => GENERAL_ENTITY_TOKEN, name => '', text => '',
3574     line => $self->{line_prev},
3575     column => $self->{column_prev} - 6};
3576     $self->{state} = DOCTYPE_MD_STATE;
3577     !!!next-input-character;
3578     redo A;
3579     } else {
3580     !!!parse-error (type => 'bogus comment',
3581     line => $self->{line_prev},
3582     column => $self->{column_prev} - 1
3583     - (length $self->{kwd})
3584     + 1 * ($self->{nc} == -1));
3585     $self->{state} = BOGUS_COMMENT_STATE;
3586     ## Reconsume.
3587     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3588     redo A;
3589     }
3590     } elsif ($self->{state} == MD_ELEMENT_STATE) {
3591     if ($self->{nc} == {
3592     'EL' => 0x0045, # E
3593     'ELE' => 0x004D, # M
3594     'ELEM' => 0x0045, # E
3595     'ELEME' => 0x004E, # N
3596     }->{$self->{kwd}}) {
3597     ## Stay in the state.
3598     $self->{kwd} .= chr $self->{nc};
3599     !!!next-input-character;
3600     redo A;
3601     } elsif ($self->{kwd} eq 'ELEMEN' and
3602     $self->{nc} == 0x0054) { # T
3603     $self->{ct} = {type => ELEMENT_TOKEN, name => '',
3604     line => $self->{line_prev},
3605     column => $self->{column_prev} - 6};
3606     $self->{state} = DOCTYPE_MD_STATE;
3607     !!!next-input-character;
3608     redo A;
3609     } else {
3610     !!!parse-error (type => 'bogus comment',
3611     line => $self->{line_prev},
3612     column => $self->{column_prev} - 1
3613     - (length $self->{kwd})
3614     + 1 * ($self->{nc} == -1));
3615     $self->{state} = BOGUS_COMMENT_STATE;
3616     ## Reconsume.
3617     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3618     redo A;
3619     }
3620     } elsif ($self->{state} == MD_ATTLIST_STATE) {
3621     if ($self->{nc} == {
3622     'A' => 0x0054, # T
3623     'AT' => 0x0054, # T
3624     'ATT' => 0x004C, # L
3625     'ATTL' => 0x0049, # I
3626     'ATTLI' => 0x0053, # S
3627     }->{$self->{kwd}}) {
3628     ## Stay in the state.
3629     $self->{kwd} .= chr $self->{nc};
3630     !!!next-input-character;
3631     redo A;
3632     } elsif ($self->{kwd} eq 'ATTLIS' and
3633     $self->{nc} == 0x0054) { # T
3634     $self->{ct} = {type => ATTLIST_TOKEN, name => '',
3635 wakaba 1.15 attrdefs => [],
3636 wakaba 1.14 line => $self->{line_prev},
3637     column => $self->{column_prev} - 6};
3638     $self->{state} = DOCTYPE_MD_STATE;
3639     !!!next-input-character;
3640     redo A;
3641     } else {
3642     !!!parse-error (type => 'bogus comment',
3643     line => $self->{line_prev},
3644     column => $self->{column_prev} - 1
3645     - (length $self->{kwd})
3646     + 1 * ($self->{nc} == -1));
3647     $self->{state} = BOGUS_COMMENT_STATE;
3648     ## Reconsume.
3649     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3650     redo A;
3651     }
3652     } elsif ($self->{state} == MD_NOTATION_STATE) {
3653     if ($self->{nc} == {
3654     'N' => 0x004F, # O
3655     'NO' => 0x0054, # T
3656     'NOT' => 0x0041, # A
3657     'NOTA' => 0x0054, # T
3658     'NOTAT' => 0x0049, # I
3659     'NOTATI' => 0x004F, # O
3660     }->{$self->{kwd}}) {
3661     ## Stay in the state.
3662     $self->{kwd} .= chr $self->{nc};
3663     !!!next-input-character;
3664     redo A;
3665     } elsif ($self->{kwd} eq 'NOTATIO' and
3666     $self->{nc} == 0x004E) { # N
3667     $self->{ct} = {type => NOTATION_TOKEN, name => '',
3668     line => $self->{line_prev},
3669     column => $self->{column_prev} - 6};
3670     $self->{state} = DOCTYPE_MD_STATE;
3671     !!!next-input-character;
3672     redo A;
3673     } else {
3674     !!!parse-error (type => 'bogus comment',
3675     line => $self->{line_prev},
3676     column => $self->{column_prev} - 1
3677     - (length $self->{kwd})
3678     + 1 * ($self->{nc} == -1));
3679     $self->{state} = BOGUS_COMMENT_STATE;
3680     ## Reconsume.
3681     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3682     redo A;
3683     }
3684     } elsif ($self->{state} == DOCTYPE_MD_STATE) {
3685     ## XML5: "DOCTYPE ENTITY state", "DOCTYPE ATTLIST state", and
3686     ## "DOCTYPE NOTATION state".
3687    
3688     if ($is_space->{$self->{nc}}) {
3689     ## XML5: [NOTATION] Switch to the "DOCTYPE NOTATION identifier state".
3690     $self->{state} = BEFORE_MD_NAME_STATE;
3691     !!!next-input-character;
3692     redo A;
3693     } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
3694     $self->{nc} == 0x0025) { # %
3695     ## XML5: Switch to the "DOCTYPE bogus comment state".
3696     !!!parse-error (type => 'no space before md name'); ## TODO: type
3697     $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
3698     !!!next-input-character;
3699     redo A;
3700     } elsif ($self->{nc} == -1) {
3701     !!!parse-error (type => 'unclosed md'); ## TODO: type
3702     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3703     ## Reconsume.
3704     redo A;
3705     } elsif ($self->{nc} == 0x003E) { # >
3706     ## XML5: Switch to the "DOCTYPE bogus comment state".
3707     !!!parse-error (type => 'no md name'); ## TODO: type
3708     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3709     !!!next-input-character;
3710     redo A;
3711     } else {
3712     ## XML5: Switch to the "DOCTYPE bogus comment state".
3713     !!!parse-error (type => 'no space before md name'); ## TODO: type
3714     $self->{state} = BEFORE_MD_NAME_STATE;
3715     redo A;
3716     }
3717     } elsif ($self->{state} == BEFORE_MD_NAME_STATE) {
3718     ## XML5: "DOCTYPE ENTITY parameter state", "DOCTYPE ENTITY type
3719     ## before state", "DOCTYPE ATTLIST name before state".
3720    
3721     if ($is_space->{$self->{nc}}) {
3722     ## Stay in the state.
3723     !!!next-input-character;
3724     redo A;
3725     } elsif ($self->{ct}->{type} == GENERAL_ENTITY_TOKEN and
3726     $self->{nc} == 0x0025) { # %
3727     $self->{state} = DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE;
3728     !!!next-input-character;
3729     redo A;
3730     } elsif ($self->{nc} == 0x003E) { # >
3731     ## XML5: Same as "Anything else".
3732     !!!parse-error (type => 'no md name'); ## TODO: type
3733     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3734     !!!next-input-character;
3735     redo A;
3736     } elsif ($self->{nc} == -1) {
3737     !!!parse-error (type => 'unclosed md'); ## TODO: type
3738     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3739     ## Reconsume.
3740     redo A;
3741     } else {
3742     ## XML5: [ATTLIST] Not defined yet.
3743     $self->{ct}->{name} .= chr $self->{nc};
3744     $self->{state} = MD_NAME_STATE;
3745     !!!next-input-character;
3746     redo A;
3747     }
3748     } elsif ($self->{state} == DOCTYPE_ENTITY_PARAMETER_BEFORE_STATE) {
3749     if ($is_space->{$self->{nc}}) {
3750     ## XML5: Switch to the "DOCTYPE ENTITY parameter state".
3751     $self->{ct}->{type} = PARAMETER_ENTITY_TOKEN;
3752     $self->{state} = BEFORE_MD_NAME_STATE;
3753     !!!next-input-character;
3754     redo A;
3755     } elsif ($self->{nc} == 0x003E) { # >
3756     ## XML5: Same as "Anything else".
3757     !!!parse-error (type => 'no md name'); ## TODO: type
3758     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3759     !!!next-input-character;
3760     redo A;
3761     } elsif ($self->{nc} == -1) {
3762     !!!parse-error (type => 'unclosed md');
3763     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3764     ## Reconsume.
3765     redo A;
3766     } else {
3767     ## XML5: No parse error.
3768     !!!parse-error (type => 'no space after ENTITY percent'); ## TODO: type
3769     $self->{state} = BOGUS_COMMENT_STATE;
3770     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
3771     ## Reconsume.
3772     redo A;
3773     }
3774     } elsif ($self->{state} == MD_NAME_STATE) {
3775     ## XML5: "DOCTYPE ENTITY name state" and "DOCTYPE ATTLIST name state".
3776    
3777     if ($is_space->{$self->{nc}}) {
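3778     ## TODO: Every markup declaration type (ELEMENT/ENTITY/ATTLIST/NOTATION) is sent to the ATTLIST name-after state here, with no check of the token type.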
3779     $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
3780     !!!next-input-character;
3781     redo A;
3782     } elsif ($self->{nc} == 0x003E) { # >
3783     if ($self->{ct}->{type} == ATTLIST_TOKEN) {
3784     #
3785     } else {
3786     !!!parse-error (type => 'no md body'); ## TODO: type
3787     }
3788     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3789     !!!next-input-character;
3790     !!!emit ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
3791     redo A;
3792     } elsif ($self->{nc} == -1) {
3793     ## XML5: [ATTLIST] No parse error.
3794     !!!parse-error (type => 'unclosed md');
3795     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3796     ## Reconsume.
3797     !!!emit ($self->{ct}); # ELEMENT/ENTITY/ATTLIST/NOTATION
3798     redo A;
3799     } else {
3800     ## XML5: [ATTLIST] Not defined yet.
3801     $self->{ct}->{name} .= chr $self->{nc};
3802     ## Stay in the state.
3803     !!!next-input-character;
3804     redo A;
3805     }
3806     } elsif ($self->{state} == DOCTYPE_ATTLIST_NAME_AFTER_STATE) {
3807     if ($is_space->{$self->{nc}}) {
3808     ## Stay in the state.
3809     !!!next-input-character;
3810     redo A;
3811     } elsif ($self->{nc} == 0x003E) { # >
3812     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3813     !!!next-input-character;
3814     !!!emit ($self->{ct}); # ATTLIST
3815     redo A;
3816     } elsif ($self->{nc} == -1) {
3817     ## XML5: No parse error.
3818     !!!parse-error (type => 'unclosed md'); ## TODO: type
3819     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3820 wakaba 1.15 !!!emit ($self->{ct});
3821     redo A;
3822     } else {
3823     ## XML5: Not defined yet.
3824     $self->{ca} = {name => chr ($self->{nc}), # attrdef
3825     tokens => [],
3826     line => $self->{line}, column => $self->{column}};
3827     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE;
3828     !!!next-input-character;
3829     redo A;
3830     }
3831     } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE) {
3832     if ($is_space->{$self->{nc}}) {
3833     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE;
3834     !!!next-input-character;
3835     redo A;
3836     } elsif ($self->{nc} == 0x003E) { # >
3837     ## XML5: Same as "anything else".
3838     !!!parse-error (type => 'no attr type'); ## TODO: type
3839     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3840     !!!next-input-character;
3841     !!!emit ($self->{ct}); # ATTLIST
3842     redo A;
3843     } elsif ($self->{nc} == 0x0028) { # (
3844     ## XML5: Same as "anything else".
3845     !!!parse-error (type => 'no space before paren'); ## TODO: type
3846     $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3847     !!!next-input-character;
3848     redo A;
3849     } elsif ($self->{nc} == -1) {
3850     ## XML5: No parse error.
3851     !!!parse-error (type => 'unclosed md'); ## TODO: type
3852     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3853     !!!next-input-character;
3854     !!!emit ($self->{ct}); # ATTLIST
3855     redo A;
3856     } else {
3857     ## XML5: Not defined yet.
3858     $self->{ca}->{name} .= chr $self->{nc};
3859     ## Stay in the state.
3860     !!!next-input-character;
3861     redo A;
3862     }
3863     } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_NAME_AFTER_STATE) {
3864     if ($is_space->{$self->{nc}}) {
3865     ## Stay in the state.
3866     !!!next-input-character;
3867     redo A;
3868     } elsif ($self->{nc} == 0x003E) { # >
3869     ## XML5: Same as "anything else".
3870     !!!parse-error (type => 'no attr type'); ## TODO: type
3871     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3872     !!!next-input-character;
3873     !!!emit ($self->{ct}); # ATTLIST
3874     redo A;
3875     } elsif ($self->{nc} == 0x0028) { # (
3876     ## XML5: Same as "anything else".
3877     $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3878     !!!next-input-character;
3879     redo A;
3880     } elsif ($self->{nc} == -1) {
3881     ## XML5: No parse error.
3882     !!!parse-error (type => 'unclosed md'); ## TODO: type
3883     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3884     !!!next-input-character;
3885     !!!emit ($self->{ct});
3886 wakaba 1.14 redo A;
3887     } else {
3888     ## XML5: Not defined yet.
3889 wakaba 1.15 $self->{ca}->{type} = chr $self->{nc};
3890     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE;
3891     !!!next-input-character;
3892     redo A;
3893     }
3894     } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_STATE) {
3895     if ($is_space->{$self->{nc}}) {
3896     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE;
3897     !!!next-input-character;
3898     redo A;
3899     } elsif ($self->{nc} == 0x0023) { # #
3900     ## XML5: Same as "anything else".
3901     !!!parse-error (type => 'no space before default value'); ## TODO: type
3902     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
3903     !!!next-input-character;
3904     redo A;
3905     } elsif ($self->{nc} == 0x0022) { # "
3906     ## XML5: Same as "anything else".
3907     !!!parse-error (type => 'no space before default value'); ## TODO: type
3908     $self->{ca}->{value} = '';
3909     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
3910     !!!next-input-character;
3911     redo A;
3912     } elsif ($self->{nc} == 0x0027) { # '
3913     ## XML5: Same as "anything else".
3914     !!!parse-error (type => 'no space before default value'); ## TODO: type
3915     $self->{ca}->{value} = '';
3916     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
3917     !!!next-input-character;
3918     redo A;
3919     } elsif ($self->{nc} == 0x003E) { # >
3920     ## XML5: Same as "anything else".
3921     !!!parse-error (type => 'no attr default'); ## TODO: type
3922     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3923     !!!next-input-character;
3924     !!!emit ($self->{ct}); # ATTLIST
3925     redo A;
3926     } elsif ($self->{nc} == 0x0028) { # (
3927     ## XML5: Same as "anything else".
3928     !!!parse-error (type => 'no space before paren'); ## TODO: type
3929     $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3930     !!!next-input-character;
3931     redo A;
3932     } elsif ($self->{nc} == -1) {
3933     ## XML5: No parse error.
3934     !!!parse-error (type => 'unclosed md'); ## TODO: type
3935     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3936     !!!next-input-character;
3937     !!!emit ($self->{ct});
3938     redo A;
3939     } else {
3940     ## XML5: Not defined yet.
3941     $self->{ca}->{type} .= chr $self->{nc};
3942     ## Stay in the state.
3943     !!!next-input-character;
3944     redo A;
3945     }
3946     } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_TYPE_AFTER_STATE) {
3947     if ($is_space->{$self->{nc}}) {
3948     ## Stay in the state.
3949     !!!next-input-character;
3950     redo A;
3951     } elsif ($self->{nc} == 0x0028) { # (
3952     ## XML5: Same as "anything else".
3953     $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
3954     !!!next-input-character;
3955     redo A;
3956     } elsif ($self->{nc} == 0x0023) { # #
3957     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
3958     !!!next-input-character;
3959     redo A;
3960     } elsif ($self->{nc} == 0x0022) { # "
3961     ## XML5: Same as "anything else".
3962     $self->{ca}->{value} = '';
3963     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
3964     !!!next-input-character;
3965     redo A;
3966     } elsif ($self->{nc} == 0x0027) { # '
3967     ## XML5: Same as "anything else".
3968     $self->{ca}->{value} = '';
3969     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
3970     !!!next-input-character;
3971     redo A;
3972     } elsif ($self->{nc} == 0x003E) { # >
3973     ## XML5: Same as "anything else".
3974     !!!parse-error (type => 'no attr default'); ## TODO: type
3975     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
3976     !!!next-input-character;
3977     !!!emit ($self->{ct}); # ATTLIST
3978     redo A;
3979     } elsif ($self->{nc} == -1) {
3980     ## XML5: No parse error.
3981     !!!parse-error (type => 'unclosed md'); ## TODO: type
3982     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
3983     !!!next-input-character;
3984     !!!emit ($self->{ct});
3985     redo A;
3986     } else {
3987     ## XML5: Switch to the "DOCTYPE bogus comment state".
3988     !!!parse-error (type => 'unquoted attr value'); ## TODO: type
3989     $self->{ca}->{value} = '';
3990     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
3991     ## Reconsume.
3992     redo A;
3993     }
3994     } elsif ($self->{state} == BEFORE_ALLOWED_TOKEN_STATE) {
3995     if ($is_space->{$self->{nc}}) {
3996     ## Stay in the state.
3997     !!!next-input-character;
3998     redo A;
3999     } elsif ($self->{nc} == 0x007C) { # |
4000     !!!parse-error (type => 'empty allowed token'); ## TODO: type
4001     ## Stay in the state.
4002     !!!next-input-character;
4003     redo A;
4004     } elsif ($self->{nc} == 0x0029) { # )
4005     !!!parse-error (type => 'empty allowed token'); ## TODO: type
4006     $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4007     !!!next-input-character;
4008     redo A;
4009     } elsif ($self->{nc} == 0x003E) { # >
4010     !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4011     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4012     !!!next-input-character;
4013     !!!emit ($self->{ct}); # ATTLIST
4014     redo A;
4015     } elsif ($self->{nc} == -1) {
4016     ## XML5: No parse error.
4017     !!!parse-error (type => 'unclosed md'); ## TODO: type
4018     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4019     !!!next-input-character;
4020     !!!emit ($self->{ct});
4021     redo A;
4022     } else {
4023     push @{$self->{ca}->{tokens}}, chr $self->{nc};
4024     $self->{state} = ALLOWED_TOKEN_STATE;
4025     !!!next-input-character;
4026     redo A;
4027     }
4028     } elsif ($self->{state} == ALLOWED_TOKEN_STATE) {
4029     if ($is_space->{$self->{nc}}) {
4030     $self->{state} = AFTER_ALLOWED_TOKEN_STATE;
4031     !!!next-input-character;
4032     redo A;
4033     } elsif ($self->{nc} == 0x007C) { # |
4034     $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4035     !!!next-input-character;
4036     redo A;
4037     } elsif ($self->{nc} == 0x0029) { # )
4038     $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4039     !!!next-input-character;
4040     redo A;
4041     } elsif ($self->{nc} == 0x003E) { # >
4042     !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4043     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4044     !!!next-input-character;
4045     !!!emit ($self->{ct}); # ATTLIST
4046     redo A;
4047     } elsif ($self->{nc} == -1) {
4048     ## XML5: No parse error.
4049     !!!parse-error (type => 'unclosed md'); ## TODO: type
4050     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4051     !!!next-input-character;
4052     !!!emit ($self->{ct});
4053     redo A;
4054     } else {
4055     $self->{ca}->{tokens}->[-1] .= chr $self->{nc};
4056     ## Stay in the state.
4057     !!!next-input-character;
4058     redo A;
4059     }
4060     } elsif ($self->{state} == AFTER_ALLOWED_TOKEN_STATE) {
4061     if ($is_space->{$self->{nc}}) {
4062     ## Stay in the state.
4063     !!!next-input-character;
4064     redo A;
4065     } elsif ($self->{nc} == 0x007C) { # |
4066     $self->{state} = BEFORE_ALLOWED_TOKEN_STATE;
4067     !!!next-input-character;
4068     redo A;
4069     } elsif ($self->{nc} == 0x0029) { # )
4070     $self->{state} = AFTER_ALLOWED_TOKENS_STATE;
4071     !!!next-input-character;
4072     redo A;
4073     } elsif ($self->{nc} == 0x003E) { # >
4074     !!!parse-error (type => 'unclosed allowed tokens'); ## TODO: type
4075     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4076     !!!next-input-character;
4077     !!!emit ($self->{ct}); # ATTLIST
4078     redo A;
4079     } elsif ($self->{nc} == -1) {
4080     ## XML5: No parse error.
4081     !!!parse-error (type => 'unclosed md'); ## TODO: type
4082     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4083     !!!next-input-character;
4084     !!!emit ($self->{ct});
4085     redo A;
4086     } else {
4087     !!!parse-error (type => 'space in allowed token', ## TODO: type
4088     line => $self->{line_prev},
4089     column => $self->{column_prev});
4090     $self->{ca}->{tokens}->[-1] .= ' ' . chr $self->{nc};
4091     $self->{state} = ALLOWED_TOKEN_STATE;
4092     !!!next-input-character;
4093     redo A;
4094     }
4095     } elsif ($self->{state} == AFTER_ALLOWED_TOKENS_STATE) {
4096     if ($is_space->{$self->{nc}}) {
4097     $self->{state} = BEFORE_ATTR_DEFAULT_STATE;
4098     !!!next-input-character;
4099     redo A;
4100     } elsif ($self->{nc} == 0x0023) { # #
4101     !!!parse-error (type => 'no space before default value'); ## TODO: type
4102     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4103     !!!next-input-character;
4104     redo A;
4105     } elsif ($self->{nc} == 0x0022) { # "
4106     !!!parse-error (type => 'no space before default value'); ## TODO: type
4107     $self->{ca}->{value} = '';
4108     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4109     !!!next-input-character;
4110     redo A;
4111     } elsif ($self->{nc} == 0x0027) { # '
4112     !!!parse-error (type => 'no space before default value'); ## TODO: type
4113     $self->{ca}->{value} = '';
4114     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4115     !!!next-input-character;
4116     redo A;
4117     } elsif ($self->{nc} == 0x003E) { # >
4118     !!!parse-error (type => 'no attr default'); ## TODO: type
4119     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4120     !!!next-input-character;
4121     !!!emit ($self->{ct}); # ATTLIST
4122     redo A;
4123     } elsif ($self->{nc} == -1) {
4124     !!!parse-error (type => 'unclosed md'); ## TODO: type
4125     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4126     !!!next-input-character;
4127     !!!emit ($self->{ct});
4128     redo A;
4129     } else {
4130     !!!parse-error (type => 'unquoted attr value'); ## TODO: type
4131     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4132     ## Reconsume.
4133     redo A;
4134     }
4135     } elsif ($self->{state} == BEFORE_ATTR_DEFAULT_STATE) {
4136     if ($is_space->{$self->{nc}}) {
4137     ## Stay in the state.
4138     !!!next-input-character;
4139     redo A;
4140     } elsif ($self->{nc} == 0x0023) { # #
4141     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE;
4142     !!!next-input-character;
4143     redo A;
4144     } elsif ($self->{nc} == 0x0022) { # "
4145     $self->{ca}->{value} = '';
4146     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4147     !!!next-input-character;
4148     redo A;
4149     } elsif ($self->{nc} == 0x0027) { # '
4150     $self->{ca}->{value} = '';
4151     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4152     !!!next-input-character;
4153     redo A;
4154     } elsif ($self->{nc} == 0x003E) { # >
4155     !!!parse-error (type => 'no attr default'); ## TODO: type
4156     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4157     !!!next-input-character;
4158     !!!emit ($self->{ct}); # ATTLIST
4159     redo A;
4160     } elsif ($self->{nc} == -1) {
4161     !!!parse-error (type => 'unclosed md'); ## TODO: type
4162     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4163     !!!next-input-character;
4164     !!!emit ($self->{ct});
4165     redo A;
4166     } else {
4167     !!!parse-error (type => 'unquoted attr value'); ## TODO: type
4168     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4169     ## Reconsume.
4170     redo A;
4171     }
4172     } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_BEFORE_STATE) {
4173     if ($is_space->{$self->{nc}}) {
4174     ## XML5: No parse error.
4175     !!!parse-error (type => 'no default type'); ## TODO: type
4176 wakaba 1.14 $self->{state} = BOGUS_COMMENT_STATE;
4177     $self->{ct} = {type => COMMENT_TOKEN, data => ''}; ## Will be discarded
4178     ## Reconsume.
4179     redo A;
4180 wakaba 1.15 } elsif ($self->{nc} == 0x0022) { # "
4181     ## XML5: Same as "anything else".
4182     $self->{ca}->{value} = '';
4183     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4184     !!!next-input-character;
4185     redo A;
4186     } elsif ($self->{nc} == 0x0027) { # '
4187     ## XML5: Same as "anything else".
4188     $self->{ca}->{value} = '';
4189     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4190     !!!next-input-character;
4191     redo A;
4192     } elsif ($self->{nc} == 0x003E) { # >
4193     ## XML5: Same as "anything else".
4194     !!!parse-error (type => 'no attr default'); ## TODO: type
4195     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4196     !!!next-input-character;
4197     !!!emit ($self->{ct}); # ATTLIST
4198     redo A;
4199     } elsif ($self->{nc} == -1) {
4200     ## XML5: No parse error.
4201     !!!parse-error (type => 'unclosed md'); ## TODO: type
4202     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4203     !!!next-input-character;
4204     !!!emit ($self->{ct});
4205     redo A;
4206     } else {
4207     $self->{ca}->{default} = chr $self->{nc};
4208     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE;
4209     !!!next-input-character;
4210     redo A;
4211 wakaba 1.14 }
4212 wakaba 1.15 } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_STATE) {
4213     if ($is_space->{$self->{nc}}) {
4214     $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE;
4215     !!!next-input-character;
4216     redo A;
4217     } elsif ($self->{nc} == 0x0022) { # "
4218     ## XML5: Same as "anything else".
4219     !!!parse-error (type => 'no space before default value'); ## TODO: type
4220     $self->{ca}->{value} = '';
4221     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4222     !!!next-input-character;
4223     redo A;
4224     } elsif ($self->{nc} == 0x0027) { # '
4225     ## XML5: Same as "anything else".
4226     !!!parse-error (type => 'no space before default value'); ## TODO: type
4227     $self->{ca}->{value} = '';
4228     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4229     !!!next-input-character;
4230     redo A;
4231     } elsif ($self->{nc} == 0x003E) { # >
4232     ## XML5: Same as "anything else".
4233     push @{$self->{ct}->{attrdefs}}, $self->{ca};
4234     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4235     !!!next-input-character;
4236     !!!emit ($self->{ct}); # ATTLIST
4237     redo A;
4238     } elsif ($self->{nc} == -1) {
4239     ## XML5: No parse error.
4240     !!!parse-error (type => 'unclosed md'); ## TODO: type
4241     push @{$self->{ct}->{attrdefs}}, $self->{ca};
4242     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4243     !!!next-input-character;
4244     !!!emit ($self->{ct});
4245     redo A;
4246     } else {
4247     $self->{ca}->{default} .= chr $self->{nc};
4248     ## Stay in the state.
4249     !!!next-input-character;
4250     redo A;
4251     }
4252     } elsif ($self->{state} == DOCTYPE_ATTLIST_ATTRIBUTE_DECLARATION_AFTER_STATE) {
4253     if ($is_space->{$self->{nc}}) {
4254     ## Stay in the state.
4255     !!!next-input-character;
4256     redo A;
4257     } elsif ($self->{nc} == 0x0022) { # "
4258     $self->{ca}->{value} = '';
4259     $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
4260     !!!next-input-character;
4261     redo A;
4262     } elsif ($self->{nc} == 0x0027) { # '
4263     $self->{ca}->{value} = '';
4264     $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
4265     !!!next-input-character;
4266     redo A;
4267     } elsif ($self->{nc} == 0x003E) { # >
4268     push @{$self->{ct}->{attrdefs}}, $self->{ca};
4269     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE;
4270     !!!next-input-character;
4271     !!!emit ($self->{ct}); # ATTLIST
4272     redo A;
4273     } elsif ($self->{nc} == -1) {
4274     ## XML5: No parse error.
4275     !!!parse-error (type => 'unclosed md'); ## TODO: type
4276     push @{$self->{ct}->{attrdefs}}, $self->{ca};
4277     $self->{state} = DOCTYPE_INTERNAL_SUBSET_STATE; ## XML5: "Data state".
4278     !!!next-input-character;
4279     !!!emit ($self->{ct});
4280     redo A;
4281     } else {
4282     ## XML5: Not defined yet.
4283     if ($self->{ca}->{default} eq 'FIXED') {
4284     $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
4285     } else {
4286     push @{$self->{ct}->{attrdefs}}, $self->{ca};
4287     $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4288     }
4289     ## Reconsume.
4290     redo A;
4291     }
4292     } elsif ($self->{state} == AFTER_ATTLIST_ATTR_VALUE_QUOTED_STATE) {
4293     if ($is_space->{$self->{nc}} or
4294     $self->{nc} == -1 or
4295     $self->{nc} == 0x003E) { # >
4296     $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4297     ## Reconsume.
4298     redo A;
4299     } else {
4300     !!!parse-error (type => 'no space before attr name'); ## TODO: type
4301     $self->{state} = DOCTYPE_ATTLIST_NAME_AFTER_STATE;
4302     ## Reconsume.
4303     redo A;
4304     }
4305 wakaba 1.1 } else {
4306     die "$0: $self->{state}: Unknown state";
4307     }
4308     } # A
4309    
4310     die "$0: _get_next_token: unexpected case";
4311     } # _get_next_token
4312    
4313     1;
4314 wakaba 1.15 ## $Date: 2008/10/17 07:14:29 $
4315    

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24