/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.128 - (hide annotations) (download) (as text)
Sat Apr 12 15:31:56 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.127: +3 -3 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	12 Apr 2008 15:31:52 -0000
	* HTML.pm.src: Support for new long MathML entities (HTML5
	revision 1406).

2008-04-13  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.128 our $VERSION=do{my @r=(q$Revision: 1.127 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263)
12     ## TODO: 1252 parse error (revision 1264)
13     ## TODO: 8859-11 = 874 (revision 1271)
14    
## Namespace URIs used by the parser: HTML, MathML, SVG, XLink, and the
## two namespaces reserved by XML itself (xml: and xmlns:).
my $HTML_NS  = 'http://www.w3.org/1999/xhtml';
my $MML_NS   = 'http://www.w3.org/1998/Math/MathML';
my $SVG_NS   = 'http://www.w3.org/2000/svg';
my $XLINK_NS = 'http://www.w3.org/1999/xlink';
my $XML_NS   = 'http://www.w3.org/XML/1998/namespace';
my $XMLNS_NS = 'http://www.w3.org/2000/xmlns/';
21    
## Element category flags.  Each primitive category occupies one bit so
## that categories can be combined with |\|| and tested with |&|.
sub A_EL ()                    { 1 << 0 }
sub ADDRESS_EL ()              { 1 << 1 }
sub BODY_EL ()                 { 1 << 2 }
sub BUTTON_EL ()               { 1 << 3 }
sub CAPTION_EL ()              { 1 << 4 }
sub DD_EL ()                   { 1 << 5 }
sub DIV_EL ()                  { 1 << 6 }
sub DT_EL ()                   { 1 << 7 }
sub FORM_EL ()                 { 1 << 8 }
sub FORMATTING_EL ()           { 1 << 9 }
sub FRAMESET_EL ()             { 1 << 10 }
sub HEADING_EL ()              { 1 << 11 }
sub HTML_EL ()                 { 1 << 12 }
sub LI_EL ()                   { 1 << 13 }
sub NOBR_EL ()                 { 1 << 14 }
sub OPTION_EL ()               { 1 << 15 }
sub OPTGROUP_EL ()             { 1 << 16 }
sub P_EL ()                    { 1 << 17 }
sub SELECT_EL ()               { 1 << 18 }
sub TABLE_EL ()                { 1 << 19 }
sub TABLE_CELL_EL ()           { 1 << 20 }
sub TABLE_ROW_EL ()            { 1 << 21 }
sub TABLE_ROW_GROUP_EL ()      { 1 << 22 }
sub MISC_SCOPING_EL ()         { 1 << 23 }
sub MISC_SPECIAL_EL ()         { 1 << 24 }
sub FOREIGN_EL ()              { 1 << 25 }
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 26 }
sub MML_AXML_EL ()             { 1 << 27 }

## Composite categories, each the union of the primitive flags above.

sub TABLE_ROWS_EL () { TABLE_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL }

sub END_TAG_OPTIONAL_EL () { DD_EL | DT_EL | LI_EL | P_EL }

sub ALL_END_TAG_OPTIONAL_EL () {
  END_TAG_OPTIONAL_EL | BODY_EL | HTML_EL |
  TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
}

sub SCOPING_EL () {
  BUTTON_EL | CAPTION_EL | HTML_EL |
  TABLE_EL | TABLE_CELL_EL | MISC_SCOPING_EL
}

sub TABLE_SCOPING_EL () { HTML_EL | TABLE_EL }

sub TABLE_ROWS_SCOPING_EL () { HTML_EL | TABLE_ROW_GROUP_EL }

sub TABLE_ROW_SCOPING_EL () { HTML_EL | TABLE_ROW_EL }

sub SPECIAL_EL () {
  ADDRESS_EL | BODY_EL | DIV_EL | END_TAG_OPTIONAL_EL | FORM_EL |
  FRAMESET_EL | HEADING_EL | OPTION_EL | OPTGROUP_EL | SELECT_EL |
  TABLE_ROW_EL | TABLE_ROW_GROUP_EL | MISC_SPECIAL_EL
}
112    
## Maps an HTML element's local name to its category flag(s).  Entries
## are grouped by category; elements not listed have no category.
my $el_category = {
  a    => A_EL | FORMATTING_EL,
  nobr => NOBR_EL | FORMATTING_EL,
  (map { $_ => FORMATTING_EL } qw(
    b big em font i s small strike strong tt u
  )),

  address  => ADDRESS_EL,
  body     => BODY_EL,
  button   => BUTTON_EL,
  caption  => CAPTION_EL,
  dd       => DD_EL,
  div      => DIV_EL,
  dt       => DT_EL,
  form     => FORM_EL,
  frameset => FRAMESET_EL,
  html     => HTML_EL,
  li       => LI_EL,
  optgroup => OPTGROUP_EL,
  option   => OPTION_EL,
  p        => P_EL,
  select   => SELECT_EL,

  (map { $_ => HEADING_EL } qw(h1 h2 h3 h4 h5 h6)),

  table => TABLE_EL,
  tr    => TABLE_ROW_EL,
  (map { $_ => TABLE_CELL_EL } qw(td th)),
  (map { $_ => TABLE_ROW_GROUP_EL } qw(tbody tfoot thead)),

  (map { $_ => MISC_SCOPING_EL } qw(applet marquee object)),

  (map { $_ => MISC_SPECIAL_EL } qw(
    area base basefont bgsound blockquote br center col colgroup dir
    dl embed fieldset frame head hr iframe img input isindex
    link listing menu meta noembed noframes noscript ol param plaintext
    pre script spacer style textarea title ul wbr
  )),
};
197    
## Maps namespace URI, then local name, to the category flag of those
## foreign (MathML and SVG) elements that need special treatment.
my $el_category_f = {
  $MML_NS => {
    'annotation-xml' => MML_AXML_EL,
    (map { $_ => FOREIGN_FLOW_CONTENT_EL } qw(mi mo mn ms mtext)),
  },
  $SVG_NS => {
    ## TODO: case (of |foreignobject|)
    (map { $_ => FOREIGN_FLOW_CONTENT_EL } qw(foreignobject desc title)),
  },
  ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
};
214    
## Replacement code points for numeric character references in the
## range 0x80-0x9F.  Code points mapped to 0xFFFD have no replacement
## other than U+FFFD REPLACEMENT CHARACTER.
my $c1_entity_char = do {
  ## Replacements listed in order, starting at 0x80.
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80-0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88-0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90-0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98-0x9F
  );
  +{ map { (0x80 + $_ => $replacement[$_]) } 0 .. $#replacement };
}; # $c1_entity_char
249 wakaba 1.1
## Parses a string of bytes as an HTML document.
##
## Arguments, in order:
##   - the parser object, or a class name (a new parser is then created);
##   - the character encoding label, or |undef| to have it sniffed;
##   - the input bytes, or a reference to them;
##   - any further arguments, which are handed to |parse_char_string|
##     unchanged after the decoded string.
##
## If the tree constructor later asks for a different encoding, the
## |change_encoding| callback installed here throws
## |Whatpm::HTML::RestartParser|; the input is then decoded again with
## the requested encoding and parsed once more.
##
## Returns whatever |parse_char_string| returns.
sub parse_byte_string ($$$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $charset = shift;
  my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);
  my $s;

  ## |Encode::decode| is used on both branches below and again in the
  ## restart handler, so the module is loaded unconditionally.
  require Encode; ## TODO: decode(utf8) doesn't delete BOM

  if (defined $charset) {
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = lc $charset; ## TODO: normalize name
    $self->{confident} = 1;
  } else {
    ## TODO: Implement HTML5 detection algorithm
    require Whatpm::Charset::UniversalCharDet;
    $charset = Whatpm::Charset::UniversalCharDet->detect_byte_string
        (substr ($$bytes_s, 0, 1024));
    $charset ||= 'windows-1252';
    $s = \ (Encode::decode ($charset, $$bytes_s));
    ## Lowercased like the explicit-charset branch, because
    ## |change_encoding| compares this value against a lowercased label.
    $self->{input_encoding} = lc $charset; ## TODO: normalize name
    $self->{confident} = 0;
  }

  $self->{change_encoding} = sub {
    my $self = shift;
    my $charset = lc shift;
    my $token = shift;
    ## TODO: if $charset is supported
    ## TODO: normalize charset name

    ## "Change the encoding" algorithm:

    ## Step 1
    if ($charset eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
      $charset = 'utf-8';
    }

    ## Step 2
    if (defined $self->{input_encoding} and
        $self->{input_encoding} eq $charset) {
      $self->{confident} = 1;
      return;
    }

    !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
        ':'.$charset, level => 'w', token => $token);

    ## Step 3
    # if (can) {
      ## change the encoding on the fly.
      #$self->{confident} = 1;
      #return;
    # }

    ## Step 4
    throw Whatpm::HTML::RestartParser (charset => $charset);
  }; # $self->{change_encoding}

  ## Everything after the byte string is passed through to the
  ## character-level parser.
  my @args = @_; shift @args; # $s
  my $return;
  try {
    $return = $self->parse_char_string ($s, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## Decode the original bytes again with the requested encoding and
    ## parse once more, this time with a confident encoding.
    my $charset = shift->{charset};
    $s = \ (Encode::decode ($charset, $$bytes_s));
    $self->{input_encoding} = $charset; ## TODO: normalize
    $self->{confident} = 1;
    $return = $self->parse_char_string ($s, @args);
  };
  return $return;
} # parse_byte_string
320    
321 wakaba 1.71 ## NOTE: The HTML5 spec says that the encoding layer MUST NOT strip the BOM
322     ## and that the HTML layer MUST ignore it. However, we do strip the BOM in
323     ## the encoding layer, and the HTML layer does not ignore any U+FEFF,
324     ## because the core part of our HTML parser expects a string of characters,
325     ## not a string of bytes or code units or anything which might contain a BOM.
326     ## Therefore, any parser interface that accepts a string of bytes,
327     ## such as |parse_byte_string| in this module, must ensure that it
328     ## strips the BOM and never strips any ZWNBSP.
329    
330 wakaba 1.63 *parse_char_string = \&parse_string;
331    
## Parses a string of characters as an HTML document.
##
## Arguments, in order:
##   - the parser object, or a class name (a new parser is then created);
##   - the input string, or a reference to it;
##   - the document node that receives the tree; its existing child
##     nodes are removed first;
##   - an optional error callback, called with key/value pairs.  The
##     default callback prints a warning with the error type, line and
##     column.
##
## Returns the document node.
332 wakaba 1.1 sub parse_string ($$$;$) {
333 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
334     my $s = ref $_[0] ? $_[0] : \($_[0]);
335 wakaba 1.1 $self->{document} = $_[1];
336 wakaba 1.63 @{$self->{document}->child_nodes} = ();
337 wakaba 1.1
338 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
339    
## The encoding is treated as certain unless a caller has already
## recorded a confidence value on the parser object.
340 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
341 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
342     if defined $self->{input_encoding};
343 wakaba 1.63
## |$i| is the index of the next unread character of |$$s|.  Lines are
## counted from 1; the column is reset to 0 at each line break.
344 wakaba 1.1 my $i = 0;
345 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
346     $self->{column_prev} = $self->{column} = 0;
## Input callback: stores the next input character, as a code point, in
## |$self->{next_char}|; -1 stands for the end of the input.
347 wakaba 1.76 $self->{set_next_char} = sub {
348 wakaba 1.1 my $self = shift;
349 wakaba 1.13
## Shift the character being replaced into the three-entry history of
## previous characters, most recent first.
350 wakaba 1.76 pop @{$self->{prev_char}};
351     unshift @{$self->{prev_char}}, $self->{next_char};
352 wakaba 1.13
353 wakaba 1.76 $self->{next_char} = -1 and return if $i >= length $$s;
354     $self->{next_char} = ord substr $$s, $i++, 1;
355 wakaba 1.112
## Remember the position before this character, then advance the column.
356     ($self->{line_prev}, $self->{column_prev})
357     = ($self->{line}, $self->{column});
358     $self->{column}++;
359 wakaba 1.1
## Input normalization: CR and CR LF both become a single LF; code
## points above 0x10FFFF and NULL become U+FFFD.
360 wakaba 1.76 if ($self->{next_char} == 0x000A) { # LF
361 wakaba 1.112 $self->{line}++;
362     $self->{column} = 0;
363 wakaba 1.76 } elsif ($self->{next_char} == 0x000D) { # CR
364 wakaba 1.15 $i++ if substr ($$s, $i, 1) eq "\x0A";
365 wakaba 1.76 $self->{next_char} = 0x000A; # LF # MUST
366 wakaba 1.112 $self->{line}++;
367     $self->{column} = 0;
368 wakaba 1.76 } elsif ($self->{next_char} > 0x10FFFF) {
369     $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
370     } elsif ($self->{next_char} == 0x0000) { # NULL
371 wakaba 1.8 !!!parse-error (type => 'NULL');
372 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
373 wakaba 1.1 }
374     };
375 wakaba 1.76 $self->{prev_char} = [-1, -1, -1];
376     $self->{next_char} = -1;
377 wakaba 1.1
## Default error callback: the position is taken from the token when one
## is supplied, otherwise from the |line| and |column| options.
378 wakaba 1.3 my $onerror = $_[2] || sub {
379     my (%opt) = @_;
380 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
381     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
382     warn "Parse error ($opt{type}) at line $line column $column\n";
383 wakaba 1.3 };
## The current position is passed first, so |line| or |column| values
## given by the reporting site override it.
384     $self->{parse_error} = sub {
385 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
386 wakaba 1.1 };
387    
388     $self->_initialize_tokenizer;
389     $self->_initialize_tree_constructor;
390     $self->_construct_tree;
391     $self->_terminate_tree_constructor;
392    
## The |parse_error| closure refers to |$self|, which in turn holds the
## closure; deleting it breaks that reference cycle.
393 wakaba 1.112 delete $self->{parse_error}; # remove loop
394    
395 wakaba 1.1 return $self->{document};
396     } # parse_string
397    
## Creates a parser object of the given class.  All callbacks start out
## as placeholders: the application hooks do nothing and the input
## callback always reports the end of the input.
sub new ($) {
  my $pkg = shift;
  my $self = bless {}, $pkg;

  ## Hooks that do nothing until they are replaced.
  $self->{application_cache_selection} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{parse_error} = sub {
    #
  };

  ## No input is attached yet, so every read yields -1 (end of input).
  $self->{set_next_char} = sub {
    $self->{next_char} = -1;
  };

  return $self;
} # new
418    
## Content model feature bits.
sub CM_ENTITY ()         { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP ()    { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the feature bits.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL ()     { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL ()    { CM_ENTITY | CM_LIMITED_MARKUP }
sub PCDATA_CONTENT_MODEL ()    { CM_ENTITY | CM_FULL_MARKUP }
427    
## Tokenizer states.  The numbers are arbitrary but distinct identifiers
## stored in |$self->{state}|.

## Data and tags
sub DATA_STATE ()           { 0 }
sub ENTITY_DATA_STATE ()    { 1 }
sub TAG_OPEN_STATE ()       { 2 }
sub CLOSE_TAG_OPEN_STATE () { 3 }
sub TAG_NAME_STATE ()       { 4 }

## Attributes
sub BEFORE_ATTRIBUTE_NAME_STATE ()         { 5 }
sub ATTRIBUTE_NAME_STATE ()                { 6 }
sub AFTER_ATTRIBUTE_NAME_STATE ()          { 7 }
sub BEFORE_ATTRIBUTE_VALUE_STATE ()        { 8 }
sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
sub ATTRIBUTE_VALUE_UNQUOTED_STATE ()      { 11 }
sub ENTITY_IN_ATTRIBUTE_VALUE_STATE ()     { 12 }

## Markup declarations and comments
sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
sub COMMENT_START_STATE ()           { 14 }
sub COMMENT_START_DASH_STATE ()      { 15 }
sub COMMENT_STATE ()                 { 16 }
sub COMMENT_END_STATE ()             { 17 }
sub COMMENT_END_DASH_STATE ()        { 18 }
sub BOGUS_COMMENT_STATE ()           { 19 }

## DOCTYPE
sub DOCTYPE_STATE ()                                 { 20 }
sub BEFORE_DOCTYPE_NAME_STATE ()                     { 21 }
sub DOCTYPE_NAME_STATE ()                            { 22 }
sub AFTER_DOCTYPE_NAME_STATE ()                      { 23 }
sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE ()        { 24 }
sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE ()         { 27 }
sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE ()        { 28 }
sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE ()         { 31 }
sub BOGUS_DOCTYPE_STATE ()                           { 32 }

## States added in later revisions
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
sub SELF_CLOSING_START_TAG_STATE ()       { 34 }
sub CDATA_BLOCK_STATE ()                  { 35 }
464 wakaba 1.57
## Token types, stored in the |type| member of a token.
sub DOCTYPE_TOKEN ()     { 1 }
sub COMMENT_TOKEN ()     { 2 }
sub START_TAG_TOKEN ()   { 3 }
sub END_TAG_TOKEN ()     { 4 }
sub END_OF_FILE_TOKEN () { 5 }
sub CHARACTER_TOKEN ()   { 6 }
471    
## Insertion mode families.  Each family occupies one bit; the two
## lowest bits tell the members of a family apart.
sub AFTER_HTML_IMS ()        { 1 << 2 }
sub HEAD_IMS ()              { 1 << 3 }
sub BODY_IMS ()              { 1 << 4 }
sub BODY_TABLE_IMS ()        { 1 << 5 }
sub TABLE_IMS ()             { 1 << 6 }
sub ROW_IMS ()               { 1 << 7 }
sub BODY_AFTER_IMS ()        { 1 << 8 }
sub FRAME_IMS ()             { 1 << 9 }
sub SELECT_IMS ()            { 1 << 10 }
sub IN_FOREIGN_CONTENT_IM () { 1 << 11 }
  ## NOTE: "in foreign content" insertion mode is special; it is combined
  ## with the secondary insertion mode.  In this parser, they are stored
  ## together in the bit-or'ed form.

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## Head-related insertion modes
sub IN_HEAD_IM ()          { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM ()       { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM ()      { HEAD_IMS | 3 }

## Body-related insertion modes
sub IN_BODY_IM ()    { BODY_IMS }
sub IN_CELL_IM ()    { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }

## Table-related insertion modes
sub IN_ROW_IM ()        { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM ()      { TABLE_IMS }

sub AFTER_BODY_IM () { BODY_AFTER_IMS }

## Frameset-related insertion modes
sub IN_FRAMESET_IM ()    { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }

## Select-related insertion modes
sub IN_SELECT_IM ()          { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }

sub IN_COLUMN_GROUP_IM () { 2 }
510    
511 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec
512    
## Resets the tokenizer part of the parser object before a parse: the
## state becomes the data state, the content model becomes PCDATA, any
## token or attribute under construction is discarded, and the |char|
## and |token| lists are emptied.
513     sub _initialize_tokenizer ($) {
514     my $self = shift;
515 wakaba 1.57 $self->{state} = DATA_STATE; # MUST
516 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # be
517 wakaba 1.1 undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
518     undef $self->{current_attribute};
519     undef $self->{last_emitted_start_tag_name};
520     undef $self->{last_attribute_value_state};
521 wakaba 1.125 delete $self->{self_closing};
522 wakaba 1.1 $self->{char} = [];
523 wakaba 1.76 # $self->{next_char}
## NOTE(review): the macro below is expanded by the .src preprocessor;
## presumably it loads the first input character into
## |$self->{next_char}| -- confirm against the macro definition.
524 wakaba 1.1 !!!next-input-character;
525     $self->{token} = [];
526 wakaba 1.18 # $self->{escape}
527 wakaba 1.1 } # _initialize_tokenizer
528    
529     ## A token has:
530 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
531     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
532     ## ->{name} (DOCTYPE_TOKEN)
533     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
534     ## ->{public_identifier} (DOCTYPE_TOKEN)
535     ## ->{system_identifier} (DOCTYPE_TOKEN)
536 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
537 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
538 wakaba 1.66 ## ->{name}
539     ## ->{value}
540     ## ->{has_reference} == 1 or 0
541 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
542 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
543     ## The token's own |->{self_closing}| member is used to save the value of
544     ## |$self->{self_closing}| while the token is pushed back to the stack.
545    
546     ## ISSUE: "When a DOCTYPE token is created, its
547     ## <i>self-closing flag</i> must be unset (its other state is that it
548     ## be set), and its attributes list must be empty.": Wrong subject?
549 wakaba 1.1
550     ## Emitted token MUST immediately be handled by the tree construction state.
551    
552     ## Before each step, UA MAY check to see if either one of the scripts in
553     ## "list of scripts that will execute as soon as possible" or the first
554     ## script in the "list of scripts that will execute asynchronously",
555     ## has completed loading. If one has, then it MUST be executed
556     ## and removed from the list.
557    
558 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
559     ## documents and not to user agents and conformance checkers,
560     ## contains some requirements that are not detected by the
561     ## parsing algorithm:
562     ## - Some requirements on character encoding declarations. ## TODO
563     ## - "Elements MUST NOT contain content that their content model disallows."
564     ## ... Some are parse error, some are not (will be reported by c.c.).
565     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
566     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
567     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
568    
569     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
570     ## be detected by the HTML5 parsing algorithm:
571     ## - Text,
572    
573 wakaba 1.1 sub _get_next_token ($) {
574     my $self = shift;
575 wakaba 1.125
576     if ($self->{self_closing}) {
577     !!!parse-error (type => 'nestc', token => $self->{current_token});
578     ## NOTE: The |self_closing| flag is only set by start tag token.
579     ## In addition, when a start tag token is emitted, it is always set to
580     ## |current_token|.
581     delete $self->{self_closing};
582     }
583    
584 wakaba 1.1 if (@{$self->{token}}) {
585 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
586 wakaba 1.1 return shift @{$self->{token}};
587     }
588    
589     A: {
590 wakaba 1.57 if ($self->{state} == DATA_STATE) {
591 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
592 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
593     not $self->{escape}) {
594 wakaba 1.77 !!!cp (1);
595 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
596 wakaba 1.1 !!!next-input-character;
597     redo A;
598     } else {
599 wakaba 1.77 !!!cp (2);
600 wakaba 1.1 #
601     }
602 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
603 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
604 wakaba 1.13 unless ($self->{escape}) {
605 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
606     $self->{prev_char}->[1] == 0x0021 and # !
607     $self->{prev_char}->[2] == 0x003C) { # <
608 wakaba 1.77 !!!cp (3);
609 wakaba 1.13 $self->{escape} = 1;
610 wakaba 1.77 } else {
611     !!!cp (4);
612 wakaba 1.13 }
613 wakaba 1.77 } else {
614     !!!cp (5);
615 wakaba 1.13 }
616     }
617    
618     #
619 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
620 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
621     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
622 wakaba 1.13 not $self->{escape})) {
623 wakaba 1.77 !!!cp (6);
624 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
625 wakaba 1.1 !!!next-input-character;
626     redo A;
627     } else {
628 wakaba 1.77 !!!cp (7);
629 wakaba 1.1 #
630     }
631 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
632 wakaba 1.13 if ($self->{escape} and
633 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
634 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
635     $self->{prev_char}->[1] == 0x002D) { # -
636 wakaba 1.77 !!!cp (8);
637 wakaba 1.13 delete $self->{escape};
638 wakaba 1.77 } else {
639     !!!cp (9);
640 wakaba 1.13 }
641 wakaba 1.77 } else {
642     !!!cp (10);
643 wakaba 1.13 }
644    
645     #
646 wakaba 1.76 } elsif ($self->{next_char} == -1) {
647 wakaba 1.77 !!!cp (11);
648 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
649     line => $self->{line}, column => $self->{column}});
650 wakaba 1.1 last A; ## TODO: ok?
651 wakaba 1.77 } else {
652     !!!cp (12);
653 wakaba 1.1 }
654     # Anything else
655 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
656 wakaba 1.112 data => chr $self->{next_char},
657 wakaba 1.120 line => $self->{line}, column => $self->{column},
658 wakaba 1.118 };
659 wakaba 1.1 ## Stay in the data state
660     !!!next-input-character;
661    
662     !!!emit ($token);
663    
664     redo A;
665 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
666 wakaba 1.1 ## (cannot happen in CDATA state)
667 wakaba 1.112
668 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
669 wakaba 1.1
670 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
671 wakaba 1.1
672 wakaba 1.57 $self->{state} = DATA_STATE;
673 wakaba 1.1 # next-input-character is already done
674    
675     unless (defined $token) {
676 wakaba 1.77 !!!cp (13);
677 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
678 wakaba 1.120 line => $l, column => $c,
679 wakaba 1.118 });
680 wakaba 1.1 } else {
681 wakaba 1.77 !!!cp (14);
682 wakaba 1.1 !!!emit ($token);
683     }
684    
685     redo A;
686 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
687 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
688 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
689 wakaba 1.77 !!!cp (15);
690 wakaba 1.1 !!!next-input-character;
691 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
692 wakaba 1.1 redo A;
693     } else {
694 wakaba 1.77 !!!cp (16);
695 wakaba 1.1 ## reconsume
696 wakaba 1.57 $self->{state} = DATA_STATE;
697 wakaba 1.1
698 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
699 wakaba 1.120 line => $self->{line_prev},
700     column => $self->{column_prev},
701 wakaba 1.118 });
702 wakaba 1.1
703     redo A;
704     }
705 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
706 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
707 wakaba 1.77 !!!cp (17);
708 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
709 wakaba 1.1 !!!next-input-character;
710     redo A;
711 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
712 wakaba 1.77 !!!cp (18);
713 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
714 wakaba 1.1 !!!next-input-character;
715     redo A;
716 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
717     $self->{next_char} <= 0x005A) { # A..Z
718 wakaba 1.77 !!!cp (19);
719 wakaba 1.1 $self->{current_token}
720 wakaba 1.55 = {type => START_TAG_TOKEN,
721 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
722     line => $self->{line_prev},
723     column => $self->{column_prev}};
724 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
725 wakaba 1.1 !!!next-input-character;
726     redo A;
727 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
728     $self->{next_char} <= 0x007A) { # a..z
729 wakaba 1.77 !!!cp (20);
730 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
731 wakaba 1.112 tag_name => chr ($self->{next_char}),
732     line => $self->{line_prev},
733     column => $self->{column_prev}};
734 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
735 wakaba 1.1 !!!next-input-character;
736     redo A;
737 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
738 wakaba 1.77 !!!cp (21);
739 wakaba 1.115 !!!parse-error (type => 'empty start tag',
740     line => $self->{line_prev},
741     column => $self->{column_prev});
742 wakaba 1.57 $self->{state} = DATA_STATE;
743 wakaba 1.1 !!!next-input-character;
744    
745 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
746 wakaba 1.120 line => $self->{line_prev},
747     column => $self->{column_prev},
748 wakaba 1.118 });
749 wakaba 1.1
750     redo A;
751 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
752 wakaba 1.77 !!!cp (22);
753 wakaba 1.115 !!!parse-error (type => 'pio',
754     line => $self->{line_prev},
755     column => $self->{column_prev});
756 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
757 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
758 wakaba 1.120 line => $self->{line_prev},
759     column => $self->{column_prev},
760 wakaba 1.118 };
761 wakaba 1.76 ## $self->{next_char} is intentionally left as is
762 wakaba 1.1 redo A;
763     } else {
764 wakaba 1.77 !!!cp (23);
765 wakaba 1.3 !!!parse-error (type => 'bare stago');
766 wakaba 1.57 $self->{state} = DATA_STATE;
767 wakaba 1.1 ## reconsume
768    
769 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
770 wakaba 1.120 line => $self->{line_prev},
771     column => $self->{column_prev},
772 wakaba 1.118 });
773 wakaba 1.1
774     redo A;
775     }
776     } else {
777 wakaba 1.40 die "$0: $self->{content_model} in tag open";
778 wakaba 1.1 }
779 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
780 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
781 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
782 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
783 wakaba 1.112
784 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
785 wakaba 1.23 my @next_char;
786     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
787 wakaba 1.76 push @next_char, $self->{next_char};
788 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
789     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
790 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
791 wakaba 1.77 !!!cp (24);
792 wakaba 1.23 !!!next-input-character;
793     next TAGNAME;
794     } else {
795 wakaba 1.77 !!!cp (25);
796 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
797 wakaba 1.23 !!!back-next-input-character (@next_char);
798 wakaba 1.57 $self->{state} = DATA_STATE;
799 wakaba 1.23
800 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
801 wakaba 1.120 line => $l, column => $c,
802 wakaba 1.118 });
803 wakaba 1.23
804     redo A;
805     }
806     }
807 wakaba 1.76 push @next_char, $self->{next_char};
808 wakaba 1.23
809 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
810     $self->{next_char} == 0x000A or # LF
811     $self->{next_char} == 0x000B or # VT
812     $self->{next_char} == 0x000C or # FF
813     $self->{next_char} == 0x0020 or # SP
814     $self->{next_char} == 0x003E or # >
815     $self->{next_char} == 0x002F or # /
816     $self->{next_char} == -1) {
817 wakaba 1.77 !!!cp (26);
818 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
819 wakaba 1.1 !!!back-next-input-character (@next_char);
820 wakaba 1.57 $self->{state} = DATA_STATE;
821 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
822 wakaba 1.120 line => $l, column => $c,
823 wakaba 1.118 });
824 wakaba 1.1 redo A;
825 wakaba 1.23 } else {
826 wakaba 1.77 !!!cp (27);
827 wakaba 1.76 $self->{next_char} = shift @next_char;
828 wakaba 1.23 !!!back-next-input-character (@next_char);
829     # and consume...
830 wakaba 1.1 }
831 wakaba 1.23 } else {
832     ## No start tag token has ever been emitted
833 wakaba 1.77 !!!cp (28);
834 wakaba 1.23 # next-input-character is already done
835 wakaba 1.57 $self->{state} = DATA_STATE;
836 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
837 wakaba 1.120 line => $l, column => $c,
838 wakaba 1.118 });
839 wakaba 1.1 redo A;
840     }
841     }
842    
843 wakaba 1.76 if (0x0041 <= $self->{next_char} and
844     $self->{next_char} <= 0x005A) { # A..Z
845 wakaba 1.77 !!!cp (29);
846 wakaba 1.112 $self->{current_token}
847     = {type => END_TAG_TOKEN,
848     tag_name => chr ($self->{next_char} + 0x0020),
849     line => $l, column => $c};
850 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
851 wakaba 1.1 !!!next-input-character;
852     redo A;
853 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
854     $self->{next_char} <= 0x007A) { # a..z
855 wakaba 1.77 !!!cp (30);
856 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
857 wakaba 1.112 tag_name => chr ($self->{next_char}),
858     line => $l, column => $c};
859 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
860 wakaba 1.1 !!!next-input-character;
861     redo A;
862 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
863 wakaba 1.77 !!!cp (31);
864 wakaba 1.115 !!!parse-error (type => 'empty end tag',
865     line => $self->{line_prev}, ## "<" in "</>"
866     column => $self->{column_prev} - 1);
867 wakaba 1.57 $self->{state} = DATA_STATE;
868 wakaba 1.1 !!!next-input-character;
869     redo A;
870 wakaba 1.76 } elsif ($self->{next_char} == -1) {
871 wakaba 1.77 !!!cp (32);
872 wakaba 1.3 !!!parse-error (type => 'bare etago');
873 wakaba 1.57 $self->{state} = DATA_STATE;
874 wakaba 1.1 # reconsume
875    
876 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
877 wakaba 1.120 line => $l, column => $c,
878 wakaba 1.118 });
879 wakaba 1.1
880     redo A;
881     } else {
882 wakaba 1.77 !!!cp (33);
883 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
884 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
885 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
886 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
887     column => $self->{column_prev} - 1,
888 wakaba 1.118 };
889 wakaba 1.76 ## $self->{next_char} is intentionally left as is
890 wakaba 1.1 redo A;
891     }
892 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
893 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
894     $self->{next_char} == 0x000A or # LF
895     $self->{next_char} == 0x000B or # VT
896     $self->{next_char} == 0x000C or # FF
897     $self->{next_char} == 0x0020) { # SP
898 wakaba 1.77 !!!cp (34);
899 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
900 wakaba 1.1 !!!next-input-character;
901     redo A;
902 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
903 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
904 wakaba 1.77 !!!cp (35);
905 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
906 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
907 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
908 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
909     # ## NOTE: This should never be reached.
910     # !!! cp (36);
911     # !!! parse-error (type => 'end tag attribute');
912     #} else {
913 wakaba 1.77 !!!cp (37);
914 wakaba 1.78 #}
915 wakaba 1.1 } else {
916     die "$0: $self->{current_token}->{type}: Unknown token type";
917     }
918 wakaba 1.57 $self->{state} = DATA_STATE;
919 wakaba 1.1 !!!next-input-character;
920    
921     !!!emit ($self->{current_token}); # start tag or end tag
922    
923     redo A;
924 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
925     $self->{next_char} <= 0x005A) { # A..Z
926 wakaba 1.77 !!!cp (38);
927 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
928 wakaba 1.1 # start tag or end tag
929     ## Stay in this state
930     !!!next-input-character;
931     redo A;
932 wakaba 1.76 } elsif ($self->{next_char} == -1) {
933 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
934 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
935 wakaba 1.77 !!!cp (39);
936 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
937 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
938 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
939 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
940     # ## NOTE: This state should never be reached.
941     # !!! cp (40);
942     # !!! parse-error (type => 'end tag attribute');
943     #} else {
944 wakaba 1.77 !!!cp (41);
945 wakaba 1.78 #}
946 wakaba 1.1 } else {
947     die "$0: $self->{current_token}->{type}: Unknown token type";
948     }
949 wakaba 1.57 $self->{state} = DATA_STATE;
950 wakaba 1.1 # reconsume
951    
952     !!!emit ($self->{current_token}); # start tag or end tag
953    
954     redo A;
955 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
956 wakaba 1.125 !!!cp (42);
957     $self->{state} = SELF_CLOSING_START_TAG_STATE;
958 wakaba 1.1 !!!next-input-character;
959     redo A;
960     } else {
961 wakaba 1.77 !!!cp (44);
962 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
963 wakaba 1.1 # start tag or end tag
964     ## Stay in the state
965     !!!next-input-character;
966     redo A;
967     }
968 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
969 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
970     $self->{next_char} == 0x000A or # LF
971     $self->{next_char} == 0x000B or # VT
972     $self->{next_char} == 0x000C or # FF
973     $self->{next_char} == 0x0020) { # SP
974 wakaba 1.77 !!!cp (45);
975 wakaba 1.1 ## Stay in the state
976     !!!next-input-character;
977     redo A;
978 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
979 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
980 wakaba 1.77 !!!cp (46);
981 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
982 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
983 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
984 wakaba 1.1 if ($self->{current_token}->{attributes}) {
985 wakaba 1.77 !!!cp (47);
986 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
987 wakaba 1.77 } else {
988     !!!cp (48);
989 wakaba 1.1 }
990     } else {
991     die "$0: $self->{current_token}->{type}: Unknown token type";
992     }
993 wakaba 1.57 $self->{state} = DATA_STATE;
994 wakaba 1.1 !!!next-input-character;
995    
996     !!!emit ($self->{current_token}); # start tag or end tag
997    
998     redo A;
999 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1000     $self->{next_char} <= 0x005A) { # A..Z
1001 wakaba 1.77 !!!cp (49);
1002 wakaba 1.119 $self->{current_attribute}
1003     = {name => chr ($self->{next_char} + 0x0020),
1004     value => '',
1005     line => $self->{line}, column => $self->{column}};
1006 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1007 wakaba 1.1 !!!next-input-character;
1008     redo A;
1009 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1010 wakaba 1.125 !!!cp (50);
1011     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1012 wakaba 1.1 !!!next-input-character;
1013     redo A;
1014 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1015 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1016 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1017 wakaba 1.77 !!!cp (52);
1018 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1019 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1020 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1021 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1022 wakaba 1.77 !!!cp (53);
1023 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1024 wakaba 1.77 } else {
1025     !!!cp (54);
1026 wakaba 1.1 }
1027     } else {
1028     die "$0: $self->{current_token}->{type}: Unknown token type";
1029     }
1030 wakaba 1.57 $self->{state} = DATA_STATE;
1031 wakaba 1.1 # reconsume
1032    
1033     !!!emit ($self->{current_token}); # start tag or end tag
1034    
1035     redo A;
1036     } else {
1037 wakaba 1.72 if ({
1038     0x0022 => 1, # "
1039     0x0027 => 1, # '
1040     0x003D => 1, # =
1041 wakaba 1.76 }->{$self->{next_char}}) {
1042 wakaba 1.77 !!!cp (55);
1043 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1044 wakaba 1.77 } else {
1045     !!!cp (56);
1046 wakaba 1.72 }
1047 wakaba 1.119 $self->{current_attribute}
1048     = {name => chr ($self->{next_char}),
1049     value => '',
1050     line => $self->{line}, column => $self->{column}};
1051 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1052 wakaba 1.1 !!!next-input-character;
1053     redo A;
1054     }
1055 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1056 wakaba 1.1 my $before_leave = sub {
1057     if (exists $self->{current_token}->{attributes} # start tag or end tag
1058     ->{$self->{current_attribute}->{name}}) { # MUST
1059 wakaba 1.77 !!!cp (57);
1060 wakaba 1.120 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1061 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1062     } else {
1063 wakaba 1.77 !!!cp (58);
1064 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1065     = $self->{current_attribute};
1066     }
1067     }; # $before_leave
1068    
1069 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1070     $self->{next_char} == 0x000A or # LF
1071     $self->{next_char} == 0x000B or # VT
1072     $self->{next_char} == 0x000C or # FF
1073     $self->{next_char} == 0x0020) { # SP
1074 wakaba 1.77 !!!cp (59);
1075 wakaba 1.1 $before_leave->();
1076 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1077 wakaba 1.1 !!!next-input-character;
1078     redo A;
1079 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1080 wakaba 1.77 !!!cp (60);
1081 wakaba 1.1 $before_leave->();
1082 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1083 wakaba 1.1 !!!next-input-character;
1084     redo A;
1085 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1086 wakaba 1.1 $before_leave->();
1087 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1088 wakaba 1.77 !!!cp (61);
1089 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1090 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1091 wakaba 1.77 !!!cp (62);
1092 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1093 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1094 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1095 wakaba 1.1 }
1096     } else {
1097     die "$0: $self->{current_token}->{type}: Unknown token type";
1098     }
1099 wakaba 1.57 $self->{state} = DATA_STATE;
1100 wakaba 1.1 !!!next-input-character;
1101    
1102     !!!emit ($self->{current_token}); # start tag or end tag
1103    
1104     redo A;
1105 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1106     $self->{next_char} <= 0x005A) { # A..Z
1107 wakaba 1.77 !!!cp (63);
1108 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1109 wakaba 1.1 ## Stay in the state
1110     !!!next-input-character;
1111     redo A;
1112 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1113 wakaba 1.125 !!!cp (64);
1114 wakaba 1.1 $before_leave->();
1115 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1116 wakaba 1.1 !!!next-input-character;
1117     redo A;
1118 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1119 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1120 wakaba 1.1 $before_leave->();
1121 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1122 wakaba 1.77 !!!cp (66);
1123 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1124 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1125 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1126 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1127 wakaba 1.77 !!!cp (67);
1128 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1129 wakaba 1.77 } else {
1130 wakaba 1.78 ## NOTE: This state should never be reached.
1131 wakaba 1.77 !!!cp (68);
1132 wakaba 1.1 }
1133     } else {
1134     die "$0: $self->{current_token}->{type}: Unknown token type";
1135     }
1136 wakaba 1.57 $self->{state} = DATA_STATE;
1137 wakaba 1.1 # reconsume
1138    
1139     !!!emit ($self->{current_token}); # start tag or end tag
1140    
1141     redo A;
1142     } else {
1143 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1144     $self->{next_char} == 0x0027) { # '
1145 wakaba 1.77 !!!cp (69);
1146 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1147 wakaba 1.77 } else {
1148     !!!cp (70);
1149 wakaba 1.72 }
1150 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1151 wakaba 1.1 ## Stay in the state
1152     !!!next-input-character;
1153     redo A;
1154     }
1155 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1156 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1157     $self->{next_char} == 0x000A or # LF
1158     $self->{next_char} == 0x000B or # VT
1159     $self->{next_char} == 0x000C or # FF
1160     $self->{next_char} == 0x0020) { # SP
1161 wakaba 1.77 !!!cp (71);
1162 wakaba 1.1 ## Stay in the state
1163     !!!next-input-character;
1164     redo A;
1165 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1166 wakaba 1.77 !!!cp (72);
1167 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1168 wakaba 1.1 !!!next-input-character;
1169     redo A;
1170 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1171 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1172 wakaba 1.77 !!!cp (73);
1173 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1174 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1175 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1176 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1177 wakaba 1.77 !!!cp (74);
1178 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1179 wakaba 1.77 } else {
1180 wakaba 1.78 ## NOTE: This state should never be reached.
1181 wakaba 1.77 !!!cp (75);
1182 wakaba 1.1 }
1183     } else {
1184     die "$0: $self->{current_token}->{type}: Unknown token type";
1185     }
1186 wakaba 1.57 $self->{state} = DATA_STATE;
1187 wakaba 1.1 !!!next-input-character;
1188    
1189     !!!emit ($self->{current_token}); # start tag or end tag
1190    
1191     redo A;
1192 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1193     $self->{next_char} <= 0x005A) { # A..Z
1194 wakaba 1.77 !!!cp (76);
1195 wakaba 1.119 $self->{current_attribute}
1196     = {name => chr ($self->{next_char} + 0x0020),
1197     value => '',
1198     line => $self->{line}, column => $self->{column}};
1199 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1200 wakaba 1.1 !!!next-input-character;
1201     redo A;
1202 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1203 wakaba 1.125 !!!cp (77);
1204     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1205 wakaba 1.1 !!!next-input-character;
1206     redo A;
1207 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1208 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1209 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1210 wakaba 1.77 !!!cp (79);
1211 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1212 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1213 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1214 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1215 wakaba 1.77 !!!cp (80);
1216 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1217 wakaba 1.77 } else {
1218 wakaba 1.78 ## NOTE: This state should never be reached.
1219 wakaba 1.77 !!!cp (81);
1220 wakaba 1.1 }
1221     } else {
1222     die "$0: $self->{current_token}->{type}: Unknown token type";
1223     }
1224 wakaba 1.57 $self->{state} = DATA_STATE;
1225 wakaba 1.1 # reconsume
1226    
1227     !!!emit ($self->{current_token}); # start tag or end tag
1228    
1229     redo A;
1230     } else {
1231 wakaba 1.77 !!!cp (82);
1232 wakaba 1.119 $self->{current_attribute}
1233     = {name => chr ($self->{next_char}),
1234     value => '',
1235     line => $self->{line}, column => $self->{column}};
1236 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1237 wakaba 1.1 !!!next-input-character;
1238     redo A;
1239     }
1240 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1241 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1242     $self->{next_char} == 0x000A or # LF
1243     $self->{next_char} == 0x000B or # VT
1244     $self->{next_char} == 0x000C or # FF
1245     $self->{next_char} == 0x0020) { # SP
1246 wakaba 1.77 !!!cp (83);
1247 wakaba 1.1 ## Stay in the state
1248     !!!next-input-character;
1249     redo A;
1250 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1251 wakaba 1.77 !!!cp (84);
1252 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1253 wakaba 1.1 !!!next-input-character;
1254     redo A;
1255 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1256 wakaba 1.77 !!!cp (85);
1257 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1258 wakaba 1.1 ## reconsume
1259     redo A;
1260 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1261 wakaba 1.77 !!!cp (86);
1262 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1263 wakaba 1.1 !!!next-input-character;
1264     redo A;
1265 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1266 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1267 wakaba 1.77 !!!cp (87);
1268 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1269 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1270 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1271 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1272 wakaba 1.77 !!!cp (88);
1273 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1274 wakaba 1.77 } else {
1275 wakaba 1.78 ## NOTE: This state should never be reached.
1276 wakaba 1.77 !!!cp (89);
1277 wakaba 1.1 }
1278     } else {
1279     die "$0: $self->{current_token}->{type}: Unknown token type";
1280     }
1281 wakaba 1.57 $self->{state} = DATA_STATE;
1282 wakaba 1.1 !!!next-input-character;
1283    
1284     !!!emit ($self->{current_token}); # start tag or end tag
1285    
1286     redo A;
1287 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1288 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1289 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1290 wakaba 1.77 !!!cp (90);
1291 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1292 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1293 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1294 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1295 wakaba 1.77 !!!cp (91);
1296 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1297 wakaba 1.77 } else {
1298 wakaba 1.78 ## NOTE: This state should never be reached.
1299 wakaba 1.77 !!!cp (92);
1300 wakaba 1.1 }
1301     } else {
1302     die "$0: $self->{current_token}->{type}: Unknown token type";
1303     }
1304 wakaba 1.57 $self->{state} = DATA_STATE;
1305 wakaba 1.1 ## reconsume
1306    
1307     !!!emit ($self->{current_token}); # start tag or end tag
1308    
1309     redo A;
1310     } else {
1311 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1312 wakaba 1.77 !!!cp (93);
1313 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1314 wakaba 1.77 } else {
1315     !!!cp (94);
1316 wakaba 1.72 }
1317 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1318 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1319 wakaba 1.1 !!!next-input-character;
1320     redo A;
1321     }
1322 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1323 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1324 wakaba 1.77 !!!cp (95);
1325 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1326 wakaba 1.1 !!!next-input-character;
1327     redo A;
1328 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1329 wakaba 1.77 !!!cp (96);
1330 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1331     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1332 wakaba 1.1 !!!next-input-character;
1333     redo A;
1334 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1335 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1336 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1337 wakaba 1.77 !!!cp (97);
1338 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1339 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1340 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1341 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1342 wakaba 1.77 !!!cp (98);
1343 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1344 wakaba 1.77 } else {
1345 wakaba 1.78 ## NOTE: This state should never be reached.
1346 wakaba 1.77 !!!cp (99);
1347 wakaba 1.1 }
1348     } else {
1349     die "$0: $self->{current_token}->{type}: Unknown token type";
1350     }
1351 wakaba 1.57 $self->{state} = DATA_STATE;
1352 wakaba 1.1 ## reconsume
1353    
1354     !!!emit ($self->{current_token}); # start tag or end tag
1355    
1356     redo A;
1357     } else {
1358 wakaba 1.77 !!!cp (100);
1359 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1360 wakaba 1.1 ## Stay in the state
1361     !!!next-input-character;
1362     redo A;
1363     }
1364 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1365 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1366 wakaba 1.77 !!!cp (101);
1367 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1368 wakaba 1.1 !!!next-input-character;
1369     redo A;
1370 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1371 wakaba 1.77 !!!cp (102);
1372 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1373     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1374 wakaba 1.1 !!!next-input-character;
1375     redo A;
1376 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1377 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1378 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1379 wakaba 1.77 !!!cp (103);
1380 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1381 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1382 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1383 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1384 wakaba 1.77 !!!cp (104);
1385 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1386 wakaba 1.77 } else {
1387 wakaba 1.78 ## NOTE: This state should never be reached.
1388 wakaba 1.77 !!!cp (105);
1389 wakaba 1.1 }
1390     } else {
1391     die "$0: $self->{current_token}->{type}: Unknown token type";
1392     }
1393 wakaba 1.57 $self->{state} = DATA_STATE;
1394 wakaba 1.1 ## reconsume
1395    
1396     !!!emit ($self->{current_token}); # start tag or end tag
1397    
1398     redo A;
1399     } else {
1400 wakaba 1.77 !!!cp (106);
1401 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1402 wakaba 1.1 ## Stay in the state
1403     !!!next-input-character;
1404     redo A;
1405     }
1406 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1407 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1408     $self->{next_char} == 0x000A or # LF
1409     $self->{next_char} == 0x000B or # VT
1410     $self->{next_char} == 0x000C or # FF
1411     $self->{next_char} == 0x0020) { # SP
1412 wakaba 1.77 !!!cp (107);
1413 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1414 wakaba 1.1 !!!next-input-character;
1415     redo A;
1416 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1417 wakaba 1.77 !!!cp (108);
1418 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1419     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1420 wakaba 1.1 !!!next-input-character;
1421     redo A;
1422 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1423 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1424 wakaba 1.77 !!!cp (109);
1425 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1426 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1427 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1428 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1429 wakaba 1.77 !!!cp (110);
1430 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1431 wakaba 1.77 } else {
1432 wakaba 1.78 ## NOTE: This state should never be reached.
1433 wakaba 1.77 !!!cp (111);
1434 wakaba 1.1 }
1435     } else {
1436     die "$0: $self->{current_token}->{type}: Unknown token type";
1437     }
1438 wakaba 1.57 $self->{state} = DATA_STATE;
1439 wakaba 1.1 !!!next-input-character;
1440    
1441     !!!emit ($self->{current_token}); # start tag or end tag
1442    
1443     redo A;
1444 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1445 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1446 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1447 wakaba 1.77 !!!cp (112);
1448 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1449 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1450 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1451 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1452 wakaba 1.77 !!!cp (113);
1453 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1454 wakaba 1.77 } else {
1455 wakaba 1.78 ## NOTE: This state should never be reached.
1456 wakaba 1.77 !!!cp (114);
1457 wakaba 1.1 }
1458     } else {
1459     die "$0: $self->{current_token}->{type}: Unknown token type";
1460     }
1461 wakaba 1.57 $self->{state} = DATA_STATE;
1462 wakaba 1.1 ## reconsume
1463    
1464     !!!emit ($self->{current_token}); # start tag or end tag
1465    
1466     redo A;
1467     } else {
1468 wakaba 1.72 if ({
1469     0x0022 => 1, # "
1470     0x0027 => 1, # '
1471     0x003D => 1, # =
1472 wakaba 1.76 }->{$self->{next_char}}) {
1473 wakaba 1.77 !!!cp (115);
1474 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1475 wakaba 1.77 } else {
1476     !!!cp (116);
1477 wakaba 1.72 }
1478 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1479 wakaba 1.1 ## Stay in the state
1480     !!!next-input-character;
1481     redo A;
1482     }
1483 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1484 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1485     (1,
1486     $self->{last_attribute_value_state}
1487     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1488     $self->{last_attribute_value_state}
1489     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1490     -1);
1491 wakaba 1.1
1492     unless (defined $token) {
1493 wakaba 1.77 !!!cp (117);
1494 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1495     } else {
1496 wakaba 1.77 !!!cp (118);
1497 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1498 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1499 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1500     }
1501    
1502     $self->{state} = $self->{last_attribute_value_state};
1503     # next-input-character is already done
1504     redo A;
1505 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1506 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1507     $self->{next_char} == 0x000A or # LF
1508     $self->{next_char} == 0x000B or # VT
1509     $self->{next_char} == 0x000C or # FF
1510     $self->{next_char} == 0x0020) { # SP
1511 wakaba 1.77 !!!cp (118);
1512 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1513     !!!next-input-character;
1514     redo A;
1515 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1516 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1517 wakaba 1.77 !!!cp (119);
1518 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1519     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1520     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1521     if ($self->{current_token}->{attributes}) {
1522 wakaba 1.77 !!!cp (120);
1523 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1524 wakaba 1.77 } else {
1525 wakaba 1.78 ## NOTE: This state should never be reached.
1526 wakaba 1.77 !!!cp (121);
1527 wakaba 1.72 }
1528     } else {
1529     die "$0: $self->{current_token}->{type}: Unknown token type";
1530     }
1531     $self->{state} = DATA_STATE;
1532     !!!next-input-character;
1533    
1534     !!!emit ($self->{current_token}); # start tag or end tag
1535    
1536     redo A;
1537 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1538 wakaba 1.125 !!!cp (122);
1539     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1540 wakaba 1.72 !!!next-input-character;
1541 wakaba 1.125 redo A;
1542     } else {
1543     !!!cp ('124.1');
1544     !!!parse-error (type => 'no space between attributes');
1545     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1546     ## reconsume
1547     redo A;
1548     }
1549     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1550     if ($self->{next_char} == 0x003E) { # >
1551     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1552     !!!cp ('124.2');
1553     !!!parse-error (type => 'nestc', token => $self->{current_token});
1554     ## TODO: Different type than slash in start tag
1555     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1556     if ($self->{current_token}->{attributes}) {
1557     !!!cp ('124.4');
1558     !!!parse-error (type => 'end tag attribute');
1559     } else {
1560     !!!cp ('124.5');
1561     }
1562     ## TODO: Test |<title></title/>|
1563 wakaba 1.72 } else {
1564 wakaba 1.125 !!!cp ('124.3');
1565     $self->{self_closing} = 1;
1566 wakaba 1.72 }
1567 wakaba 1.125
1568     $self->{state} = DATA_STATE;
1569     !!!next-input-character;
1570    
1571     !!!emit ($self->{current_token}); # start tag or end tag
1572    
1573 wakaba 1.72 redo A;
1574     } else {
1575 wakaba 1.125 !!!cp ('124.4');
1576     !!!parse-error (type => 'nestc');
1577     ## TODO: This error type is wrong.
1578 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1579 wakaba 1.125 ## Reconsume.
1580 wakaba 1.72 redo A;
1581     }
1582 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1583 wakaba 1.1 ## (only happen if PCDATA state)
1584    
1585 wakaba 1.112 ## NOTE: Set by the previous state
1586     #my $token = {type => COMMENT_TOKEN, data => ''};
1587 wakaba 1.1
1588     BC: {
1589 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1590 wakaba 1.77 !!!cp (124);
1591 wakaba 1.57 $self->{state} = DATA_STATE;
1592 wakaba 1.1 !!!next-input-character;
1593    
1594 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1595 wakaba 1.1
1596     redo A;
1597 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1598 wakaba 1.77 !!!cp (125);
1599 wakaba 1.57 $self->{state} = DATA_STATE;
1600 wakaba 1.1 ## reconsume
1601    
1602 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1603 wakaba 1.1
1604     redo A;
1605     } else {
1606 wakaba 1.77 !!!cp (126);
1607 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1608 wakaba 1.1 !!!next-input-character;
1609     redo BC;
1610     }
1611     } # BC
1612 wakaba 1.77
1613     die "$0: _get_next_token: unexpected case [BC]";
1614 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1615 wakaba 1.1 ## (only happen if PCDATA state)
1616    
1617 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1618 wakaba 1.112
1619 wakaba 1.1 my @next_char;
1620 wakaba 1.76 push @next_char, $self->{next_char};
1621 wakaba 1.1
1622 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1623 wakaba 1.1 !!!next-input-character;
1624 wakaba 1.76 push @next_char, $self->{next_char};
1625     if ($self->{next_char} == 0x002D) { # -
1626 wakaba 1.77 !!!cp (127);
1627 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1628 wakaba 1.120 line => $l, column => $c,
1629 wakaba 1.118 };
1630 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1631 wakaba 1.1 !!!next-input-character;
1632     redo A;
1633 wakaba 1.77 } else {
1634     !!!cp (128);
1635 wakaba 1.1 }
1636 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1637     $self->{next_char} == 0x0064) { # d
1638 wakaba 1.1 !!!next-input-character;
1639 wakaba 1.76 push @next_char, $self->{next_char};
1640     if ($self->{next_char} == 0x004F or # O
1641     $self->{next_char} == 0x006F) { # o
1642 wakaba 1.1 !!!next-input-character;
1643 wakaba 1.76 push @next_char, $self->{next_char};
1644     if ($self->{next_char} == 0x0043 or # C
1645     $self->{next_char} == 0x0063) { # c
1646 wakaba 1.1 !!!next-input-character;
1647 wakaba 1.76 push @next_char, $self->{next_char};
1648     if ($self->{next_char} == 0x0054 or # T
1649     $self->{next_char} == 0x0074) { # t
1650 wakaba 1.1 !!!next-input-character;
1651 wakaba 1.76 push @next_char, $self->{next_char};
1652     if ($self->{next_char} == 0x0059 or # Y
1653     $self->{next_char} == 0x0079) { # y
1654 wakaba 1.1 !!!next-input-character;
1655 wakaba 1.76 push @next_char, $self->{next_char};
1656     if ($self->{next_char} == 0x0050 or # P
1657     $self->{next_char} == 0x0070) { # p
1658 wakaba 1.1 !!!next-input-character;
1659 wakaba 1.76 push @next_char, $self->{next_char};
1660     if ($self->{next_char} == 0x0045 or # E
1661     $self->{next_char} == 0x0065) { # e
1662 wakaba 1.77 !!!cp (129);
1663     ## TODO: What a stupid code this is!
1664 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1665 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1666     quirks => 1,
1667 wakaba 1.120 line => $l, column => $c,
1668 wakaba 1.118 };
1669 wakaba 1.1 !!!next-input-character;
1670     redo A;
1671 wakaba 1.77 } else {
1672     !!!cp (130);
1673 wakaba 1.1 }
1674 wakaba 1.77 } else {
1675     !!!cp (131);
1676 wakaba 1.1 }
1677 wakaba 1.77 } else {
1678     !!!cp (132);
1679 wakaba 1.1 }
1680 wakaba 1.77 } else {
1681     !!!cp (133);
1682 wakaba 1.1 }
1683 wakaba 1.77 } else {
1684     !!!cp (134);
1685 wakaba 1.1 }
1686 wakaba 1.77 } else {
1687     !!!cp (135);
1688 wakaba 1.1 }
1689 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1690     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
1691     $self->{next_char} == 0x005B) { # [
1692     !!!next-input-character;
1693     push @next_char, $self->{next_char};
1694     if ($self->{next_char} == 0x0043) { # C
1695     !!!next-input-character;
1696     push @next_char, $self->{next_char};
1697     if ($self->{next_char} == 0x0044) { # D
1698     !!!next-input-character;
1699     push @next_char, $self->{next_char};
1700     if ($self->{next_char} == 0x0041) { # A
1701     !!!next-input-character;
1702     push @next_char, $self->{next_char};
1703     if ($self->{next_char} == 0x0054) { # T
1704     !!!next-input-character;
1705     push @next_char, $self->{next_char};
1706     if ($self->{next_char} == 0x0041) { # A
1707     !!!next-input-character;
1708     push @next_char, $self->{next_char};
1709     if ($self->{next_char} == 0x005B) { # [
1710     !!!cp (135.1);
1711     $self->{state} = CDATA_BLOCK_STATE;
1712     !!!next-input-character;
1713     redo A;
1714     } else {
1715     !!!cp (135.2);
1716     }
1717     } else {
1718     !!!cp (135.3);
1719     }
1720     } else {
1721     !!!cp (135.4);
1722     }
1723     } else {
1724     !!!cp (135.5);
1725     }
1726     } else {
1727     !!!cp (135.6);
1728     }
1729     } else {
1730     !!!cp (135.7);
1731     }
1732 wakaba 1.77 } else {
1733     !!!cp (136);
1734 wakaba 1.1 }
1735    
1736 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1737 wakaba 1.76 $self->{next_char} = shift @next_char;
1738 wakaba 1.1 !!!back-next-input-character (@next_char);
1739 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1740 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1741 wakaba 1.120 line => $l, column => $c,
1742 wakaba 1.118 };
1743 wakaba 1.1 redo A;
1744    
1745     ## ISSUE: typos in spec: chacacters, is is a parse error
1746     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1747 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
1748 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1749 wakaba 1.77 !!!cp (137);
1750 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
1751 wakaba 1.23 !!!next-input-character;
1752     redo A;
1753 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1754 wakaba 1.77 !!!cp (138);
1755 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1756 wakaba 1.57 $self->{state} = DATA_STATE;
1757 wakaba 1.23 !!!next-input-character;
1758    
1759     !!!emit ($self->{current_token}); # comment
1760    
1761     redo A;
1762 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1763 wakaba 1.77 !!!cp (139);
1764 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1765 wakaba 1.57 $self->{state} = DATA_STATE;
1766 wakaba 1.23 ## reconsume
1767    
1768     !!!emit ($self->{current_token}); # comment
1769    
1770     redo A;
1771     } else {
1772 wakaba 1.77 !!!cp (140);
1773 wakaba 1.23 $self->{current_token}->{data} # comment
1774 wakaba 1.76 .= chr ($self->{next_char});
1775 wakaba 1.57 $self->{state} = COMMENT_STATE;
1776 wakaba 1.23 !!!next-input-character;
1777     redo A;
1778     }
1779 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1780 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1781 wakaba 1.77 !!!cp (141);
1782 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1783 wakaba 1.23 !!!next-input-character;
1784     redo A;
1785 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1786 wakaba 1.77 !!!cp (142);
1787 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1788 wakaba 1.57 $self->{state} = DATA_STATE;
1789 wakaba 1.23 !!!next-input-character;
1790    
1791     !!!emit ($self->{current_token}); # comment
1792    
1793     redo A;
1794 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1795 wakaba 1.77 !!!cp (143);
1796 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1797 wakaba 1.57 $self->{state} = DATA_STATE;
1798 wakaba 1.23 ## reconsume
1799    
1800     !!!emit ($self->{current_token}); # comment
1801    
1802     redo A;
1803     } else {
1804 wakaba 1.77 !!!cp (144);
1805 wakaba 1.23 $self->{current_token}->{data} # comment
1806 wakaba 1.76 .= '-' . chr ($self->{next_char});
1807 wakaba 1.57 $self->{state} = COMMENT_STATE;
1808 wakaba 1.23 !!!next-input-character;
1809     redo A;
1810     }
1811 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
1812 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1813 wakaba 1.77 !!!cp (145);
1814 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
1815 wakaba 1.1 !!!next-input-character;
1816     redo A;
1817 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1818 wakaba 1.77 !!!cp (146);
1819 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1820 wakaba 1.57 $self->{state} = DATA_STATE;
1821 wakaba 1.1 ## reconsume
1822    
1823     !!!emit ($self->{current_token}); # comment
1824    
1825     redo A;
1826     } else {
1827 wakaba 1.77 !!!cp (147);
1828 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1829 wakaba 1.1 ## Stay in the state
1830     !!!next-input-character;
1831     redo A;
1832     }
1833 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
1834 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1835 wakaba 1.77 !!!cp (148);
1836 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1837 wakaba 1.1 !!!next-input-character;
1838     redo A;
1839 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1840 wakaba 1.77 !!!cp (149);
1841 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1842 wakaba 1.57 $self->{state} = DATA_STATE;
1843 wakaba 1.1 ## reconsume
1844    
1845     !!!emit ($self->{current_token}); # comment
1846    
1847     redo A;
1848     } else {
1849 wakaba 1.77 !!!cp (150);
1850 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
1851 wakaba 1.57 $self->{state} = COMMENT_STATE;
1852 wakaba 1.1 !!!next-input-character;
1853     redo A;
1854     }
1855 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
1856 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1857 wakaba 1.77 !!!cp (151);
1858 wakaba 1.57 $self->{state} = DATA_STATE;
1859 wakaba 1.1 !!!next-input-character;
1860    
1861     !!!emit ($self->{current_token}); # comment
1862    
1863     redo A;
1864 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
1865 wakaba 1.77 !!!cp (152);
1866 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1867     line => $self->{line_prev},
1868     column => $self->{column_prev});
1869 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1870     ## Stay in the state
1871     !!!next-input-character;
1872     redo A;
1873 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1874 wakaba 1.77 !!!cp (153);
1875 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1876 wakaba 1.57 $self->{state} = DATA_STATE;
1877 wakaba 1.1 ## reconsume
1878    
1879     !!!emit ($self->{current_token}); # comment
1880    
1881     redo A;
1882     } else {
1883 wakaba 1.77 !!!cp (154);
1884 wakaba 1.114 !!!parse-error (type => 'dash in comment',
1885     line => $self->{line_prev},
1886     column => $self->{column_prev});
1887 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
1888 wakaba 1.57 $self->{state} = COMMENT_STATE;
1889 wakaba 1.1 !!!next-input-character;
1890     redo A;
1891     }
1892 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
1893 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1894     $self->{next_char} == 0x000A or # LF
1895     $self->{next_char} == 0x000B or # VT
1896     $self->{next_char} == 0x000C or # FF
1897     $self->{next_char} == 0x0020) { # SP
1898 wakaba 1.77 !!!cp (155);
1899 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1900 wakaba 1.1 !!!next-input-character;
1901     redo A;
1902     } else {
1903 wakaba 1.77 !!!cp (156);
1904 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1905 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
1906 wakaba 1.1 ## reconsume
1907     redo A;
1908     }
1909 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
1910 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1911     $self->{next_char} == 0x000A or # LF
1912     $self->{next_char} == 0x000B or # VT
1913     $self->{next_char} == 0x000C or # FF
1914     $self->{next_char} == 0x0020) { # SP
1915 wakaba 1.77 !!!cp (157);
1916 wakaba 1.1 ## Stay in the state
1917     !!!next-input-character;
1918     redo A;
1919 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1920 wakaba 1.77 !!!cp (158);
1921 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1922 wakaba 1.57 $self->{state} = DATA_STATE;
1923 wakaba 1.1 !!!next-input-character;
1924    
1925 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1926 wakaba 1.1
1927     redo A;
1928 wakaba 1.77 } elsif ($self->{next_char} == -1) {
1929     !!!cp (159);
1930 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1931 wakaba 1.57 $self->{state} = DATA_STATE;
1932 wakaba 1.1 ## reconsume
1933    
1934 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
1935 wakaba 1.1
1936     redo A;
1937     } else {
1938 wakaba 1.77 !!!cp (160);
1939 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
1940     delete $self->{current_token}->{quirks};
1941 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1942 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
1943 wakaba 1.1 !!!next-input-character;
1944     redo A;
1945     }
1946 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
1947 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1948 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1949     $self->{next_char} == 0x000A or # LF
1950     $self->{next_char} == 0x000B or # VT
1951     $self->{next_char} == 0x000C or # FF
1952     $self->{next_char} == 0x0020) { # SP
1953 wakaba 1.77 !!!cp (161);
1954 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
1955 wakaba 1.1 !!!next-input-character;
1956     redo A;
1957 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1958 wakaba 1.77 !!!cp (162);
1959 wakaba 1.57 $self->{state} = DATA_STATE;
1960 wakaba 1.1 !!!next-input-character;
1961    
1962     !!!emit ($self->{current_token}); # DOCTYPE
1963    
1964     redo A;
1965 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1966 wakaba 1.77 !!!cp (163);
1967 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1968 wakaba 1.57 $self->{state} = DATA_STATE;
1969 wakaba 1.1 ## reconsume
1970    
1971 wakaba 1.75 $self->{current_token}->{quirks} = 1;
1972 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
1973 wakaba 1.1
1974     redo A;
1975     } else {
1976 wakaba 1.77 !!!cp (164);
1977 wakaba 1.1 $self->{current_token}->{name}
1978 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
1979 wakaba 1.1 ## Stay in the state
1980     !!!next-input-character;
1981     redo A;
1982     }
1983 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
1984 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1985     $self->{next_char} == 0x000A or # LF
1986     $self->{next_char} == 0x000B or # VT
1987     $self->{next_char} == 0x000C or # FF
1988     $self->{next_char} == 0x0020) { # SP
1989 wakaba 1.77 !!!cp (165);
1990 wakaba 1.1 ## Stay in the state
1991     !!!next-input-character;
1992     redo A;
1993 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1994 wakaba 1.77 !!!cp (166);
1995 wakaba 1.57 $self->{state} = DATA_STATE;
1996 wakaba 1.1 !!!next-input-character;
1997    
1998     !!!emit ($self->{current_token}); # DOCTYPE
1999    
2000     redo A;
2001 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2002 wakaba 1.77 !!!cp (167);
2003 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2004 wakaba 1.57 $self->{state} = DATA_STATE;
2005 wakaba 1.1 ## reconsume
2006    
2007 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2008 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2009    
2010     redo A;
2011 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2012     $self->{next_char} == 0x0070) { # p
2013 wakaba 1.18 !!!next-input-character;
2014 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
2015     $self->{next_char} == 0x0075) { # u
2016 wakaba 1.18 !!!next-input-character;
2017 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
2018     $self->{next_char} == 0x0062) { # b
2019 wakaba 1.18 !!!next-input-character;
2020 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
2021     $self->{next_char} == 0x006C) { # l
2022 wakaba 1.18 !!!next-input-character;
2023 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
2024     $self->{next_char} == 0x0069) { # i
2025 wakaba 1.18 !!!next-input-character;
2026 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
2027     $self->{next_char} == 0x0063) { # c
2028 wakaba 1.77 !!!cp (168);
2029 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2030 wakaba 1.18 !!!next-input-character;
2031     redo A;
2032 wakaba 1.77 } else {
2033     !!!cp (169);
2034 wakaba 1.18 }
2035 wakaba 1.77 } else {
2036     !!!cp (170);
2037 wakaba 1.18 }
2038 wakaba 1.77 } else {
2039     !!!cp (171);
2040 wakaba 1.18 }
2041 wakaba 1.77 } else {
2042     !!!cp (172);
2043 wakaba 1.18 }
2044 wakaba 1.77 } else {
2045     !!!cp (173);
2046 wakaba 1.18 }
2047    
2048     #
2049 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2050     $self->{next_char} == 0x0073) { # s
2051 wakaba 1.18 !!!next-input-character;
2052 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
2053     $self->{next_char} == 0x0079) { # y
2054 wakaba 1.18 !!!next-input-character;
2055 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
2056     $self->{next_char} == 0x0073) { # s
2057 wakaba 1.18 !!!next-input-character;
2058 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
2059     $self->{next_char} == 0x0074) { # t
2060 wakaba 1.18 !!!next-input-character;
2061 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
2062     $self->{next_char} == 0x0065) { # e
2063 wakaba 1.18 !!!next-input-character;
2064 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
2065     $self->{next_char} == 0x006D) { # m
2066 wakaba 1.77 !!!cp (174);
2067 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2068 wakaba 1.18 !!!next-input-character;
2069     redo A;
2070 wakaba 1.77 } else {
2071     !!!cp (175);
2072 wakaba 1.18 }
2073 wakaba 1.77 } else {
2074     !!!cp (176);
2075 wakaba 1.18 }
2076 wakaba 1.77 } else {
2077     !!!cp (177);
2078 wakaba 1.18 }
2079 wakaba 1.77 } else {
2080     !!!cp (178);
2081 wakaba 1.18 }
2082 wakaba 1.77 } else {
2083     !!!cp (179);
2084 wakaba 1.18 }
2085    
2086     #
2087     } else {
2088 wakaba 1.77 !!!cp (180);
2089 wakaba 1.18 !!!next-input-character;
2090     #
2091     }
2092    
2093     !!!parse-error (type => 'string after DOCTYPE name');
2094 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2095 wakaba 1.73
2096 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2097 wakaba 1.18 # next-input-character is already done
2098     redo A;
2099 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2100 wakaba 1.18 if ({
2101     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2102     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2103 wakaba 1.76 }->{$self->{next_char}}) {
2104 wakaba 1.77 !!!cp (181);
2105 wakaba 1.18 ## Stay in the state
2106     !!!next-input-character;
2107     redo A;
2108 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2109 wakaba 1.77 !!!cp (182);
2110 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2111 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2112 wakaba 1.18 !!!next-input-character;
2113     redo A;
2114 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2115 wakaba 1.77 !!!cp (183);
2116 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2117 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2118 wakaba 1.18 !!!next-input-character;
2119     redo A;
2120 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2121 wakaba 1.77 !!!cp (184);
2122 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2123    
2124 wakaba 1.57 $self->{state} = DATA_STATE;
2125 wakaba 1.18 !!!next-input-character;
2126    
2127 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2128 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2129    
2130     redo A;
2131 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2132 wakaba 1.77 !!!cp (185);
2133 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2134    
2135 wakaba 1.57 $self->{state} = DATA_STATE;
2136 wakaba 1.18 ## reconsume
2137    
2138 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2139 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2140    
2141     redo A;
2142     } else {
2143 wakaba 1.77 !!!cp (186);
2144 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2145 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2146 wakaba 1.73
2147 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2148 wakaba 1.18 !!!next-input-character;
2149     redo A;
2150     }
2151 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2152 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2153 wakaba 1.77 !!!cp (187);
2154 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2155 wakaba 1.18 !!!next-input-character;
2156     redo A;
2157 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2158 wakaba 1.77 !!!cp (188);
2159 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2160    
2161     $self->{state} = DATA_STATE;
2162     !!!next-input-character;
2163    
2164 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2165 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2166    
2167     redo A;
2168 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2169 wakaba 1.77 !!!cp (189);
2170 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2171    
2172 wakaba 1.57 $self->{state} = DATA_STATE;
2173 wakaba 1.18 ## reconsume
2174    
2175 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2176 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2177    
2178     redo A;
2179     } else {
2180 wakaba 1.77 !!!cp (190);
2181 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2182 wakaba 1.76 .= chr $self->{next_char};
2183 wakaba 1.18 ## Stay in the state
2184     !!!next-input-character;
2185     redo A;
2186     }
2187 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2188 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2189 wakaba 1.77 !!!cp (191);
2190 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2191 wakaba 1.18 !!!next-input-character;
2192     redo A;
2193 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2194 wakaba 1.77 !!!cp (192);
2195 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2196    
2197     $self->{state} = DATA_STATE;
2198     !!!next-input-character;
2199    
2200 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2201 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2202    
2203     redo A;
2204 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2205 wakaba 1.77 !!!cp (193);
2206 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2207    
2208 wakaba 1.57 $self->{state} = DATA_STATE;
2209 wakaba 1.18 ## reconsume
2210    
2211 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2212 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2213    
2214     redo A;
2215     } else {
2216 wakaba 1.77 !!!cp (194);
2217 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2218 wakaba 1.76 .= chr $self->{next_char};
2219 wakaba 1.18 ## Stay in the state
2220     !!!next-input-character;
2221     redo A;
2222     }
2223 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2224 wakaba 1.18 if ({
2225     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2226     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2227 wakaba 1.76 }->{$self->{next_char}}) {
2228 wakaba 1.77 !!!cp (195);
2229 wakaba 1.18 ## Stay in the state
2230     !!!next-input-character;
2231     redo A;
2232 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2233 wakaba 1.77 !!!cp (196);
2234 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2235 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2236 wakaba 1.18 !!!next-input-character;
2237     redo A;
2238 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2239 wakaba 1.77 !!!cp (197);
2240 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2241 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2242 wakaba 1.18 !!!next-input-character;
2243     redo A;
2244 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2245 wakaba 1.77 !!!cp (198);
2246 wakaba 1.57 $self->{state} = DATA_STATE;
2247 wakaba 1.18 !!!next-input-character;
2248    
2249     !!!emit ($self->{current_token}); # DOCTYPE
2250    
2251     redo A;
2252 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2253 wakaba 1.77 !!!cp (199);
2254 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2255    
2256 wakaba 1.57 $self->{state} = DATA_STATE;
2257 wakaba 1.26 ## reconsume
2258 wakaba 1.18
2259 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2260 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2261    
2262     redo A;
2263     } else {
2264 wakaba 1.77 !!!cp (200);
2265 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2266 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2267 wakaba 1.73
2268 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2269 wakaba 1.18 !!!next-input-character;
2270     redo A;
2271     }
2272 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2273 wakaba 1.18 if ({
2274     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2275     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2276 wakaba 1.76 }->{$self->{next_char}}) {
2277 wakaba 1.77 !!!cp (201);
2278 wakaba 1.18 ## Stay in the state
2279     !!!next-input-character;
2280     redo A;
2281 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2282 wakaba 1.77 !!!cp (202);
2283 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2284 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2285 wakaba 1.18 !!!next-input-character;
2286     redo A;
2287 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2288 wakaba 1.77 !!!cp (203);
2289 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2290 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2291 wakaba 1.18 !!!next-input-character;
2292     redo A;
2293 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2294 wakaba 1.77 !!!cp (204);
2295 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2296 wakaba 1.57 $self->{state} = DATA_STATE;
2297 wakaba 1.18 !!!next-input-character;
2298    
2299 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2300 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2301    
2302     redo A;
2303 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2304 wakaba 1.77 !!!cp (205);
2305 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2306    
2307 wakaba 1.57 $self->{state} = DATA_STATE;
2308 wakaba 1.26 ## reconsume
2309 wakaba 1.18
2310 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2311 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2312    
2313     redo A;
2314     } else {
2315 wakaba 1.77 !!!cp (206);
2316 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2317 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2318 wakaba 1.73
2319 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2320 wakaba 1.18 !!!next-input-character;
2321     redo A;
2322     }
2323 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2324 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2325 wakaba 1.77 !!!cp (207);
2326 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2327 wakaba 1.18 !!!next-input-character;
2328     redo A;
2329 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2330 wakaba 1.77 !!!cp (208);
2331 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2332    
2333     $self->{state} = DATA_STATE;
2334     !!!next-input-character;
2335    
2336 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2337 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2338    
2339     redo A;
2340 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2341 wakaba 1.77 !!!cp (209);
2342 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2343    
2344 wakaba 1.57 $self->{state} = DATA_STATE;
2345 wakaba 1.18 ## reconsume
2346    
2347 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2348 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2349    
2350     redo A;
2351     } else {
2352 wakaba 1.77 !!!cp (210);
2353 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2354 wakaba 1.76 .= chr $self->{next_char};
2355 wakaba 1.18 ## Stay in the state
2356     !!!next-input-character;
2357     redo A;
2358     }
2359 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2360 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2361 wakaba 1.77 !!!cp (211);
2362 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2363 wakaba 1.18 !!!next-input-character;
2364     redo A;
2365 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2366 wakaba 1.77 !!!cp (212);
2367 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2368    
2369     $self->{state} = DATA_STATE;
2370     !!!next-input-character;
2371    
2372 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2373 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2374    
2375     redo A;
2376 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2377 wakaba 1.77 !!!cp (213);
2378 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2379    
2380 wakaba 1.57 $self->{state} = DATA_STATE;
2381 wakaba 1.18 ## reconsume
2382    
2383 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2384 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2385    
2386     redo A;
2387     } else {
2388 wakaba 1.77 !!!cp (214);
2389 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2390 wakaba 1.76 .= chr $self->{next_char};
2391 wakaba 1.18 ## Stay in the state
2392     !!!next-input-character;
2393     redo A;
2394     }
2395 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2396 wakaba 1.18 if ({
2397     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2398     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2399 wakaba 1.76 }->{$self->{next_char}}) {
2400 wakaba 1.77 !!!cp (215);
2401 wakaba 1.18 ## Stay in the state
2402     !!!next-input-character;
2403     redo A;
2404 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2405 wakaba 1.77 !!!cp (216);
2406 wakaba 1.57 $self->{state} = DATA_STATE;
2407 wakaba 1.18 !!!next-input-character;
2408    
2409     !!!emit ($self->{current_token}); # DOCTYPE
2410    
2411     redo A;
2412 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2413 wakaba 1.77 !!!cp (217);
2414 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2415    
2416 wakaba 1.57 $self->{state} = DATA_STATE;
2417 wakaba 1.26 ## reconsume
2418 wakaba 1.18
2419 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2420 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2421    
2422     redo A;
2423     } else {
2424 wakaba 1.77 !!!cp (218);
2425 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2426 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2427 wakaba 1.73
2428 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2429 wakaba 1.1 !!!next-input-character;
2430     redo A;
2431     }
2432 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2433 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2434 wakaba 1.77 !!!cp (219);
2435 wakaba 1.57 $self->{state} = DATA_STATE;
2436 wakaba 1.1 !!!next-input-character;
2437    
2438     !!!emit ($self->{current_token}); # DOCTYPE
2439    
2440     redo A;
2441 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2442 wakaba 1.77 !!!cp (220);
2443 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2444 wakaba 1.57 $self->{state} = DATA_STATE;
2445 wakaba 1.1 ## reconsume
2446    
2447     !!!emit ($self->{current_token}); # DOCTYPE
2448    
2449     redo A;
2450     } else {
2451 wakaba 1.77 !!!cp (221);
2452 wakaba 1.1 ## Stay in the state
2453     !!!next-input-character;
2454     redo A;
2455     }
2456 wakaba 1.127 } elsif ($self->{state} == CDATA_BLOCK_STATE) {
2457     my $s = '';
2458    
2459     my ($l, $c) = ($self->{line}, $self->{column});
2460    
2461     CS: while ($self->{next_char} != -1) {
2462     if ($self->{next_char} == 0x005D) { # ]
2463     !!!next-input-character;
2464     if ($self->{next_char} == 0x005D) { # ]
2465     !!!next-input-character;
2466     MDC: {
2467     if ($self->{next_char} == 0x003E) { # >
2468     !!!cp (221.1);
2469     !!!next-input-character;
2470     last CS;
2471     } elsif ($self->{next_char} == 0x005D) { # ]
2472     !!!cp (221.2);
2473     $s .= ']';
2474     !!!next-input-character;
2475     redo MDC;
2476     } else {
2477     !!!cp (221.3);
2478     $s .= ']]';
2479     #
2480     }
2481     } # MDC
2482     } else {
2483     !!!cp (221.4);
2484     $s .= ']';
2485     #
2486     }
2487     } else {
2488     !!!cp (221.5);
2489     #
2490     }
2491     $s .= chr $self->{next_char};
2492     !!!next-input-character;
2493     } # CS
2494    
2495     $self->{state} = DATA_STATE;
2496     ## next-input-character done or EOF, which is reconsumed.
2497    
2498     if (length $s) {
2499     !!!cp (221.6);
2500     !!!emit ({type => CHARACTER_TOKEN, data => $s,
2501     line => $l, column => $c});
2502     } else {
2503     !!!cp (221.7);
2504     }
2505    
2506     redo A;
2507    
2508     ## ISSUE: "text tokens" in spec.
2509     ## TODO: Streaming support
2510 wakaba 1.1 } else {
2511     die "$0: $self->{state}: Unknown state";
2512     }
2513     } # A
2514    
2515     die "$0: _get_next_token: unexpected case";
2516     } # _get_next_token
2517    
## Attempt to consume a character reference ("entity") at the current
## tokenizer position ($self->{next_char}).
##
## Arguments:
##   $self       - the parser object.
##   $in_attr    - true when called for an attribute value.  It only affects
##                 a named reference that matched WITHOUT a trailing ";" and
##                 was followed by further name characters: in that case the
##                 literal text "&name..." is returned instead of the
##                 replacement text.
##   $additional - one extra character code that, like white space, "<",
##                 "&" and EOF (-1), means "this is not a reference".
##
## Returns a CHARACTER_TOKEN hash reference (|data|, |line|, |column| and,
## when a reference was really expanded, |has_reference|), or undef when
## nothing is to be treated as a reference.
##
## NOTE(review): The caller has presumably just consumed the "&"; the
## literal fallbacks below prepend '&' and error positions are taken from
## |line_prev|/|column_prev|.  Confirm against the callers.
2518 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2519     my ($self, $in_attr, $additional) = @_;
2520 wakaba 1.20
2521 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2522    
2523 wakaba 1.20 if ({
2524     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2525     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2526 wakaba 1.72 $additional => 1,
2527 wakaba 1.76 }->{$self->{next_char}}) {
2528 wakaba 1.78 !!!cp (1001);
2529 wakaba 1.20 ## Don't consume
2530     ## No error
2531     return undef;
2532 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
2533 wakaba 1.1 !!!next-input-character;
2534 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2535     $self->{next_char} == 0x0058) { # X
## Hexadecimal numeric reference.  |$code| stays undef until the first
## hexadecimal digit has been seen.
2536 wakaba 1.26 my $code;
## Each pass through X consumes one character: the "x"/"X" on the first
## pass and one hexadecimal digit on every later pass (via |redo X|).
2537 wakaba 1.1 X: {
2538 wakaba 1.76 my $x_char = $self->{next_char};
2539 wakaba 1.1 !!!next-input-character;
2540 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2541     $self->{next_char} <= 0x0039) { # 0..9
2542 wakaba 1.78 !!!cp (1002);
2543 wakaba 1.26 $code ||= 0;
2544     $code *= 0x10;
2545 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2546 wakaba 1.1 redo X;
2547 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2548     $self->{next_char} <= 0x0066) { # a..f
2549 wakaba 1.78 !!!cp (1003);
2550 wakaba 1.26 $code ||= 0;
2551     $code *= 0x10;
2552 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2553 wakaba 1.1 redo X;
2554 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2555     $self->{next_char} <= 0x0046) { # A..F
2556 wakaba 1.78 !!!cp (1004);
2557 wakaba 1.26 $code ||= 0;
2558     $code *= 0x10;
2559 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2560 wakaba 1.1 redo X;
2561 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2562 wakaba 1.78 !!!cp (1005);
2563 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
## NOTE(review): The macro below presumably pushes the "x" and the
## character after it back onto the input; "#" then becomes the current
## character again.
2564 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2565     $self->{next_char} = 0x0023; # #
2566 wakaba 1.1 return undef;
2567 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2568 wakaba 1.78 !!!cp (1006);
2569 wakaba 1.1 !!!next-input-character;
2570     } else {
2571 wakaba 1.78 !!!cp (1007);
2572 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2573 wakaba 1.1 }
2574    
## Replace code points that must not be produced: U+0000 and surrogates
## and values above U+10FFFF become U+FFFD, CR becomes LF, and the C1
## range is remapped through |$c1_entity_char| (defined outside this
## subroutine).
2575 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2576 wakaba 1.78 !!!cp (1008);
2577 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2578 wakaba 1.26 $code = 0xFFFD;
2579     } elsif ($code > 0x10FFFF) {
2580 wakaba 1.78 !!!cp (1009);
2581 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2582 wakaba 1.26 $code = 0xFFFD;
2583     } elsif ($code == 0x000D) {
2584 wakaba 1.78 !!!cp (1010);
2585 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2586 wakaba 1.26 $code = 0x000A;
2587     } elsif (0x80 <= $code and $code <= 0x9F) {
2588 wakaba 1.78 !!!cp (1011);
2589 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2590 wakaba 1.26 $code = $c1_entity_char->{$code};
2591 wakaba 1.1 }
2592    
2593 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2594 wakaba 1.118 has_reference => 1,
2595 wakaba 1.120 line => $l, column => $c,
2596 wakaba 1.118 };
2597 wakaba 1.1 } # X
2598 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2599     $self->{next_char} <= 0x0039) { # 0..9
## Decimal numeric reference.
2600     my $code = $self->{next_char} - 0x0030;
2601 wakaba 1.1 !!!next-input-character;
2602    
2603 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2604     $self->{next_char} <= 0x0039) { # 0..9
2605 wakaba 1.78 !!!cp (1012);
2606 wakaba 1.1 $code *= 10;
2607 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2608 wakaba 1.1
2609     !!!next-input-character;
2610     }
2611    
2612 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2613 wakaba 1.78 !!!cp (1013);
2614 wakaba 1.1 !!!next-input-character;
2615     } else {
2616 wakaba 1.78 !!!cp (1014);
2617 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2618 wakaba 1.1 }
2619    
## Same code point replacement rules as in the hexadecimal case above.
2620 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2621 wakaba 1.78 !!!cp (1015);
2622 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2623 wakaba 1.26 $code = 0xFFFD;
2624     } elsif ($code > 0x10FFFF) {
2625 wakaba 1.78 !!!cp (1016);
2626 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2627 wakaba 1.26 $code = 0xFFFD;
2628     } elsif ($code == 0x000D) {
2629 wakaba 1.78 !!!cp (1017);
2630 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2631 wakaba 1.26 $code = 0x000A;
2632 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2633 wakaba 1.78 !!!cp (1018);
2634 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2635 wakaba 1.4 $code = $c1_entity_char->{$code};
2636 wakaba 1.1 }
2637    
2638 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2639 wakaba 1.120 line => $l, column => $c,
2640 wakaba 1.118 };
2641 wakaba 1.1 } else {
2642 wakaba 1.78 !!!cp (1019);
2643 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
2644 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2645     $self->{next_char} = 0x0023; # #
2646 wakaba 1.1 return undef;
2647     }
2648 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2649     $self->{next_char} <= 0x005A) or
2650     (0x0061 <= $self->{next_char} and
2651     $self->{next_char} <= 0x007A)) {
## Named reference.  |$entity_name| is every name character consumed so
## far; |$value| is the replacement text of the longest match so far
## followed by any characters consumed after that match.
2652     my $entity_name = chr $self->{next_char};
2653 wakaba 1.1 !!!next-input-character;
2654    
2655     my $value = $entity_name;
## |$match|: 0 = nothing matched yet, 1 = matched a name ending in ";",
## -1 = matched a name without ";", less than -1 = a ";"-less match that
## was followed by further non-matching characters (doubled for each).
2656 wakaba 1.37 my $match = 0;
2657 wakaba 1.16 require Whatpm::_NamedEntityList;
2658     our $EntityChar;
2659 wakaba 1.1
2660 wakaba 1.128 while (length $entity_name < 30 and
2661 wakaba 1.1 ## NOTE: Some number greater than the maximum length of entity name
2662 wakaba 1.76 ((0x0041 <= $self->{next_char} and # a
2663     $self->{next_char} <= 0x005A) or # x
2664     (0x0061 <= $self->{next_char} and # a
2665     $self->{next_char} <= 0x007A) or # z
2666     (0x0030 <= $self->{next_char} and # 0
2667     $self->{next_char} <= 0x0039) or # 9
2668     $self->{next_char} == 0x003B)) { # ;
2669     $entity_name .= chr $self->{next_char};
2670 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2671 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2672 wakaba 1.78 !!!cp (1020);
2673 wakaba 1.26 $value = $EntityChar->{$entity_name};
2674 wakaba 1.16 $match = 1;
2675     !!!next-input-character;
2676     last;
2677 wakaba 1.37 } else {
2678 wakaba 1.78 !!!cp (1021);
2679 wakaba 1.26 $value = $EntityChar->{$entity_name};
2680     $match = -1;
2681 wakaba 1.37 !!!next-input-character;
2682 wakaba 1.16 }
2683 wakaba 1.1 } else {
2684 wakaba 1.78 !!!cp (1022);
2685 wakaba 1.76 $value .= chr $self->{next_char};
2686 wakaba 1.37 $match *= 2;
2687     !!!next-input-character;
2688 wakaba 1.1 }
2689     }
2690    
2691 wakaba 1.16 if ($match > 0) {
2692 wakaba 1.78 !!!cp (1023);
2693 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2694 wakaba 1.120 line => $l, column => $c,
2695 wakaba 1.118 };
2696 wakaba 1.16 } elsif ($match < 0) {
2697 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2698 wakaba 1.37 if ($in_attr and $match < -1) {
2699 wakaba 1.78 !!!cp (1024);
## In an attribute value, a ";"-less match followed by more name
## characters is kept as literal text.
2700 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2701 wakaba 1.120 line => $l, column => $c,
2702 wakaba 1.118 };
2703 wakaba 1.37 } else {
2704 wakaba 1.78 !!!cp (1025);
2705 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2706 wakaba 1.120 line => $l, column => $c,
2707 wakaba 1.118 };
2708 wakaba 1.37 }
2709 wakaba 1.1 } else {
2710 wakaba 1.78 !!!cp (1026);
2711 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2712 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
## Here the name characters were already consumed, so they are returned
## as literal character data (prefixed with "&") instead.
2713 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
2714 wakaba 1.120 line => $l, column => $c,
2715 wakaba 1.118 };
2716 wakaba 1.1 }
2717     } else {
2718 wakaba 1.78 !!!cp (1027);
2719 wakaba 1.1 ## no characters are consumed
2720 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2721 wakaba 1.1 return undef;
2722     }
2723     } # _tokenize_attempt_to_consume_an_entity
2724    
## Prepare |$self->{document}| for tree construction: strict error
## checking is switched off and the document is flagged as an HTML
## document.  Takes the parser object as its only argument.
2725     sub _initialize_tree_constructor ($) {
2726     my $self = shift;
2727     ## NOTE: $self->{document} MUST be specified before this method is called
2728     $self->{document}->strict_error_checking (0);
2729     ## TODO: Turn mutation events off # MUST
2730     ## TODO: Turn loose Document option (manakai extension) on
2731 wakaba 1.18 $self->{document}->manakai_is_html (1); # MUST
2732 wakaba 1.1 } # _initialize_tree_constructor
2733    
## Counterpart of |_initialize_tree_constructor|: switches strict error
## checking on |$self->{document}| back on.  Takes the parser object as
## its only argument.
2734     sub _terminate_tree_constructor ($) {
2735     my $self = shift;
2736     $self->{document}->strict_error_checking (1);
2737     ## TODO: Turn mutation events on
2738     } # _terminate_tree_constructor
2739    
2740     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2741    
2742 wakaba 1.3 { # tree construction stage
2743     my $token;
2744    
## Entry point of the tree construction stage.  Fetches the first token,
## resets the per-parse state (|form_element|, |head_element|,
## |open_elements|, |inner_html_node|) and then runs the "initial",
## "before html" and remaining insertion modes in that order.
## Takes the parser object as its only argument.
2745 wakaba 1.1 sub _construct_tree ($) {
2746     my ($self) = @_;
2747    
2748     ## When an interactive UA renders the $self->{document} available
2749     ## to the user, or when it begins accepting user input, is
2750     ## not defined.
2751    
2752     ## Append a character: collect it and all subsequent consecutive
2753     ## characters and insert one Text node whose data is concatenation
2754     ## of all those characters. # MUST
2755    
2756     !!!next-token;
2757    
2758 wakaba 1.3 undef $self->{form_element};
2759     undef $self->{head_element};
2760     $self->{open_elements} = [];
2761     undef $self->{inner_html_node};
2762    
2763 wakaba 1.84 ## NOTE: The "initial" insertion mode.
2764 wakaba 1.3 $self->_tree_construction_initial; # MUST
2765 wakaba 1.84
2766     ## NOTE: The "before html" insertion mode.
2767 wakaba 1.3 $self->_tree_construction_root_element;
2768 wakaba 1.84 $self->{insertion_mode} = BEFORE_HEAD_IM;
2769    
2770     ## NOTE: The "before head" insertion mode and so on.
2771 wakaba 1.3 $self->_tree_construction_main;
2772     } # _construct_tree
2773    
## Implements the "initial" insertion mode.
##
## Works on the file-scoped |$token|.  Comments are appended to the
## document and leading white space is dropped.  A DOCTYPE token produces
## a DocumentType node and decides the document's compatibility mode
## ('quirks', 'limited quirks', or left unchanged).  Any other token
## reports a 'no DOCTYPE' parse error, selects 'quirks' mode and is left
## to be reprocessed by the next insertion mode.
##
## Returns (with no useful value) when the "before html" insertion mode
## should take over; dies on an unknown token type.
2774     sub _tree_construction_initial ($) {
2775     my $self = shift;
2776 wakaba 1.84
2777     ## NOTE: "initial" insertion mode
2778    
2779 wakaba 1.18 INITIAL: {
2780 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2781 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
2782     ## error, switch to a conformance checking mode for another
2783     ## language.
2784     my $doctype_name = $token->{name};
2785     $doctype_name = '' unless defined $doctype_name;
2786     $doctype_name =~ tr/a-z/A-Z/;
2787     if (not defined $token->{name} or # <!DOCTYPE>
2788     defined $token->{public_identifier} or
2789     defined $token->{system_identifier}) {
2790 wakaba 1.79 !!!cp ('t1');
2791 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
2792 wakaba 1.18 } elsif ($doctype_name ne 'HTML') {
2793 wakaba 1.79 !!!cp ('t2');
2794 wakaba 1.18 ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
2795 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
2796 wakaba 1.79 } else {
2797     !!!cp ('t3');
2798 wakaba 1.18 }
2799    
2800     my $doctype = $self->{document}->create_document_type_definition
2801     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
2802 wakaba 1.122 ## NOTE: Default value for both |public_id| and |system_id| attributes
2803     ## are empty strings, so that we don't set any value in missing cases.
2804 wakaba 1.18 $doctype->public_id ($token->{public_identifier})
2805     if defined $token->{public_identifier};
2806     $doctype->system_id ($token->{system_identifier})
2807     if defined $token->{system_identifier};
2808     ## NOTE: Other DocumentType attributes are null or empty lists.
2809     ## ISSUE: internalSubset = null??
2810     $self->{document}->append_child ($doctype);
2811    
2812 wakaba 1.75 if ($token->{quirks} or $doctype_name ne 'HTML') {
2813 wakaba 1.79 !!!cp ('t4');
2814 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2815     } elsif (defined $token->{public_identifier}) {
2816     my $pubid = $token->{public_identifier};
## Upper-case the public identifier (ASCII only) so that it can be
## compared with the upper-case identifiers listed below.
2817     $pubid =~ tr/a-z/A-Z/;
2818     if ({
2819     "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
2820     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2821     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
2822     "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
2823     "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
2824     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
2825     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
2826     "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
2827     "-//IETF//DTD HTML 2.0//EN" => 1,
2828     "-//IETF//DTD HTML 2.1E//EN" => 1,
2829     "-//IETF//DTD HTML 3.0//EN" => 1,
2830     "-//IETF//DTD HTML 3.0//EN//" => 1,
2831     "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
2832     "-//IETF//DTD HTML 3.2//EN" => 1,
2833     "-//IETF//DTD HTML 3//EN" => 1,
2834     "-//IETF//DTD HTML LEVEL 0//EN" => 1,
2835     "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
2836     "-//IETF//DTD HTML LEVEL 1//EN" => 1,
2837     "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
2838     "-//IETF//DTD HTML LEVEL 2//EN" => 1,
2839     "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
2840     "-//IETF//DTD HTML LEVEL 3//EN" => 1,
2841     "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
2842     "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
2843     "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
2844     "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
2845     "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
2846     "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
2847     "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
2848     "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
2849     "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
2850     "-//IETF//DTD HTML STRICT//EN" => 1,
2851     "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
2852     "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
2853     "-//IETF//DTD HTML//EN" => 1,
2854     "-//IETF//DTD HTML//EN//2.0" => 1,
2855     "-//IETF//DTD HTML//EN//3.0" => 1,
2856     "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
2857     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
2858     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
2859     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
2860     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
2861     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
2862     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
2863     "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
2864     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
2865     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
2866     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
2867 wakaba 1.72 "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
2868     "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
2869     "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
2870 wakaba 1.18 "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
2871     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
2872     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
2873     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
2874     "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
2875     "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
2876     "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
2877     "-//W3C//DTD HTML 3.2//EN" => 1,
2878     "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
2879     "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
2880     "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
2881     "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
2882     "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
2883     "-//W3C//DTD W3 HTML//EN" => 1,
2884     "-//W3O//DTD W3 HTML 3.0//EN" => 1,
2885     "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
2886     "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
2887     "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
2888     "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
2889     "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
2890     "HTML" => 1,
2891     }->{$pubid}) {
2892 wakaba 1.79 !!!cp ('t5');
2893 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2894     } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
2895     $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
2896     if (defined $token->{system_identifier}) {
2897 wakaba 1.79 !!!cp ('t6');
2898 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2899     } else {
2900 wakaba 1.79 !!!cp ('t7');
2901 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2902 wakaba 1.3 }
2903 wakaba 1.80 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
2904     $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
2905 wakaba 1.79 !!!cp ('t8');
2906 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
2907 wakaba 1.79 } else {
2908     !!!cp ('t9');
2909 wakaba 1.18 }
2910 wakaba 1.79 } else {
2911     !!!cp ('t10');
2912 wakaba 1.18 }
## The system identifier is checked independently of the public
## identifier and is compared in lower case.
2913     if (defined $token->{system_identifier}) {
2914     my $sysid = $token->{system_identifier};
2915     $sysid =~ tr/A-Z/a-z/;
2916     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
2917 wakaba 1.80 ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
2918 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2919 wakaba 1.79 !!!cp ('t11');
2920     } else {
2921     !!!cp ('t12');
2922 wakaba 1.18 }
2923 wakaba 1.79 } else {
2924     !!!cp ('t13');
2925 wakaba 1.18 }
2926    
2927 wakaba 1.84 ## Go to the "before html" insertion mode.
2928 wakaba 1.18 !!!next-token;
2929     return;
2930     } elsif ({
2931 wakaba 1.55 START_TAG_TOKEN, 1,
2932     END_TAG_TOKEN, 1,
2933     END_OF_FILE_TOKEN, 1,
2934 wakaba 1.18 }->{$token->{type}}) {
2935 wakaba 1.79 !!!cp ('t14');
2936 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
2937 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2938 wakaba 1.84 ## Go to the "before html" insertion mode.
2939 wakaba 1.18 ## reprocess
2940 wakaba 1.125 !!!ack-later;
2941 wakaba 1.18 return;
2942 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
## Leading white space is stripped from the token in place; only a
## non-empty remainder counts as content before the DOCTYPE.
2943 wakaba 1.18 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
2944     ## Ignore the token
2945 wakaba 1.26
2946 wakaba 1.18 unless (length $token->{data}) {
2947 wakaba 1.79 !!!cp ('t15');
2948 wakaba 1.84 ## Stay in the insertion mode.
2949 wakaba 1.18 !!!next-token;
2950     redo INITIAL;
2951 wakaba 1.79 } else {
2952     !!!cp ('t16');
2953 wakaba 1.3 }
2954 wakaba 1.79 } else {
2955     !!!cp ('t17');
2956 wakaba 1.3 }
2957 wakaba 1.18
2958 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
2959 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
2960 wakaba 1.84 ## Go to the "before html" insertion mode.
2961 wakaba 1.18 ## reprocess
2962     return;
2963 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2964 wakaba 1.79 !!!cp ('t18');
2965 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
2966     $self->{document}->append_child ($comment);
2967    
2968 wakaba 1.84 ## Stay in the insertion mode.
2969 wakaba 1.18 !!!next-token;
2970     redo INITIAL;
2971     } else {
2972 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
2973 wakaba 1.18 }
2974     } # INITIAL
2975 wakaba 1.79
2976     die "$0: _tree_construction_initial: This should be never reached";
2977 wakaba 1.3 } # _tree_construction_initial
2978    
## Implements the "before html" insertion mode.
##
## Works on the file-scoped |$token|.  DOCTYPE tokens are ignored with a
## parse error, comments are appended to the document and leading white
## space is dropped.  An |html| start tag creates the root element from
## the token and passes the value of its |manifest| attribute (or undef)
## to the |application_cache_selection| callback.  Any other token causes
## an |html| element without attributes to be created; the callback is
## then invoked once with undef and the token is left to be reprocessed.
##
## In both cases the new element is appended to the document and pushed
## onto |$self->{open_elements}| before returning.  Dies on an unknown
## token type.
2979     sub _tree_construction_root_element ($) {
2980     my $self = shift;
2981 wakaba 1.84
2982     ## NOTE: "before html" insertion mode.
2983 wakaba 1.3
2984     B: {
2985 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
2986 wakaba 1.79 !!!cp ('t19');
2987 wakaba 1.113 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
2988 wakaba 1.3 ## Ignore the token
2989 wakaba 1.84 ## Stay in the insertion mode.
2990 wakaba 1.3 !!!next-token;
2991     redo B;
2992 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
2993 wakaba 1.79 !!!cp ('t20');
2994 wakaba 1.3 my $comment = $self->{document}->create_comment ($token->{data});
2995     $self->{document}->append_child ($comment);
2996 wakaba 1.84 ## Stay in the insertion mode.
2997 wakaba 1.3 !!!next-token;
2998     redo B;
2999 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
3000 wakaba 1.26 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
3001     ## Ignore the token.
3002    
3003 wakaba 1.3 unless (length $token->{data}) {
3004 wakaba 1.79 !!!cp ('t21');
3005 wakaba 1.84 ## Stay in the insertion mode.
3006 wakaba 1.3 !!!next-token;
3007     redo B;
3008 wakaba 1.79 } else {
3009     !!!cp ('t22');
3010 wakaba 1.3 }
3011 wakaba 1.79 } else {
3012     !!!cp ('t23');
3013 wakaba 1.3 }
## NOTE: The application cache selection callback is not invoked here.
## It is invoked exactly once below, after the implied |html| element
## has been created; invoking it here as well ran it twice.
3017     #
3018     } elsif ($token->{type} == START_TAG_TOKEN) {
3019 wakaba 1.84 if ($token->{tag_name} eq 'html') {
3020     my $root_element;
3021 wakaba 1.126 !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
3022 wakaba 1.84 $self->{document}->append_child ($root_element);
3023 wakaba 1.123 push @{$self->{open_elements}},
3024     [$root_element, $el_category->{html}];
3025 wakaba 1.84
3026     if ($token->{attributes}->{manifest}) {
3027     !!!cp ('t24');
3028     $self->{application_cache_selection}
3029     ->($token->{attributes}->{manifest}->{value});
3030 wakaba 1.118 ## ISSUE: Spec is unclear on relative references.
3031     ## According to Hixie (#whatwg 2008-03-19), it should be
3032     ## resolved against the base URI of the document in HTML
3033     ## or xml:base of the element in XHTML.
3034 wakaba 1.84 } else {
3035     !!!cp ('t25');
3036     $self->{application_cache_selection}->(undef);
3037     }
3038    
3039 wakaba 1.125 !!!nack ('t25c');
3040    
3041 wakaba 1.84 !!!next-token;
3042     return; ## Go to the "before head" insertion mode.
3043 wakaba 1.61 } else {
3044 wakaba 1.84 !!!cp ('t25.1');
3045     #
3046 wakaba 1.61 }
3047 wakaba 1.3 } elsif ({
3048 wakaba 1.55 END_TAG_TOKEN, 1,
3049     END_OF_FILE_TOKEN, 1,
3050 wakaba 1.3 }->{$token->{type}}) {
3051 wakaba 1.79 !!!cp ('t26');
3052 wakaba 1.3 #
3053     } else {
3054 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
3055 wakaba 1.3 }
3056 wakaba 1.61
## Every branch that falls through to here needs an implied |html|
## element: it is created without attributes.
3057 wakaba 1.126 my $root_element;
3058     !!!create-element ($root_element, $HTML_NS, 'html',, $token);
3059 wakaba 1.84 $self->{document}->append_child ($root_element);
3060 wakaba 1.123 push @{$self->{open_elements}}, [$root_element, $el_category->{html}];
3061 wakaba 1.84
3062     $self->{application_cache_selection}->(undef);
3063    
3064     ## NOTE: Reprocess the token.
3065 wakaba 1.125 !!!ack-later;
3066 wakaba 1.84 return; ## Go to the "before head" insertion mode.
3067    
3068     ## ISSUE: There is an issue in the spec
3069 wakaba 1.3 } # B
3070 wakaba 1.79
3071     die "$0: _tree_construction_root_element: This should never be reached";
3072 wakaba 1.3 } # _tree_construction_root_element
3073    
## Recompute |$self->{insertion_mode}| from the stack of open elements.
##
## Walks |$self->{open_elements}| from the last entry towards the first
## and stops at the first entry that determines a mode.  When the first
## entry of the stack is reached and |$self->{inner_html_node}| is
## defined and is not a table cell, that node is examined in place of the
## stack entry.  Takes the parser object as its only argument; the result
## is stored in |$self->{insertion_mode}|.
3074     sub _reset_insertion_mode ($) {
3075     my $self = shift;
3076    
3077     ## Step 1
3078     my $last;
3079    
3080     ## Step 2
3081     my $i = -1;
3082     my $node = $self->{open_elements}->[$i];
3083    
3084     ## Step 3
3085     S3: {
3086 wakaba 1.29 if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
3087     $last = 1;
3088     if (defined $self->{inner_html_node}) {
3089 wakaba 1.123 if ($self->{inner_html_node}->[1] & TABLE_CELL_EL) {
3090 wakaba 1.79 !!!cp ('t27');
3091 wakaba 1.29 #
3092     } else {
3093 wakaba 1.79 !!!cp ('t28');
3094 wakaba 1.29 $node = $self->{inner_html_node};
3095     }
3096 wakaba 1.3 }
3097     }
3098    
3099 wakaba 1.126 ## Step 4..14
3100     my $new_mode;
3101     if ($node->[1] & FOREIGN_EL) {
3102     ## NOTE: Strictly speaking, the line below only applies to MathML and
3103     ## SVG elements. Currently the HTML syntax supports only MathML and
3104     ## SVG elements as foreigners.
3105     $new_mode = $self->{insertion_mode} | IN_FOREIGN_CONTENT_IM;
3106     ## ISSUE: What is set as the secondary insertion mode?
3107     } else {
3108     $new_mode = {
3109 wakaba 1.54 select => IN_SELECT_IM,
3110 wakaba 1.83 ## NOTE: |option| and |optgroup| do not set
3111     ## insertion mode to "in select" by themselves.
3112 wakaba 1.54 td => IN_CELL_IM,
3113     th => IN_CELL_IM,
3114     tr => IN_ROW_IM,
3115     tbody => IN_TABLE_BODY_IM,
3116     thead => IN_TABLE_BODY_IM,
3117     tfoot => IN_TABLE_BODY_IM,
3118     caption => IN_CAPTION_IM,
3119     colgroup => IN_COLUMN_GROUP_IM,
3120     table => IN_TABLE_IM,
3121     head => IN_BODY_IM, # not in head!
3122     body => IN_BODY_IM,
3123     frameset => IN_FRAMESET_IM,
3124 wakaba 1.123 }->{$node->[0]->manakai_local_name};
3125 wakaba 1.126 }
## NOTE(review): |return| below is only reached when the assigned mode
## value is true; this assumes that no *_IM constant evaluates to 0 -
## confirm against the constant definitions.
3126     $self->{insertion_mode} = $new_mode and return if defined $new_mode;
3127 wakaba 1.3
3128 wakaba 1.126 ## Step 15
3129 wakaba 1.123 if ($node->[1] & HTML_EL) {
3130 wakaba 1.3 unless (defined $self->{head_element}) {
3131 wakaba 1.79 !!!cp ('t29');
3132 wakaba 1.54 $self->{insertion_mode} = BEFORE_HEAD_IM;
3133 wakaba 1.3 } else {
3134 wakaba 1.81 ## ISSUE: Can this state be reached?
3135 wakaba 1.79 !!!cp ('t30');
3136 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
3137 wakaba 1.3 }
3138     return;
3139 wakaba 1.79 } else {
3140     !!!cp ('t31');
3141 wakaba 1.3 }
3142    
3143 wakaba 1.126 ## Step 16
3144 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM and return if $last;
3145 wakaba 1.3
3146 wakaba 1.126 ## Step 17
3147 wakaba 1.3 $i--;
3148     $node = $self->{open_elements}->[$i];
3149    
3150 wakaba 1.126 ## Step 18
3151 wakaba 1.3 redo S3;
3152     } # S3
3153 wakaba 1.79
3154     die "$0: _reset_insertion_mode: This line should never be reached";
3155 wakaba 1.3 } # _reset_insertion_mode
3156    
3157     sub _tree_construction_main ($) {
3158     my $self = shift;
3159    
3160 wakaba 1.1 my $active_formatting_elements = [];
3161    
3162     my $reconstruct_active_formatting_elements = sub { # MUST
3163     my $insert = shift;
3164    
3165     ## Step 1
3166     return unless @$active_formatting_elements;
3167    
3168     ## Step 3
3169     my $i = -1;
3170     my $entry = $active_formatting_elements->[$i];
3171    
3172     ## Step 2
3173     return if $entry->[0] eq '#marker';
3174 wakaba 1.3 for (@{$self->{open_elements}}) {
3175 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
3176 wakaba 1.79 !!!cp ('t32');
3177 wakaba 1.1 return;
3178     }
3179     }
3180    
3181     S4: {
3182     ## Step 4
3183     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
3184    
3185     ## Step 5
3186     $i--;
3187     $entry = $active_formatting_elements->[$i];
3188    
3189     ## Step 6
3190     if ($entry->[0] eq '#marker') {
3191 wakaba 1.81 !!!cp ('t33_1');
3192 wakaba 1.1 #
3193     } else {
3194     my $in_open_elements;
3195 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
3196 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
3197 wakaba 1.79 !!!cp ('t33');
3198 wakaba 1.1 $in_open_elements = 1;
3199     last OE;
3200     }
3201     }
3202     if ($in_open_elements) {
3203 wakaba 1.79 !!!cp ('t34');
3204 wakaba 1.1 #
3205     } else {
3206 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
3207 wakaba 1.79 !!!cp ('t35');
3208 wakaba 1.1 redo S4;
3209     }
3210     }
3211    
3212     ## Step 7
3213     $i++;
3214     $entry = $active_formatting_elements->[$i];
3215     } # S4
3216    
3217     S7: {
3218     ## Step 8
3219     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
3220    
3221     ## Step 9
3222     $insert->($clone->[0]);
3223 wakaba 1.3 push @{$self->{open_elements}}, $clone;
3224 wakaba 1.1
3225     ## Step 10
3226 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
3227 wakaba 1.1
3228     ## Step 11
3229     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
3230 wakaba 1.79 !!!cp ('t36');
3231 wakaba 1.1 ## Step 7'
3232     $i++;
3233     $entry = $active_formatting_elements->[$i];
3234    
3235     redo S7;
3236     }
3237 wakaba 1.79
3238     !!!cp ('t37');
3239 wakaba 1.1 } # S7
3240     }; # $reconstruct_active_formatting_elements
3241    
3242     my $clear_up_to_marker = sub {
3243     for (reverse 0..$#$active_formatting_elements) {
3244     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
3245 wakaba 1.79 !!!cp ('t38');
3246 wakaba 1.1 splice @$active_formatting_elements, $_;
3247     return;
3248     }
3249     }
3250 wakaba 1.79
3251     !!!cp ('t39');
3252 wakaba 1.1 }; # $clear_up_to_marker
3253    
## Forward declaration: |$parse_rcdata| and |$script_start_tag| call
## |$insert| from inside their bodies; the code reference itself is
## assigned further down.
my $insert;

## Generic handling of an element whose content is tokenized as CDATA
## or RCDATA, driven by the current start tag token in |$token|.
##
## Argument: the content model to switch the tokenizer to while the
## content is read (CDATA_CONTENT_MODEL or RCDATA_CONTENT_MODEL).  Any
## other value makes the closure die when the element is not closed
## by a matching end tag.
##
## Side effects: creates and inserts the element (via |$insert|),
## consumes the character tokens that follow and the token after
## them, and restores the PCDATA content model.
my $parse_rcdata = sub ($) {
  my $model = shift;

  ## Step 1
  my $expected_name = $token->{tag_name};
  my $element;
  !!!create-element ($element, $HTML_NS, $expected_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($element);

  ## Step 3
  $self->{content_model} = $model; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  ## Gather the data of every consecutive character token.
  my @chunk;
  !!!nack ('t40.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $content = join '', @chunk;

  ## Step 5
  if (length $content) {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($content);
    $element->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq $expected_name) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($model == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
  } elsif ($model == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
  } else {
    die "$0: $model in parse_rcdata";
  }

  ## The terminating token is consumed whether or not it matched.
  !!!next-token;
}; # $parse_rcdata
3310 wakaba 1.1
## Handles a |script| start tag token held in |$token|.
##
## Creates the |script| element, reads the following character tokens
## under the CDATA content model into it, reports a parse error when
## the element is not closed by a |script| end tag, and finally hands
## the element to |$insert| -- but only when no |inner_html_node| is
## set on the parser.  The token that ended the character run is
## always consumed.
my $script_start_tag = sub () {
  my $script_node;
  !!!create-element ($script_node, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Gather the data of every consecutive character token.
  my @chunk;
  !!!nack ('t45.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) {
    !!!cp ('t45');
    push @chunk, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $script_text = join '', @chunk;
  if (length $script_text) {
    !!!cp ('t46');
    $script_node->manakai_append_text ($script_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq 'script') {
    !!!cp ('t47');
    ## Ignore the token
  } else {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (not defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_node);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## NOTE: With an |inner_html_node| the element is not inserted.
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
3362    
## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
## The list starts with a single entry for the node of the first entry
## of the stack of open elements.
my $open_tables = [[$self->{open_elements}->[0]->[0]]];

## Processes an end tag token of a formatting element by running the
## adoption agency algorithm (AAA).
##
## Argument: the end tag token.  Its |tag_name| selects the formatting
## element; the token is attached to every parse error reported here.
##
## The closure works on |$active_formatting_elements| and on
## |$self->{open_elements}|, whose entries are |[node, category]|
## pairs, and consumes the token (|!!!next-token|) before returning.
## It returns no meaningful value.
##
## The algorithm is restarted (|redo FET|) after each full pass; the
## closure returns from one of the early exits of a later pass.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    ## Find the last entry after the last marker whose local name is
    ## the tag name of the token.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself.  It is not necessarily
      ## the last entry of the list, so it is removed by its index
      ## rather than popped.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      value => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    ## Scanning from the current node toward the formatting element,
    ## the last match wins, i.e. the matching entry nearest to the
    ## formatting element in the stack.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is kept as an index into the list of active
    ## formatting elements (counted while the formatting element is
    ## still in the list).  Nothing between here and Step 12 changes
    ## the length of that list -- Step 7 only replaces entries in
    ## place -- so the index stays valid, including when the
    ## formatting element is the first entry, when the neighbouring
    ## entry is a marker, and when the bookmarked entry is replaced
    ## by a clone.
    my $bookmark_i = $formatting_element_i_in_active;

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is dropped from the stack of open elements and skipped.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      ## Move the bookmark to immediately after |$node| in the list.
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_i = $node_i_in_active + 1;
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      ## Foster parenting: insert before the nearest open |table|
      ## when it has an element parent, otherwise append to the entry
      ## just before that |table| in the stack of open elements.
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Copy the child list first; the children are moved one by one.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element, then insert the clone at the
    ## bookmark.  A bookmark located after the removed entry moves
    ## down by one because of the removal.
    !!!cp ('t66');
    splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
    if ($bookmark_i > $formatting_element_i_in_active) {
      !!!cp ('t67');
      $bookmark_i--;
    }
    splice @$active_formatting_elements, $bookmark_i, 0, $clone;

    ## Step 13
    ## The stack of open elements may have been shortened in Step 7,
    ## so both entries are located again by node identity.
    my $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    ## Insert (length 0) the clone immediately after the furthest
    ## block; entries deeper than the furthest block are kept.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3599    
## Default insertion routine: appends the given node to the current
## node, i.e. the node of the last entry of the stack of open
## elements.  The same code reference is also stored in |$insert|.
$insert = my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
3603    
## Insertion routine with foster parenting.
##
## Argument: the node to insert.
##
## When the current node is flagged TABLE_ROWS_EL, the node is placed
## relative to the nearest open element flagged TABLE_EL and the
## current entry of |$open_tables| is marked as tainted.  Otherwise
## the node is simply appended to the current node.
my $insert_to_foster = sub {
  my $child = shift;
  my $stack = $self->{open_elements};

  unless ($stack->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $stack->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent;
  my $reference_node;
  for my $depth (reverse 0..$#$stack) {
    my $entry = $stack->[$depth];
    next unless $entry->[1] & TABLE_EL;

    my $table_parent = $entry->[0]->parent_node;
    if (defined $table_parent and $table_parent->node_type == 1) {
      ## The table has an element parent: insert just before the table.
      !!!cp ('t70');
      $foster_parent = $table_parent;
      $reference_node = $entry->[0];
    } else {
      ## Otherwise append to the entry preceding the table in the stack.
      !!!cp ('t71');
      $foster_parent = $stack->[$depth - 1]->[0];
    }
    last;
  }

  ## No table in the stack: fall back to the first entry of the stack.
  $foster_parent = $stack->[0]->[0] unless defined $foster_parent;
  $foster_parent->insert_before ($child, $reference_node);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
3635    
3636 wakaba 1.126 B: while (1) {
3637 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3638 wakaba 1.79 !!!cp ('t73');
3639 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3640 wakaba 1.52 ## Ignore the token
3641     ## Stay in the phase
3642     !!!next-token;
3643 wakaba 1.126 next B;
3644 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3645 wakaba 1.52 $token->{tag_name} eq 'html') {
3646 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3647 wakaba 1.79 !!!cp ('t79');
3648 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3649 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3650     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3651 wakaba 1.79 !!!cp ('t80');
3652 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3653 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3654 wakaba 1.79 } else {
3655     !!!cp ('t81');
3656 wakaba 1.52 }
3657    
3658 wakaba 1.84 !!!cp ('t82');
3659 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3660 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3661     for my $attr_name (keys %{$token->{attributes}}) {
3662     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3663 wakaba 1.79 !!!cp ('t84');
3664 wakaba 1.52 $top_el->set_attribute_ns
3665     (undef, [undef, $attr_name],
3666     $token->{attributes}->{$attr_name}->{value});
3667     }
3668     }
3669 wakaba 1.125 !!!nack ('t84.1');
3670 wakaba 1.52 !!!next-token;
3671 wakaba 1.126 next B;
3672 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3673 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3674 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3675 wakaba 1.79 !!!cp ('t85');
3676 wakaba 1.52 $self->{document}->append_child ($comment);
3677 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3678 wakaba 1.79 !!!cp ('t86');
3679 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
3680     } else {
3681 wakaba 1.79 !!!cp ('t87');
3682 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3683     }
3684     !!!next-token;
3685 wakaba 1.126 next B;
3686     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
3687     if ($token->{type} == CHARACTER_TOKEN) {
3688     !!!cp ('t87.1');
3689     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3690     !!!next-token;
3691     next B;
3692     } elsif ($token->{type} == START_TAG_TOKEN) {
3693     if ($self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL or
3694     not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
3695     ($token->{tag_name} eq 'svg' and
3696     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
3697     ## NOTE: "using the rules for secondary insertion mode"then"continue"
3698     !!!cp ('t87.2');
3699     #
3700     } elsif ({
3701     ## TODO:
3702     }->{$token->{tag_name}}) {
3703     !!!cp ('t87.2');
3704     !!!parse-error (type => 'not closed',
3705     value => $self->{open_elements}->[-1]->[0]
3706     ->manakai_local_name,
3707     token => $token);
3708    
3709     pop @{$self->{open_elements}}
3710     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
3711    
3712     $self->{insertion_mode} &= ~ $self->{insertion_mode};
3713     ## Reprocess.
3714     next B;
3715     } else {
3716     ## TODO: case fixup
3717    
3718     !!!insert-element-f ($self->{open_elements}->[-1]->[0]->namespace_uri, $token);
3719    
3720     if ($self->{self_closing}) {
3721     pop @{$self->{open_elements}};
3722     !!!ack ('t87.3');
3723     } else {
3724     !!!cp ('t87.4');
3725     }
3726    
3727     !!!next-token;
3728     next B;
3729     }
3730     } elsif ($token->{type} == END_TAG_TOKEN) {
3731     ## NOTE: "using the rules for secondary insertion mode" then "continue"
3732     !!!cp ('t87.5');
3733     #
3734     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3735     ## NOTE: "using the rules for secondary insertion mode" then "continue"
3736     !!!cp ('t87.6');
3737     #
3738     ## TODO: ...
3739     } else {
3740     die "$0: $token->{type}: Unknown token type";
3741     }
3742     }
3743    
3744     if ($self->{insertion_mode} & HEAD_IMS) {
3745 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3746 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3747 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3748     !!!cp ('t88.2');
3749     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3750     } else {
3751     !!!cp ('t88.1');
3752     ## Ignore the token.
3753     !!!next-token;
3754 wakaba 1.126 next B;
3755 wakaba 1.99 }
3756 wakaba 1.52 unless (length $token->{data}) {
3757 wakaba 1.79 !!!cp ('t88');
3758 wakaba 1.52 !!!next-token;
3759 wakaba 1.126 next B;
3760 wakaba 1.1 }
3761     }
3762 wakaba 1.52
3763 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3764 wakaba 1.79 !!!cp ('t89');
3765 wakaba 1.52 ## As if <head>
3766 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
3767 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3768 wakaba 1.123 push @{$self->{open_elements}},
3769     [$self->{head_element}, $el_category->{head}];
3770 wakaba 1.52
3771     ## Reprocess in the "in head" insertion mode...
3772     pop @{$self->{open_elements}};
3773    
3774     ## Reprocess in the "after head" insertion mode...
3775 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3776 wakaba 1.79 !!!cp ('t90');
3777 wakaba 1.52 ## As if </noscript>
3778     pop @{$self->{open_elements}};
3779 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
3780 wakaba 1.1
3781 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
3782     ## As if </head>
3783     pop @{$self->{open_elements}};
3784    
3785     ## Reprocess in the "after head" insertion mode...
3786 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
3787 wakaba 1.79 !!!cp ('t91');
3788 wakaba 1.52 pop @{$self->{open_elements}};
3789    
3790     ## Reprocess in the "after head" insertion mode...
3791 wakaba 1.79 } else {
3792     !!!cp ('t92');
3793 wakaba 1.1 }
3794 wakaba 1.52
3795 wakaba 1.123 ## "after head" insertion mode
3796     ## As if <body>
3797     !!!insert-element ('body',, $token);
3798     $self->{insertion_mode} = IN_BODY_IM;
3799     ## reprocess
3800 wakaba 1.126 next B;
3801 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
3802     if ($token->{tag_name} eq 'head') {
3803     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3804     !!!cp ('t93');
3805 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
3806 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
3807     ($self->{head_element});
3808     push @{$self->{open_elements}},
3809     [$self->{head_element}, $el_category->{head}];
3810     $self->{insertion_mode} = IN_HEAD_IM;
3811 wakaba 1.125 !!!nack ('t93.1');
3812 wakaba 1.123 !!!next-token;
3813 wakaba 1.126 next B;
3814 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3815     !!!cp ('t94');
3816     #
3817     } else {
3818     !!!cp ('t95');
3819     !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
3820     ## Ignore the token
3821     !!!nack ('t95.1');
3822     !!!next-token;
3823 wakaba 1.126 next B;
3824 wakaba 1.125 }
3825     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3826 wakaba 1.126 !!!cp ('t96');
3827     ## As if <head>
3828     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
3829     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3830     push @{$self->{open_elements}},
3831     [$self->{head_element}, $el_category->{head}];
3832 wakaba 1.52
3833 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
3834     ## Reprocess in the "in head" insertion mode...
3835     } else {
3836     !!!cp ('t97');
3837     }
3838 wakaba 1.52
3839 wakaba 1.49 if ($token->{tag_name} eq 'base') {
3840 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3841 wakaba 1.79 !!!cp ('t98');
3842 wakaba 1.49 ## As if </noscript>
3843     pop @{$self->{open_elements}};
3844 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
3845 wakaba 1.49
3846 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3847 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3848 wakaba 1.79 } else {
3849     !!!cp ('t99');
3850 wakaba 1.49 }
3851    
3852     ## NOTE: There is a "as if in head" code clone.
3853 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3854 wakaba 1.79 !!!cp ('t100');
3855 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3856 wakaba 1.123 push @{$self->{open_elements}},
3857     [$self->{head_element}, $el_category->{head}];
3858 wakaba 1.79 } else {
3859     !!!cp ('t101');
3860 wakaba 1.49 }
3861 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3862 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3863 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3864 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3865 wakaba 1.125 !!!nack ('t101.1');
3866 wakaba 1.49 !!!next-token;
3867 wakaba 1.126 next B;
3868 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
3869 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3870 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3871 wakaba 1.79 !!!cp ('t102');
3872 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3873 wakaba 1.123 push @{$self->{open_elements}},
3874     [$self->{head_element}, $el_category->{head}];
3875 wakaba 1.79 } else {
3876     !!!cp ('t103');
3877 wakaba 1.25 }
3878 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3879 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3880 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3881 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3882 wakaba 1.125 !!!ack ('t103.1');
3883 wakaba 1.1 !!!next-token;
3884 wakaba 1.126 next B;
3885 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
3886     ## NOTE: There is a "as if in head" code clone.
3887 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3888 wakaba 1.79 !!!cp ('t104');
3889 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3890 wakaba 1.123 push @{$self->{open_elements}},
3891     [$self->{head_element}, $el_category->{head}];
3892 wakaba 1.79 } else {
3893     !!!cp ('t105');
3894 wakaba 1.34 }
3895 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3896 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
3897 wakaba 1.34
3898     unless ($self->{confident}) {
3899     if ($token->{attributes}->{charset}) { ## TODO: And if supported
3900 wakaba 1.79 !!!cp ('t106');
3901 wakaba 1.63 $self->{change_encoding}
3902 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
3903     $token);
3904 wakaba 1.66
3905     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3906     ->set_user_data (manakai_has_reference =>
3907     $token->{attributes}->{charset}
3908     ->{has_reference});
3909 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
3910 wakaba 1.35 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
3911 wakaba 1.63 if ($token->{attributes}->{content}->{value}
3912 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
3913     [\x09-\x0D\x20]*=
3914 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
3915     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
3916 wakaba 1.79 !!!cp ('t107');
3917 wakaba 1.63 $self->{change_encoding}
3918 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
3919     $token);
3920 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3921     ->set_user_data (manakai_has_reference =>
3922     $token->{attributes}->{content}
3923     ->{has_reference});
3924 wakaba 1.79 } else {
3925     !!!cp ('t108');
3926 wakaba 1.63 }
3927 wakaba 1.34 }
3928 wakaba 1.66 } else {
3929     if ($token->{attributes}->{charset}) {
3930 wakaba 1.79 !!!cp ('t109');
3931 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
3932     ->set_user_data (manakai_has_reference =>
3933     $token->{attributes}->{charset}
3934     ->{has_reference});
3935     }
3936 wakaba 1.68 if ($token->{attributes}->{content}) {
3937 wakaba 1.79 !!!cp ('t110');
3938 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
3939     ->set_user_data (manakai_has_reference =>
3940     $token->{attributes}->{content}
3941     ->{has_reference});
3942     }
3943 wakaba 1.34 }
3944    
3945 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3946 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3947 wakaba 1.125 !!!ack ('t110.1');
3948 wakaba 1.34 !!!next-token;
3949 wakaba 1.126 next B;
3950 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
3951 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
3952 wakaba 1.79 !!!cp ('t111');
3953 wakaba 1.49 ## As if </noscript>
3954     pop @{$self->{open_elements}};
3955 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
3956 wakaba 1.49
3957 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
3958 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
3959 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
3960 wakaba 1.79 !!!cp ('t112');
3961 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3962 wakaba 1.123 push @{$self->{open_elements}},
3963     [$self->{head_element}, $el_category->{head}];
3964 wakaba 1.79 } else {
3965     !!!cp ('t113');
3966 wakaba 1.25 }
3967 wakaba 1.49
3968     ## NOTE: There is a "as if in head" code clone.
3969 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
3970     : $self->{open_elements}->[-1]->[0];
3971 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
3972 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3973 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3974 wakaba 1.126 next B;
3975 wakaba 1.25 } elsif ($token->{tag_name} eq 'style') {
3976     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
3977 wakaba 1.54 ## insertion mode IN_HEAD_IM)
3978 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
3979 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
3980 wakaba 1.79 !!!cp ('t114');
3981 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
3982 wakaba 1.123 push @{$self->{open_elements}},
3983     [$self->{head_element}, $el_category->{head}];
3984 wakaba 1.79 } else {
3985     !!!cp ('t115');
3986 wakaba 1.25 }
3987 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
3988 wakaba 1.100 pop @{$self->{open_elements}} # <head>
3989 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
3990 wakaba 1.126 next B;
3991 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
3992 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
3993 wakaba 1.79 !!!cp ('t116');
3994 wakaba 1.25 ## NOTE: and scripting is disabled
3995 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3996 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
3997 wakaba 1.125 !!!nack ('t116.1');
3998 wakaba 1.1 !!!next-token;
3999 wakaba 1.126 next B;
4000 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4001 wakaba 1.79 !!!cp ('t117');
4002 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
4003 wakaba 1.1 ## Ignore the token
4004 wakaba 1.125 !!!nack ('t117.1');
4005 wakaba 1.41 !!!next-token;
4006 wakaba 1.126 next B;
4007 wakaba 1.1 } else {
4008 wakaba 1.79 !!!cp ('t118');
4009 wakaba 1.25 #
4010 wakaba 1.1 }
4011 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4012 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4013 wakaba 1.79 !!!cp ('t119');
4014 wakaba 1.49 ## As if </noscript>
4015     pop @{$self->{open_elements}};
4016 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
4017 wakaba 1.49
4018 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4019 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4020 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4021 wakaba 1.79 !!!cp ('t120');
4022 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4023 wakaba 1.123 push @{$self->{open_elements}},
4024     [$self->{head_element}, $el_category->{head}];
4025 wakaba 1.79 } else {
4026     !!!cp ('t121');
4027 wakaba 1.25 }
4028 wakaba 1.49
4029 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4030 wakaba 1.100 $script_start_tag->();
4031     pop @{$self->{open_elements}} # <head>
4032 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4033 wakaba 1.126 next B;
4034 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4035 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4036 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4037 wakaba 1.79 !!!cp ('t122');
4038 wakaba 1.49 ## As if </noscript>
4039     pop @{$self->{open_elements}};
4040 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
4041 wakaba 1.49
4042     ## Reprocess in the "in head" insertion mode...
4043     ## As if </head>
4044     pop @{$self->{open_elements}};
4045    
4046     ## Reprocess in the "after head" insertion mode...
4047 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4048 wakaba 1.79 !!!cp ('t124');
4049 wakaba 1.49 pop @{$self->{open_elements}};
4050    
4051     ## Reprocess in the "after head" insertion mode...
4052 wakaba 1.79 } else {
4053     !!!cp ('t125');
4054 wakaba 1.49 }
4055    
4056     ## "after head" insertion mode
4057 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4058 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4059 wakaba 1.79 !!!cp ('t126');
4060 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4061     } elsif ($token->{tag_name} eq 'frameset') {
4062 wakaba 1.79 !!!cp ('t127');
4063 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4064     } else {
4065     die "$0: tag name: $self->{tag_name}";
4066     }
4067 wakaba 1.125 !!!nack ('t127.1');
4068 wakaba 1.1 !!!next-token;
4069 wakaba 1.126 next B;
4070 wakaba 1.1 } else {
4071 wakaba 1.79 !!!cp ('t128');
4072 wakaba 1.1 #
4073     }
4074 wakaba 1.49
4075 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4076 wakaba 1.79 !!!cp ('t129');
4077 wakaba 1.49 ## As if </noscript>
4078     pop @{$self->{open_elements}};
4079 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4080 wakaba 1.49
4081     ## Reprocess in the "in head" insertion mode...
4082     ## As if </head>
4083 wakaba 1.25 pop @{$self->{open_elements}};
4084 wakaba 1.49
4085     ## Reprocess in the "after head" insertion mode...
4086 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4087 wakaba 1.79 !!!cp ('t130');
4088 wakaba 1.49 ## As if </head>
4089 wakaba 1.25 pop @{$self->{open_elements}};
4090 wakaba 1.49
4091     ## Reprocess in the "after head" insertion mode...
4092 wakaba 1.79 } else {
4093     !!!cp ('t131');
4094 wakaba 1.49 }
4095    
4096     ## "after head" insertion mode
4097     ## As if <body>
4098 wakaba 1.116 !!!insert-element ('body',, $token);
4099 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4100 wakaba 1.49 ## reprocess
4101 wakaba 1.125 !!!ack-later;
4102 wakaba 1.126 next B;
4103 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4104 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4105 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4106 wakaba 1.79 !!!cp ('t132');
4107 wakaba 1.50 ## As if <head>
4108 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4109 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4110 wakaba 1.123 push @{$self->{open_elements}},
4111     [$self->{head_element}, $el_category->{head}];
4112 wakaba 1.50
4113     ## Reprocess in the "in head" insertion mode...
4114     pop @{$self->{open_elements}};
4115 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4116 wakaba 1.50 !!!next-token;
4117 wakaba 1.126 next B;
4118 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4119 wakaba 1.79 !!!cp ('t133');
4120 wakaba 1.49 ## As if </noscript>
4121     pop @{$self->{open_elements}};
4122 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
4123 wakaba 1.49
4124     ## Reprocess in the "in head" insertion mode...
4125 wakaba 1.50 pop @{$self->{open_elements}};
4126 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4127 wakaba 1.50 !!!next-token;
4128 wakaba 1.126 next B;
4129 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4130 wakaba 1.79 !!!cp ('t134');
4131 wakaba 1.49 pop @{$self->{open_elements}};
4132 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4133 wakaba 1.49 !!!next-token;
4134 wakaba 1.126 next B;
4135 wakaba 1.49 } else {
4136 wakaba 1.79 !!!cp ('t135');
4137 wakaba 1.49 #
4138     }
4139     } elsif ($token->{tag_name} eq 'noscript') {
4140 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4141 wakaba 1.79 !!!cp ('t136');
4142 wakaba 1.49 pop @{$self->{open_elements}};
4143 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4144 wakaba 1.49 !!!next-token;
4145 wakaba 1.126 next B;
4146 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4147 wakaba 1.79 !!!cp ('t137');
4148 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
4149 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4150     !!!next-token;
4151 wakaba 1.126 next B;
4152 wakaba 1.49 } else {
4153 wakaba 1.79 !!!cp ('t138');
4154 wakaba 1.49 #
4155     }
4156     } elsif ({
4157 wakaba 1.31 body => 1, html => 1,
4158     }->{$token->{tag_name}}) {
4159 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4160 wakaba 1.79 !!!cp ('t139');
4161 wakaba 1.50 ## As if <head>
4162 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4163 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4164 wakaba 1.123 push @{$self->{open_elements}},
4165     [$self->{head_element}, $el_category->{head}];
4166 wakaba 1.50
4167 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4168 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
4169 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4170 wakaba 1.79 !!!cp ('t140');
4171 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4172 wakaba 1.49 ## Ignore the token
4173     !!!next-token;
4174 wakaba 1.126 next B;
4175 wakaba 1.79 } else {
4176     !!!cp ('t141');
4177 wakaba 1.49 }
4178 wakaba 1.50
4179     #
4180 wakaba 1.49 } elsif ({
4181 wakaba 1.31 p => 1, br => 1,
4182     }->{$token->{tag_name}}) {
4183 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4184 wakaba 1.79 !!!cp ('t142');
4185 wakaba 1.50 ## As if <head>
4186 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4187 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4188 wakaba 1.123 push @{$self->{open_elements}},
4189     [$self->{head_element}, $el_category->{head}];
4190 wakaba 1.50
4191 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4192 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
4193 wakaba 1.79 } else {
4194     !!!cp ('t143');
4195 wakaba 1.50 }
4196    
4197 wakaba 1.1 #
4198 wakaba 1.25 } else {
4199 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4200 wakaba 1.79 !!!cp ('t144');
4201 wakaba 1.54 #
4202     } else {
4203 wakaba 1.79 !!!cp ('t145');
4204 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4205 wakaba 1.49 ## Ignore the token
4206     !!!next-token;
4207 wakaba 1.126 next B;
4208 wakaba 1.49 }
4209     }
4210    
4211 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4212 wakaba 1.79 !!!cp ('t146');
4213 wakaba 1.49 ## As if </noscript>
4214     pop @{$self->{open_elements}};
4215 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4216 wakaba 1.49
4217     ## Reprocess in the "in head" insertion mode...
4218     ## As if </head>
4219     pop @{$self->{open_elements}};
4220    
4221     ## Reprocess in the "after head" insertion mode...
4222 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4223 wakaba 1.79 !!!cp ('t147');
4224 wakaba 1.49 ## As if </head>
4225     pop @{$self->{open_elements}};
4226    
4227     ## Reprocess in the "after head" insertion mode...
4228 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4229 wakaba 1.82 ## ISSUE: This case cannot be reached?
4230 wakaba 1.79 !!!cp ('t148');
4231 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4232 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4233     !!!next-token;
4234 wakaba 1.126 next B;
4235 wakaba 1.79 } else {
4236     !!!cp ('t149');
4237 wakaba 1.1 }
4238    
4239 wakaba 1.49 ## "after head" insertion mode
4240     ## As if <body>
4241 wakaba 1.116 !!!insert-element ('body',, $token);
4242 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4243 wakaba 1.52 ## reprocess
4244 wakaba 1.126 next B;
4245 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4246     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4247     !!!cp ('t149.1');
4248    
4249     ## NOTE: As if <head>
4250 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4251 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4252     ($self->{head_element});
4253 wakaba 1.123 #push @{$self->{open_elements}},
4254     # [$self->{head_element}, $el_category->{head}];
4255 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4256     ## NOTE: Reprocess.
4257    
4258     ## NOTE: As if </head>
4259     #pop @{$self->{open_elements}};
4260     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4261     ## NOTE: Reprocess.
4262    
4263     #
4264     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4265     !!!cp ('t149.2');
4266    
4267     ## NOTE: As if </head>
4268     pop @{$self->{open_elements}};
4269     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4270     ## NOTE: Reprocess.
4271    
4272     #
4273     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4274     !!!cp ('t149.3');
4275    
4276 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4277 wakaba 1.104
4278     ## As if </noscript>
4279     pop @{$self->{open_elements}};
4280     #$self->{insertion_mode} = IN_HEAD_IM;
4281     ## NOTE: Reprocess.
4282    
4283     ## NOTE: As if </head>
4284     pop @{$self->{open_elements}};
4285     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4286     ## NOTE: Reprocess.
4287    
4288     #
4289     } else {
4290     !!!cp ('t149.4');
4291     #
4292     }
4293    
4294     ## NOTE: As if <body>
4295 wakaba 1.116 !!!insert-element ('body',, $token);
4296 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4297     ## NOTE: Reprocess.
4298 wakaba 1.126 next B;
4299 wakaba 1.104 } else {
4300     die "$0: $token->{type}: Unknown token type";
4301     }
4302 wakaba 1.52
4303     ## ISSUE: An issue in the spec.
4304 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4305 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4306 wakaba 1.79 !!!cp ('t150');
4307 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4308     $reconstruct_active_formatting_elements->($insert_to_current);
4309    
4310     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4311    
4312     !!!next-token;
4313 wakaba 1.126 next B;
4314 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4315 wakaba 1.52 if ({
4316     caption => 1, col => 1, colgroup => 1, tbody => 1,
4317     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4318     }->{$token->{tag_name}}) {
4319 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4320 wakaba 1.52 ## have an element in table scope
4321 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4322 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4323 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4324 wakaba 1.79 !!!cp ('t151');
4325 wakaba 1.108
4326     ## Close the cell
4327 wakaba 1.125 !!!back-token; # <x>
4328 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4329     tag_name => $node->[0]->manakai_local_name,
4330 wakaba 1.114 line => $token->{line},
4331     column => $token->{column}};
4332 wakaba 1.126 next B;
4333 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4334 wakaba 1.79 !!!cp ('t152');
4335 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4336     last;
4337 wakaba 1.52 }
4338 wakaba 1.108 }
4339    
4340     !!!cp ('t153');
4341     !!!parse-error (type => 'start tag not allowed',
4342 wakaba 1.113 value => $token->{tag_name}, token => $token);
4343 wakaba 1.108 ## Ignore the token
4344 wakaba 1.125 !!!nack ('t153.1');
4345 wakaba 1.108 !!!next-token;
4346 wakaba 1.126 next B;
4347 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4348 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4349 wakaba 1.52
4350 wakaba 1.108 ## NOTE: As if </caption>.
4351 wakaba 1.52 ## have a table element in table scope
4352     my $i;
4353 wakaba 1.108 INSCOPE: {
4354     for (reverse 0..$#{$self->{open_elements}}) {
4355     my $node = $self->{open_elements}->[$_];
4356 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4357 wakaba 1.108 !!!cp ('t155');
4358     $i = $_;
4359     last INSCOPE;
4360 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4361 wakaba 1.108 !!!cp ('t156');
4362     last;
4363     }
4364 wakaba 1.52 }
4365 wakaba 1.108
4366     !!!cp ('t157');
4367     !!!parse-error (type => 'start tag not allowed',
4368 wakaba 1.113 value => $token->{tag_name}, token => $token);
4369 wakaba 1.108 ## Ignore the token
4370 wakaba 1.125 !!!nack ('t157.1');
4371 wakaba 1.108 !!!next-token;
4372 wakaba 1.126 next B;
4373 wakaba 1.52 } # INSCOPE
4374    
4375     ## generate implied end tags
4376 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4377     & END_TAG_OPTIONAL_EL) {
4378 wakaba 1.79 !!!cp ('t158');
4379 wakaba 1.86 pop @{$self->{open_elements}};
4380 wakaba 1.52 }
4381    
4382 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4383 wakaba 1.79 !!!cp ('t159');
4384 wakaba 1.122 !!!parse-error (type => 'not closed',
4385     value => $self->{open_elements}->[-1]->[0]
4386     ->manakai_local_name,
4387     token => $token);
4388 wakaba 1.79 } else {
4389     !!!cp ('t160');
4390 wakaba 1.52 }
4391    
4392     splice @{$self->{open_elements}}, $i;
4393    
4394     $clear_up_to_marker->();
4395    
4396 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4397 wakaba 1.52
4398     ## reprocess
4399 wakaba 1.125 !!!ack-later;
4400 wakaba 1.126 next B;
4401 wakaba 1.52 } else {
4402 wakaba 1.79 !!!cp ('t161');
4403 wakaba 1.52 #
4404     }
4405     } else {
4406 wakaba 1.79 !!!cp ('t162');
4407 wakaba 1.52 #
4408     }
4409 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4410 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4411 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4412 wakaba 1.43 ## have an element in table scope
4413 wakaba 1.52 my $i;
4414 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4415     my $node = $self->{open_elements}->[$_];
4416 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4417 wakaba 1.79 !!!cp ('t163');
4418 wakaba 1.52 $i = $_;
4419 wakaba 1.43 last INSCOPE;
4420 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4421 wakaba 1.79 !!!cp ('t164');
4422 wakaba 1.43 last INSCOPE;
4423     }
4424     } # INSCOPE
4425 wakaba 1.52 unless (defined $i) {
4426 wakaba 1.79 !!!cp ('t165');
4427 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4428 wakaba 1.43 ## Ignore the token
4429     !!!next-token;
4430 wakaba 1.126 next B;
4431 wakaba 1.43 }
4432    
4433 wakaba 1.52 ## generate implied end tags
4434 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4435     & END_TAG_OPTIONAL_EL) {
4436 wakaba 1.79 !!!cp ('t166');
4437 wakaba 1.86 pop @{$self->{open_elements}};
4438 wakaba 1.52 }
4439 wakaba 1.86
4440 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
4441     ne $token->{tag_name}) {
4442 wakaba 1.79 !!!cp ('t167');
4443 wakaba 1.122 !!!parse-error (type => 'not closed',
4444     value => $self->{open_elements}->[-1]->[0]
4445     ->manakai_local_name,
4446     token => $token);
4447 wakaba 1.79 } else {
4448     !!!cp ('t168');
4449 wakaba 1.52 }
4450    
4451     splice @{$self->{open_elements}}, $i;
4452    
4453     $clear_up_to_marker->();
4454    
4455 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4456 wakaba 1.52
4457     !!!next-token;
4458 wakaba 1.126 next B;
4459 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4460 wakaba 1.79 !!!cp ('t169');
4461 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4462 wakaba 1.52 ## Ignore the token
4463     !!!next-token;
4464 wakaba 1.126 next B;
4465 wakaba 1.52 } else {
4466 wakaba 1.79 !!!cp ('t170');
4467 wakaba 1.52 #
4468     }
4469     } elsif ($token->{tag_name} eq 'caption') {
4470 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4471 wakaba 1.43 ## have a table element in table scope
4472     my $i;
4473 wakaba 1.108 INSCOPE: {
4474     for (reverse 0..$#{$self->{open_elements}}) {
4475     my $node = $self->{open_elements}->[$_];
4476 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4477 wakaba 1.108 !!!cp ('t171');
4478     $i = $_;
4479     last INSCOPE;
4480 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4481 wakaba 1.108 !!!cp ('t172');
4482     last;
4483     }
4484 wakaba 1.43 }
4485 wakaba 1.108
4486     !!!cp ('t173');
4487     !!!parse-error (type => 'unmatched end tag',
4488 wakaba 1.113 value => $token->{tag_name}, token => $token);
4489 wakaba 1.108 ## Ignore the token
4490     !!!next-token;
4491 wakaba 1.126 next B;
4492 wakaba 1.43 } # INSCOPE
4493    
4494     ## generate implied end tags
4495 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4496     & END_TAG_OPTIONAL_EL) {
4497 wakaba 1.79 !!!cp ('t174');
4498 wakaba 1.86 pop @{$self->{open_elements}};
4499 wakaba 1.43 }
4500 wakaba 1.52
4501 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4502 wakaba 1.79 !!!cp ('t175');
4503 wakaba 1.122 !!!parse-error (type => 'not closed',
4504     value => $self->{open_elements}->[-1]->[0]
4505     ->manakai_local_name,
4506     token => $token);
4507 wakaba 1.79 } else {
4508     !!!cp ('t176');
4509 wakaba 1.52 }
4510    
4511     splice @{$self->{open_elements}}, $i;
4512    
4513     $clear_up_to_marker->();
4514    
4515 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4516 wakaba 1.52
4517     !!!next-token;
4518 wakaba 1.126 next B;
4519 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4520 wakaba 1.79 !!!cp ('t177');
4521 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4522 wakaba 1.52 ## Ignore the token
4523     !!!next-token;
4524 wakaba 1.126 next B;
4525 wakaba 1.52 } else {
4526 wakaba 1.79 !!!cp ('t178');
4527 wakaba 1.52 #
4528     }
4529     } elsif ({
4530     table => 1, tbody => 1, tfoot => 1,
4531     thead => 1, tr => 1,
4532     }->{$token->{tag_name}} and
4533 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4534 wakaba 1.52 ## have an element in table scope
4535     my $i;
4536     my $tn;
4537 wakaba 1.108 INSCOPE: {
4538     for (reverse 0..$#{$self->{open_elements}}) {
4539     my $node = $self->{open_elements}->[$_];
4540 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4541 wakaba 1.108 !!!cp ('t179');
4542     $i = $_;
4543    
4544     ## Close the cell
4545 wakaba 1.125 !!!back-token; # </x>
4546 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4547     line => $token->{line},
4548     column => $token->{column}};
4549 wakaba 1.126 next B;
4550 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
4551 wakaba 1.108 !!!cp ('t180');
4552 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
4553 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
4554     ## in scope in the stack of open elements by definition.
4555 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4556 wakaba 1.108 ## ISSUE: Can this be reached?
4557     !!!cp ('t181');
4558     last;
4559     }
4560 wakaba 1.52 }
4561 wakaba 1.108
4562 wakaba 1.79 !!!cp ('t182');
4563 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4564 wakaba 1.113 value => $token->{tag_name}, token => $token);
4565 wakaba 1.52 ## Ignore the token
4566     !!!next-token;
4567 wakaba 1.126 next B;
4568 wakaba 1.108 } # INSCOPE
4569 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4570 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4571 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4572 wakaba 1.52
4573     ## As if </caption>
4574     ## have a table element in table scope
4575     my $i;
4576     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4577     my $node = $self->{open_elements}->[$_];
4578 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4579 wakaba 1.79 !!!cp ('t184');
4580 wakaba 1.52 $i = $_;
4581     last INSCOPE;
4582 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4583 wakaba 1.79 !!!cp ('t185');
4584 wakaba 1.52 last INSCOPE;
4585     }
4586     } # INSCOPE
4587     unless (defined $i) {
4588 wakaba 1.79 !!!cp ('t186');
4589 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
4590 wakaba 1.52 ## Ignore the token
4591     !!!next-token;
4592 wakaba 1.126 next B;
4593 wakaba 1.52 }
4594    
4595     ## generate implied end tags
4596 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4597 wakaba 1.79 !!!cp ('t187');
4598 wakaba 1.86 pop @{$self->{open_elements}};
4599 wakaba 1.52 }
4600    
4601 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4602 wakaba 1.79 !!!cp ('t188');
4603 wakaba 1.122 !!!parse-error (type => 'not closed',
4604     value => $self->{open_elements}->[-1]->[0]
4605     ->manakai_local_name,
4606     token => $token);
4607 wakaba 1.79 } else {
4608     !!!cp ('t189');
4609 wakaba 1.52 }
4610    
4611     splice @{$self->{open_elements}}, $i;
4612    
4613     $clear_up_to_marker->();
4614    
4615 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4616 wakaba 1.52
4617     ## reprocess
4618 wakaba 1.126 next B;
4619 wakaba 1.52 } elsif ({
4620     body => 1, col => 1, colgroup => 1, html => 1,
4621     }->{$token->{tag_name}}) {
4622 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4623 wakaba 1.79 !!!cp ('t190');
4624 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4625 wakaba 1.52 ## Ignore the token
4626     !!!next-token;
4627 wakaba 1.126 next B;
4628 wakaba 1.52 } else {
4629 wakaba 1.79 !!!cp ('t191');
4630 wakaba 1.52 #
4631     }
4632     } elsif ({
4633     tbody => 1, tfoot => 1,
4634     thead => 1, tr => 1,
4635     }->{$token->{tag_name}} and
4636 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4637 wakaba 1.79 !!!cp ('t192');
4638 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4639 wakaba 1.52 ## Ignore the token
4640     !!!next-token;
4641 wakaba 1.126 next B;
4642 wakaba 1.52 } else {
4643 wakaba 1.79 !!!cp ('t193');
4644 wakaba 1.52 #
4645     }
4646 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4647     for my $entry (@{$self->{open_elements}}) {
4648 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
4649 wakaba 1.104 !!!cp ('t75');
4650 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4651 wakaba 1.104 last;
4652     }
4653     }
4654    
4655     ## Stop parsing.
4656     last B;
4657 wakaba 1.52 } else {
4658     die "$0: $token->{type}: Unknown token type";
4659     }
4660    
4661     $insert = $insert_to_current;
4662     #
4663 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4664 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4665 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
4666     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4667     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4668 wakaba 1.52
4669 wakaba 1.95 unless (length $token->{data}) {
4670     !!!cp ('t194');
4671     !!!next-token;
4672 wakaba 1.126 next B;
4673 wakaba 1.95 } else {
4674     !!!cp ('t195');
4675     }
4676     }
4677 wakaba 1.52
4678 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
4679 wakaba 1.52
4680     ## As if in body, but insert into foster parent element
4681     ## ISSUE: Spec says that "whenever a node would be inserted
4682     ## into the current node" while characters might not
4683     ## result in a new Text node.
4684     $reconstruct_active_formatting_elements->($insert_to_foster);
4685    
4686 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
4687 wakaba 1.52 # MUST
4688     my $foster_parent_element;
4689     my $next_sibling;
4690     my $prev_sibling;
4691     OE: for (reverse 0..$#{$self->{open_elements}}) {
4692 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4693 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4694     if (defined $parent and $parent->node_type == 1) {
4695 wakaba 1.79 !!!cp ('t196');
4696 wakaba 1.52 $foster_parent_element = $parent;
4697     $next_sibling = $self->{open_elements}->[$_]->[0];
4698     $prev_sibling = $next_sibling->previous_sibling;
4699     } else {
4700 wakaba 1.79 !!!cp ('t197');
4701 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4702     $prev_sibling = $foster_parent_element->last_child;
4703     }
4704     last OE;
4705     }
4706     } # OE
4707     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4708     $prev_sibling = $foster_parent_element->last_child
4709     unless defined $foster_parent_element;
4710     if (defined $prev_sibling and
4711     $prev_sibling->node_type == 3) {
4712 wakaba 1.79 !!!cp ('t198');
4713 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
4714     } else {
4715 wakaba 1.79 !!!cp ('t199');
4716 wakaba 1.52 $foster_parent_element->insert_before
4717     ($self->{document}->create_text_node ($token->{data}),
4718     $next_sibling);
4719     }
4720 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4721     } else {
4722     !!!cp ('t200');
4723     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4724     }
4725 wakaba 1.52
4726 wakaba 1.95 !!!next-token;
4727 wakaba 1.126 next B;
4728 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
4729 wakaba 1.52 if ({
4730 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
4731 wakaba 1.52 th => 1, td => 1,
4732     }->{$token->{tag_name}}) {
4733 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
4734 wakaba 1.52 ## Clear back to table context
4735 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4736     & TABLE_SCOPING_EL)) {
4737 wakaba 1.79 !!!cp ('t201');
4738 wakaba 1.52 pop @{$self->{open_elements}};
4739 wakaba 1.43 }
4740    
4741 wakaba 1.116 !!!insert-element ('tbody',, $token);
4742 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4743 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4744     }
4745    
4746 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4747 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
4748 wakaba 1.79 !!!cp ('t202');
4749 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
4750 wakaba 1.52 }
4751 wakaba 1.43
4752 wakaba 1.52 ## Clear back to table body context
4753 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4754     & TABLE_ROWS_SCOPING_EL)) {
4755 wakaba 1.79 !!!cp ('t203');
4756 wakaba 1.83 ## ISSUE: Can this case be reached?
4757 wakaba 1.52 pop @{$self->{open_elements}};
4758     }
4759 wakaba 1.43
4760 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4761 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4762 wakaba 1.79 !!!cp ('t204');
4763 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4764 wakaba 1.125 !!!nack ('t204');
4765 wakaba 1.52 !!!next-token;
4766 wakaba 1.126 next B;
4767 wakaba 1.52 } else {
4768 wakaba 1.79 !!!cp ('t205');
4769 wakaba 1.116 !!!insert-element ('tr',, $token);
4770 wakaba 1.52 ## reprocess in the "in row" insertion mode
4771     }
4772 wakaba 1.79 } else {
4773     !!!cp ('t206');
4774 wakaba 1.52 }
4775    
4776     ## Clear back to table row context
4777 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4778     & TABLE_ROW_SCOPING_EL)) {
4779 wakaba 1.79 !!!cp ('t207');
4780 wakaba 1.52 pop @{$self->{open_elements}};
4781 wakaba 1.43 }
4782 wakaba 1.52
4783 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4784 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
4785 wakaba 1.52
4786     push @$active_formatting_elements, ['#marker', ''];
4787    
4788 wakaba 1.125 !!!nack ('t207.1');
4789 wakaba 1.52 !!!next-token;
4790 wakaba 1.126 next B;
4791 wakaba 1.52 } elsif ({
4792     caption => 1, col => 1, colgroup => 1,
4793     tbody => 1, tfoot => 1, thead => 1,
4794 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
4795 wakaba 1.52 }->{$token->{tag_name}}) {
4796 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
4797 wakaba 1.52 ## As if </tr>
4798 wakaba 1.43 ## have an element in table scope
4799     my $i;
4800     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4801     my $node = $self->{open_elements}->[$_];
4802 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
4803 wakaba 1.79 !!!cp ('t208');
4804 wakaba 1.43 $i = $_;
4805     last INSCOPE;
4806 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4807 wakaba 1.79 !!!cp ('t209');
4808 wakaba 1.43 last INSCOPE;
4809     }
4810     } # INSCOPE
4811 wakaba 1.79 unless (defined $i) {
4812 wakaba 1.125 !!!cp ('t210');
4813 wakaba 1.83 ## TODO: This type is wrong.
4814 wakaba 1.125 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
4815 wakaba 1.52 ## Ignore the token
4816 wakaba 1.125 !!!nack ('t210.1');
4817 wakaba 1.52 !!!next-token;
4818 wakaba 1.126 next B;
4819 wakaba 1.43 }
4820    
4821 wakaba 1.52 ## Clear back to table row context
4822 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4823     & TABLE_ROW_SCOPING_EL)) {
4824 wakaba 1.79 !!!cp ('t211');
4825 wakaba 1.83 ## ISSUE: Can this case be reached?
4826 wakaba 1.52 pop @{$self->{open_elements}};
4827 wakaba 1.1 }
4828 wakaba 1.43
4829 wakaba 1.52 pop @{$self->{open_elements}}; # tr
4830 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4831 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4832 wakaba 1.79 !!!cp ('t212');
4833 wakaba 1.52 ## reprocess
4834 wakaba 1.125 !!!ack-later;
4835 wakaba 1.126 next B;
4836 wakaba 1.52 } else {
4837 wakaba 1.79 !!!cp ('t213');
4838 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4839     }
4840 wakaba 1.1 }
4841 wakaba 1.52
4842 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4843 wakaba 1.52 ## have an element in table scope
4844 wakaba 1.43 my $i;
4845     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4846     my $node = $self->{open_elements}->[$_];
4847 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
4848 wakaba 1.79 !!!cp ('t214');
4849 wakaba 1.43 $i = $_;
4850     last INSCOPE;
4851 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4852 wakaba 1.79 !!!cp ('t215');
4853 wakaba 1.43 last INSCOPE;
4854     }
4855     } # INSCOPE
4856 wakaba 1.52 unless (defined $i) {
4857 wakaba 1.79 !!!cp ('t216');
4858 wakaba 1.82 ## TODO: This error type is wrong.
4859 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4860 wakaba 1.52 ## Ignore the token
4861 wakaba 1.125 !!!nack ('t216.1');
4862 wakaba 1.52 !!!next-token;
4863 wakaba 1.126 next B;
4864 wakaba 1.43 }
4865 wakaba 1.52
4866     ## Clear back to table body context
4867 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4868     & TABLE_ROWS_SCOPING_EL)) {
4869 wakaba 1.79 !!!cp ('t217');
4870 wakaba 1.83 ## ISSUE: Can this state be reached?
4871 wakaba 1.52 pop @{$self->{open_elements}};
4872 wakaba 1.43 }
4873    
4874 wakaba 1.52 ## As if <{current node}>
4875     ## have an element in table scope
4876     ## true by definition
4877 wakaba 1.43
4878 wakaba 1.52 ## Clear back to table body context
4879     ## nop by definition
4880 wakaba 1.43
4881 wakaba 1.52 pop @{$self->{open_elements}};
4882 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4883 wakaba 1.52 ## reprocess in "in table" insertion mode...
4884 wakaba 1.79 } else {
4885     !!!cp ('t218');
4886 wakaba 1.52 }
4887    
4888     if ($token->{tag_name} eq 'col') {
4889     ## Clear back to table context
4890 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4891     & TABLE_SCOPING_EL)) {
4892 wakaba 1.79 !!!cp ('t219');
4893 wakaba 1.83 ## ISSUE: Can this state be reached?
4894 wakaba 1.52 pop @{$self->{open_elements}};
4895     }
4896 wakaba 1.43
4897 wakaba 1.116 !!!insert-element ('colgroup',, $token);
4898 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
4899 wakaba 1.52 ## reprocess
4900 wakaba 1.125 !!!ack-later;
4901 wakaba 1.126 next B;
4902 wakaba 1.52 } elsif ({
4903     caption => 1,
4904     colgroup => 1,
4905     tbody => 1, tfoot => 1, thead => 1,
4906     }->{$token->{tag_name}}) {
4907     ## Clear back to table context
4908 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4909     & TABLE_SCOPING_EL)) {
4910 wakaba 1.79 !!!cp ('t220');
4911 wakaba 1.83 ## ISSUE: Can this state be reached?
4912 wakaba 1.52 pop @{$self->{open_elements}};
4913 wakaba 1.1 }
4914 wakaba 1.52
4915     push @$active_formatting_elements, ['#marker', '']
4916     if $token->{tag_name} eq 'caption';
4917    
4918 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4919 wakaba 1.52 $self->{insertion_mode} = {
4920 wakaba 1.54 caption => IN_CAPTION_IM,
4921     colgroup => IN_COLUMN_GROUP_IM,
4922     tbody => IN_TABLE_BODY_IM,
4923     tfoot => IN_TABLE_BODY_IM,
4924     thead => IN_TABLE_BODY_IM,
4925 wakaba 1.52 }->{$token->{tag_name}};
4926 wakaba 1.1 !!!next-token;
4927 wakaba 1.125 !!!nack ('t220.1');
4928 wakaba 1.126 next B;
4929 wakaba 1.52 } else {
4930     die "$0: in table: <>: $token->{tag_name}";
4931 wakaba 1.1 }
4932 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
4933 wakaba 1.122 !!!parse-error (type => 'not closed',
4934     value => $self->{open_elements}->[-1]->[0]
4935     ->manakai_local_name,
4936     token => $token);
4937 wakaba 1.1
4938 wakaba 1.52 ## As if </table>
4939 wakaba 1.1 ## have a table element in table scope
4940     my $i;
4941 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4942     my $node = $self->{open_elements}->[$_];
4943 wakaba 1.123 if ($node->[1] & TABLE_EL) {
4944 wakaba 1.79 !!!cp ('t221');
4945 wakaba 1.1 $i = $_;
4946     last INSCOPE;
4947 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4948 wakaba 1.79 !!!cp ('t222');
4949 wakaba 1.1 last INSCOPE;
4950     }
4951     } # INSCOPE
4952     unless (defined $i) {
4953 wakaba 1.79 !!!cp ('t223');
4954 wakaba 1.83 ## TODO: The following is wrong, maybe.
4955 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
4956 wakaba 1.52 ## Ignore tokens </table><table>
4957 wakaba 1.125 !!!nack ('t223.1');
4958 wakaba 1.1 !!!next-token;
4959 wakaba 1.126 next B;
4960 wakaba 1.1 }
4961    
4962 wakaba 1.106 ## TODO: The following steps have been removed from the latest spec.
4963 wakaba 1.1 ## generate implied end tags
4964 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4965 wakaba 1.79 !!!cp ('t224');
4966 wakaba 1.86 pop @{$self->{open_elements}};
4967 wakaba 1.1 }
4968    
4969 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
4970 wakaba 1.79 !!!cp ('t225');
4971 wakaba 1.122 ## NOTE: |<table><tr><table>|
4972     !!!parse-error (type => 'not closed',
4973     value => $self->{open_elements}->[-1]->[0]
4974     ->manakai_local_name,
4975     token => $token);
4976 wakaba 1.79 } else {
4977     !!!cp ('t226');
4978 wakaba 1.1 }
4979    
4980 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4981 wakaba 1.95 pop @{$open_tables};
4982 wakaba 1.1
4983 wakaba 1.52 $self->_reset_insertion_mode;
4984 wakaba 1.1
4985 wakaba 1.125 ## reprocess
4986     !!!ack-later;
4987 wakaba 1.126 next B;
4988 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
4989     if (not $open_tables->[-1]->[1]) { # tainted
4990     !!!cp ('t227.8');
4991     ## NOTE: This is an "as if in head" code clone.
4992     $parse_rcdata->(CDATA_CONTENT_MODEL);
4993 wakaba 1.126 next B;
4994 wakaba 1.100 } else {
4995     !!!cp ('t227.7');
4996     #
4997     }
4998     } elsif ($token->{tag_name} eq 'script') {
4999     if (not $open_tables->[-1]->[1]) { # tainted
5000     !!!cp ('t227.6');
5001     ## NOTE: This is an "as if in head" code clone.
5002     $script_start_tag->();
5003 wakaba 1.126 next B;
5004 wakaba 1.100 } else {
5005     !!!cp ('t227.5');
5006     #
5007     }
5008 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5009     if (not $open_tables->[-1]->[1]) { # tainted
5010     if ($token->{attributes}->{type}) { ## TODO: case
5011     my $type = lc $token->{attributes}->{type}->{value};
5012     if ($type eq 'hidden') {
5013     !!!cp ('t227.3');
5014 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5015 wakaba 1.98
5016 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5017 wakaba 1.98
5018     ## TODO: form element pointer
5019    
5020     pop @{$self->{open_elements}};
5021    
5022     !!!next-token;
5023 wakaba 1.125 !!!ack ('t227.2.1');
5024 wakaba 1.126 next B;
5025 wakaba 1.98 } else {
5026     !!!cp ('t227.2');
5027     #
5028     }
5029     } else {
5030     !!!cp ('t227.1');
5031     #
5032     }
5033     } else {
5034     !!!cp ('t227.4');
5035     #
5036     }
5037 wakaba 1.58 } else {
5038 wakaba 1.79 !!!cp ('t227');
5039 wakaba 1.58 #
5040     }
5041 wakaba 1.98
5042 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5043 wakaba 1.98
5044     $insert = $insert_to_foster;
5045     #
5046 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5047 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5048 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5049 wakaba 1.52 ## have an element in table scope
5050     my $i;
5051     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5052     my $node = $self->{open_elements}->[$_];
5053 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5054 wakaba 1.79 !!!cp ('t228');
5055 wakaba 1.52 $i = $_;
5056     last INSCOPE;
5057 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5058 wakaba 1.79 !!!cp ('t229');
5059 wakaba 1.52 last INSCOPE;
5060     }
5061     } # INSCOPE
5062     unless (defined $i) {
5063 wakaba 1.79 !!!cp ('t230');
5064 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5065 wakaba 1.52 ## Ignore the token
5066 wakaba 1.125 !!!nack ('t230.1');
5067 wakaba 1.42 !!!next-token;
5068 wakaba 1.126 next B;
5069 wakaba 1.79 } else {
5070     !!!cp ('t232');
5071 wakaba 1.42 }
5072    
5073 wakaba 1.52 ## Clear back to table row context
5074 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5075     & TABLE_ROW_SCOPING_EL)) {
5076 wakaba 1.79 !!!cp ('t231');
5077 wakaba 1.83 ## ISSUE: Can this state be reached?
5078 wakaba 1.52 pop @{$self->{open_elements}};
5079     }
5080 wakaba 1.42
5081 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5082 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5083 wakaba 1.52 !!!next-token;
5084 wakaba 1.125 !!!nack ('t231.1');
5085 wakaba 1.126 next B;
5086 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5087 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5088 wakaba 1.52 ## As if </tr>
5089     ## have an element in table scope
5090     my $i;
5091     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5092     my $node = $self->{open_elements}->[$_];
5093 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5094 wakaba 1.79 !!!cp ('t233');
5095 wakaba 1.52 $i = $_;
5096     last INSCOPE;
5097 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5098 wakaba 1.79 !!!cp ('t234');
5099 wakaba 1.52 last INSCOPE;
5100 wakaba 1.42 }
5101 wakaba 1.52 } # INSCOPE
5102     unless (defined $i) {
5103 wakaba 1.79 !!!cp ('t235');
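5104 wakaba 1.83 ## TODO: The following is wrong. NOTE(review): it appends $token->{type}, whereas sibling branches append $token->{tag_name} or a literal tag name - confirm.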
5105 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
5106 wakaba 1.52 ## Ignore the token
5107 wakaba 1.125 !!!nack ('t236.1');
5108 wakaba 1.52 !!!next-token;
5109 wakaba 1.126 next B;
5110 wakaba 1.42 }
5111 wakaba 1.52
5112     ## Clear back to table row context
5113 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5114     & TABLE_ROW_SCOPING_EL)) {
5115 wakaba 1.79 !!!cp ('t236');
5116 wakaba 1.83 ## ISSUE: Can this state be reached?
5117 wakaba 1.46 pop @{$self->{open_elements}};
5118 wakaba 1.1 }
5119 wakaba 1.46
5120 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5121 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5122 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5123 wakaba 1.1 }
5124    
5125 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5126 wakaba 1.52 ## have an element in table scope
5127     my $i;
5128     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5129     my $node = $self->{open_elements}->[$_];
5130 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5131 wakaba 1.79 !!!cp ('t237');
5132 wakaba 1.52 $i = $_;
5133     last INSCOPE;
5134 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5135 wakaba 1.79 !!!cp ('t238');
5136 wakaba 1.52 last INSCOPE;
5137     }
5138     } # INSCOPE
5139     unless (defined $i) {
5140 wakaba 1.79 !!!cp ('t239');
5141 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5142 wakaba 1.52 ## Ignore the token
5143 wakaba 1.125 !!!nack ('t239.1');
5144 wakaba 1.52 !!!next-token;
5145 wakaba 1.126 next B;
5146 wakaba 1.47 }
5147    
5148     ## Clear back to table body context
5149 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5150     & TABLE_ROWS_SCOPING_EL)) {
5151 wakaba 1.79 !!!cp ('t240');
5152 wakaba 1.47 pop @{$self->{open_elements}};
5153     }
5154    
5155 wakaba 1.52 ## As if <{current node}>
5156     ## have an element in table scope
5157     ## true by definition
5158    
5159     ## Clear back to table body context
5160     ## nop by definition
5161    
5162     pop @{$self->{open_elements}};
5163 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5164 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5165     }
5166    
5167 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5168     ## When you edit the code fragment below, please ensure that
5169     ## the code for <table> in the "in table" insertion mode
5170     ## is synced with it.
5171    
5172 wakaba 1.52 ## have a table element in table scope
5173     my $i;
5174     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5175     my $node = $self->{open_elements}->[$_];
5176 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5177 wakaba 1.79 !!!cp ('t241');
5178 wakaba 1.52 $i = $_;
5179     last INSCOPE;
5180 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5181 wakaba 1.79 !!!cp ('t242');
5182 wakaba 1.52 last INSCOPE;
5183 wakaba 1.47 }
5184 wakaba 1.52 } # INSCOPE
5185     unless (defined $i) {
5186 wakaba 1.79 !!!cp ('t243');
5187 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5188 wakaba 1.52 ## Ignore the token
5189 wakaba 1.125 !!!nack ('t243.1');
5190 wakaba 1.52 !!!next-token;
5191 wakaba 1.126 next B;
5192 wakaba 1.3 }
5193 wakaba 1.52
5194     splice @{$self->{open_elements}}, $i;
5195 wakaba 1.95 pop @{$open_tables};
5196 wakaba 1.1
5197 wakaba 1.52 $self->_reset_insertion_mode;
5198 wakaba 1.47
5199     !!!next-token;
5200 wakaba 1.126 next B;
5201 wakaba 1.47 } elsif ({
5202 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5203 wakaba 1.52 }->{$token->{tag_name}} and
5204 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5205 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5206 wakaba 1.52 ## have an element in table scope
5207     my $i;
5208     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5209     my $node = $self->{open_elements}->[$_];
5210 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5211 wakaba 1.79 !!!cp ('t247');
5212 wakaba 1.52 $i = $_;
5213     last INSCOPE;
5214 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5215 wakaba 1.79 !!!cp ('t248');
5216 wakaba 1.52 last INSCOPE;
5217     }
5218     } # INSCOPE
5219     unless (defined $i) {
5220 wakaba 1.79 !!!cp ('t249');
5221 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5222 wakaba 1.52 ## Ignore the token
5223 wakaba 1.125 !!!nack ('t249.1');
5224 wakaba 1.52 !!!next-token;
5225 wakaba 1.126 next B;
5226 wakaba 1.52 }
5227    
5228 wakaba 1.48 ## As if </tr>
5229     ## have an element in table scope
5230     my $i;
5231     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5232     my $node = $self->{open_elements}->[$_];
5233 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5234 wakaba 1.79 !!!cp ('t250');
5235 wakaba 1.48 $i = $_;
5236     last INSCOPE;
5237 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5238 wakaba 1.79 !!!cp ('t251');
5239 wakaba 1.48 last INSCOPE;
5240     }
5241     } # INSCOPE
5242 wakaba 1.52 unless (defined $i) {
5243 wakaba 1.79 !!!cp ('t252');
5244 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
5245 wakaba 1.52 ## Ignore the token
5246 wakaba 1.125 !!!nack ('t252.1');
5247 wakaba 1.52 !!!next-token;
5248 wakaba 1.126 next B;
5249 wakaba 1.52 }
5250 wakaba 1.48
5251     ## Clear back to table row context
5252 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5253     & TABLE_ROW_SCOPING_EL)) {
5254 wakaba 1.79 !!!cp ('t253');
5255 wakaba 1.83 ## ISSUE: Can this case be reached?
5256 wakaba 1.48 pop @{$self->{open_elements}};
5257     }
5258    
5259     pop @{$self->{open_elements}}; # tr
5260 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5261 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5262     }
5263    
5264     ## have an element in table scope
5265     my $i;
5266     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5267     my $node = $self->{open_elements}->[$_];
5268 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5269 wakaba 1.79 !!!cp ('t254');
5270 wakaba 1.52 $i = $_;
5271     last INSCOPE;
5272 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5273 wakaba 1.79 !!!cp ('t255');
5274 wakaba 1.52 last INSCOPE;
5275     }
5276     } # INSCOPE
5277     unless (defined $i) {
5278 wakaba 1.79 !!!cp ('t256');
5279 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5280 wakaba 1.52 ## Ignore the token
5281 wakaba 1.125 !!!nack ('t256.1');
5282 wakaba 1.52 !!!next-token;
5283 wakaba 1.126 next B;
5284 wakaba 1.52 }
5285    
5286     ## Clear back to table body context
5287 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5288     & TABLE_ROWS_SCOPING_EL)) {
5289 wakaba 1.79 !!!cp ('t257');
5290 wakaba 1.83 ## ISSUE: Can this case be reached?
5291 wakaba 1.52 pop @{$self->{open_elements}};
5292     }
5293    
5294     pop @{$self->{open_elements}};
5295 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5296 wakaba 1.125 !!!nack ('t257.1');
5297 wakaba 1.52 !!!next-token;
5298 wakaba 1.126 next B;
5299 wakaba 1.52 } elsif ({
5300     body => 1, caption => 1, col => 1, colgroup => 1,
5301     html => 1, td => 1, th => 1,
5302 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5303     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5304 wakaba 1.52 }->{$token->{tag_name}}) {
5305 wakaba 1.125 !!!cp ('t258');
5306     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5307     ## Ignore the token
5308     !!!nack ('t258.1');
5309     !!!next-token;
5310 wakaba 1.126 next B;
5311 wakaba 1.58 } else {
5312 wakaba 1.79 !!!cp ('t259');
5313 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
5314 wakaba 1.52
5315 wakaba 1.58 $insert = $insert_to_foster;
5316     #
5317     }
5318 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5319 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5320 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5321 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5322 wakaba 1.104 !!!cp ('t259.1');
5323 wakaba 1.105 #
5324 wakaba 1.104 } else {
5325     !!!cp ('t259.2');
5326 wakaba 1.105 #
5327 wakaba 1.104 }
5328    
5329     ## Stop parsing
5330     last B;
5331 wakaba 1.58 } else {
5332     die "$0: $token->{type}: Unknown token type";
5333     }
5334 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5335 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5336 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5337     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5338     unless (length $token->{data}) {
5339 wakaba 1.79 !!!cp ('t260');
5340 wakaba 1.52 !!!next-token;
5341 wakaba 1.126 next B;
5342 wakaba 1.52 }
5343     }
5344    
5345 wakaba 1.79 !!!cp ('t261');
5346 wakaba 1.52 #
5347 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5348 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5349 wakaba 1.79 !!!cp ('t262');
5350 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5351 wakaba 1.52 pop @{$self->{open_elements}};
5352 wakaba 1.125 !!!ack ('t262.1');
5353 wakaba 1.52 !!!next-token;
5354 wakaba 1.126 next B;
5355 wakaba 1.52 } else {
5356 wakaba 1.79 !!!cp ('t263');
5357 wakaba 1.52 #
5358     }
5359 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5360 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5361 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5362 wakaba 1.79 !!!cp ('t264');
5363 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5364 wakaba 1.52 ## Ignore the token
5365     !!!next-token;
5366 wakaba 1.126 next B;
5367 wakaba 1.52 } else {
5368 wakaba 1.79 !!!cp ('t265');
5369 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5370 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5371 wakaba 1.52 !!!next-token;
5372 wakaba 1.126 next B;
5373 wakaba 1.52 }
5374     } elsif ($token->{tag_name} eq 'col') {
5375 wakaba 1.79 !!!cp ('t266');
5376 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5377 wakaba 1.52 ## Ignore the token
5378     !!!next-token;
5379 wakaba 1.126 next B;
5380 wakaba 1.52 } else {
5381 wakaba 1.79 !!!cp ('t267');
5382 wakaba 1.52 #
5383     }
5384 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5385 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5386 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5387     !!!cp ('t270.2');
5388     ## Stop parsing.
5389     last B;
5390     } else {
5391     ## NOTE: As if </colgroup>.
5392     !!!cp ('t270.1');
5393     pop @{$self->{open_elements}}; # colgroup
5394     $self->{insertion_mode} = IN_TABLE_IM;
5395     ## Reprocess.
5396 wakaba 1.126 next B;
5397 wakaba 1.104 }
5398     } else {
5399     die "$0: $token->{type}: Unknown token type";
5400     }
5401 wakaba 1.52
5402     ## As if </colgroup>
5403 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5404 wakaba 1.79 !!!cp ('t269');
5405 wakaba 1.104 ## TODO: Wrong error type?
5406 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5407 wakaba 1.52 ## Ignore the token
5408 wakaba 1.125 !!!nack ('t269.1');
5409 wakaba 1.52 !!!next-token;
5410 wakaba 1.126 next B;
5411 wakaba 1.52 } else {
5412 wakaba 1.79 !!!cp ('t270');
5413 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5414 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5415 wakaba 1.125 !!!ack-later;
5416 wakaba 1.52 ## reprocess
5417 wakaba 1.126 next B;
5418 wakaba 1.52 }
5419 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5420 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5421 wakaba 1.79 !!!cp ('t271');
5422 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5423     !!!next-token;
5424 wakaba 1.126 next B;
5425 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5426 wakaba 1.123 if ($token->{tag_name} eq 'option') {
5427     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5428     !!!cp ('t272');
5429     ## As if </option>
5430     pop @{$self->{open_elements}};
5431     } else {
5432     !!!cp ('t273');
5433     }
5434 wakaba 1.52
5435 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5436 wakaba 1.125 !!!nack ('t273.1');
5437 wakaba 1.123 !!!next-token;
5438 wakaba 1.126 next B;
5439 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
5440     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5441     !!!cp ('t274');
5442     ## As if </option>
5443     pop @{$self->{open_elements}};
5444     } else {
5445     !!!cp ('t275');
5446     }
5447 wakaba 1.52
5448 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5449     !!!cp ('t276');
5450     ## As if </optgroup>
5451     pop @{$self->{open_elements}};
5452     } else {
5453     !!!cp ('t277');
5454     }
5455 wakaba 1.52
5456 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5457 wakaba 1.125 !!!nack ('t277.1');
5458 wakaba 1.123 !!!next-token;
5459 wakaba 1.126 next B;
5460 wakaba 1.101 } elsif ($token->{tag_name} eq 'select' or
5461     $token->{tag_name} eq 'input' or
5462     ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5463     {
5464     caption => 1, table => 1,
5465     tbody => 1, tfoot => 1, thead => 1,
5466     tr => 1, td => 1, th => 1,
5467     }->{$token->{tag_name}})) {
5468     ## TODO: The type below is not good - <select> is replaced by </select>
5469 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5470 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5471     ## as if there were </select> (otherwise).
5472 wakaba 1.123 ## have an element in table scope
5473     my $i;
5474     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5475     my $node = $self->{open_elements}->[$_];
5476     if ($node->[1] & SELECT_EL) {
5477     !!!cp ('t278');
5478     $i = $_;
5479     last INSCOPE;
5480     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5481     !!!cp ('t279');
5482     last INSCOPE;
5483     }
5484     } # INSCOPE
5485     unless (defined $i) {
5486     !!!cp ('t280');
5487     !!!parse-error (type => 'unmatched end tag:select', token => $token);
5488     ## Ignore the token
5489 wakaba 1.125 !!!nack ('t280.1');
5490 wakaba 1.123 !!!next-token;
5491 wakaba 1.126 next B;
5492 wakaba 1.123 }
5493 wakaba 1.52
5494 wakaba 1.123 !!!cp ('t281');
5495     splice @{$self->{open_elements}}, $i;
5496 wakaba 1.52
5497 wakaba 1.123 $self->_reset_insertion_mode;
5498 wakaba 1.47
5499 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5500 wakaba 1.125 !!!nack ('t281.2');
5501 wakaba 1.101 !!!next-token;
5502 wakaba 1.126 next B;
5503 wakaba 1.101 } else {
5504     !!!cp ('t281.1');
5505 wakaba 1.125 !!!ack-later;
5506 wakaba 1.101 ## Reprocess the token.
5507 wakaba 1.126 next B;
5508 wakaba 1.101 }
5509 wakaba 1.58 } else {
5510 wakaba 1.79 !!!cp ('t282');
5511 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5512 wakaba 1.58 ## Ignore the token
5513 wakaba 1.125 !!!nack ('t282.1');
5514 wakaba 1.58 !!!next-token;
5515 wakaba 1.126 next B;
5516 wakaba 1.58 }
5517     } elsif ($token->{type} == END_TAG_TOKEN) {
5518 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
5519     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
5520     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
5521     !!!cp ('t283');
5522     ## As if </option>
5523     splice @{$self->{open_elements}}, -2;
5524     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5525     !!!cp ('t284');
5526     pop @{$self->{open_elements}};
5527     } else {
5528     !!!cp ('t285');
5529     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5530     ## Ignore the token
5531     }
5532 wakaba 1.125 !!!nack ('t285.1');
5533 wakaba 1.123 !!!next-token;
5534 wakaba 1.126 next B;
5535 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
5536     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5537     !!!cp ('t286');
5538     pop @{$self->{open_elements}};
5539     } else {
5540     !!!cp ('t287');
5541     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5542     ## Ignore the token
5543     }
5544 wakaba 1.125 !!!nack ('t287.1');
5545 wakaba 1.123 !!!next-token;
5546 wakaba 1.126 next B;
5547 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
5548     ## have an element in table scope
5549     my $i;
5550     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5551     my $node = $self->{open_elements}->[$_];
5552     if ($node->[1] & SELECT_EL) {
5553     !!!cp ('t288');
5554     $i = $_;
5555     last INSCOPE;
5556     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5557     !!!cp ('t289');
5558     last INSCOPE;
5559     }
5560     } # INSCOPE
5561     unless (defined $i) {
5562     !!!cp ('t290');
5563     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5564     ## Ignore the token
5565 wakaba 1.125 !!!nack ('t290.1');
5566 wakaba 1.123 !!!next-token;
5567 wakaba 1.126 next B;
5568 wakaba 1.123 }
5569 wakaba 1.52
5570 wakaba 1.123 !!!cp ('t291');
5571     splice @{$self->{open_elements}}, $i;
5572 wakaba 1.52
5573 wakaba 1.123 $self->_reset_insertion_mode;
5574 wakaba 1.52
5575 wakaba 1.125 !!!nack ('t291.1');
5576 wakaba 1.123 !!!next-token;
5577 wakaba 1.126 next B;
5578 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5579     {
5580     caption => 1, table => 1, tbody => 1,
5581     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5582     }->{$token->{tag_name}}) {
5583 wakaba 1.83 ## TODO: The following is wrong?
5584 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5585 wakaba 1.52
5586 wakaba 1.123 ## have an element in table scope
5587     my $i;
5588     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5589     my $node = $self->{open_elements}->[$_];
5590     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5591     !!!cp ('t292');
5592     $i = $_;
5593     last INSCOPE;
5594     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5595     !!!cp ('t293');
5596     last INSCOPE;
5597     }
5598     } # INSCOPE
5599     unless (defined $i) {
5600     !!!cp ('t294');
5601     ## Ignore the token
5602 wakaba 1.125 !!!nack ('t294.1');
5603 wakaba 1.123 !!!next-token;
5604 wakaba 1.126 next B;
5605 wakaba 1.123 }
5606 wakaba 1.52
5607 wakaba 1.123 ## As if </select>
5608     ## have an element in table scope
5609     undef $i;
5610     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5611     my $node = $self->{open_elements}->[$_];
5612     if ($node->[1] & SELECT_EL) {
5613     !!!cp ('t295');
5614     $i = $_;
5615     last INSCOPE;
5616     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5617 wakaba 1.83 ## ISSUE: Can this state be reached?
5618 wakaba 1.123 !!!cp ('t296');
5619     last INSCOPE;
5620     }
5621     } # INSCOPE
5622     unless (defined $i) {
5623     !!!cp ('t297');
5624 wakaba 1.83 ## TODO: The following error type is correct?
5625 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5626     ## Ignore the </select> token
5627 wakaba 1.125 !!!nack ('t297.1');
5628 wakaba 1.123 !!!next-token; ## TODO: ok?
5629 wakaba 1.126 next B;
5630 wakaba 1.123 }
5631 wakaba 1.52
5632 wakaba 1.123 !!!cp ('t298');
5633     splice @{$self->{open_elements}}, $i;
5634 wakaba 1.52
5635 wakaba 1.123 $self->_reset_insertion_mode;
5636 wakaba 1.52
5637 wakaba 1.125 !!!ack-later;
5638 wakaba 1.123 ## reprocess
5639 wakaba 1.126 next B;
5640 wakaba 1.58 } else {
5641 wakaba 1.79 !!!cp ('t299');
5642 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
5643 wakaba 1.52 ## Ignore the token
5644 wakaba 1.125 !!!nack ('t299.3');
5645 wakaba 1.52 !!!next-token;
5646 wakaba 1.126 next B;
5647 wakaba 1.58 }
5648 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5649 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5650 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5651     !!!cp ('t299.1');
5652 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5653 wakaba 1.104 } else {
5654     !!!cp ('t299.2');
5655     }
5656    
5657     ## Stop parsing.
5658     last B;
5659 wakaba 1.58 } else {
5660     die "$0: $token->{type}: Unknown token type";
5661     }
5662 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5663 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5664 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5665     my $data = $1;
5666     ## As if in body
5667     $reconstruct_active_formatting_elements->($insert_to_current);
5668    
5669     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5670    
5671     unless (length $token->{data}) {
5672 wakaba 1.79 !!!cp ('t300');
5673 wakaba 1.52 !!!next-token;
5674 wakaba 1.126 next B;
5675 wakaba 1.52 }
5676     }
5677    
5678 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5679 wakaba 1.79 !!!cp ('t301');
5680 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5681 wakaba 1.52
5682 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5683 wakaba 1.79 } else {
5684     !!!cp ('t302');
5685 wakaba 1.52 }
5686    
5687     ## "after body" insertion mode
5688 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
5689 wakaba 1.52
5690 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5691 wakaba 1.52 ## reprocess
5692 wakaba 1.126 next B;
5693 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5694 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5695 wakaba 1.79 !!!cp ('t303');
5696 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5697 wakaba 1.52
5698 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5699 wakaba 1.79 } else {
5700     !!!cp ('t304');
5701 wakaba 1.52 }
5702    
5703     ## "after body" insertion mode
5704 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
5705 wakaba 1.52
5706 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5707 wakaba 1.125 !!!ack-later;
5708 wakaba 1.52 ## reprocess
5709 wakaba 1.126 next B;
5710 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5711 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5712 wakaba 1.79 !!!cp ('t305');
5713 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5714 wakaba 1.52
5715 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5716 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5717 wakaba 1.79 } else {
5718     !!!cp ('t306');
5719 wakaba 1.52 }
5720    
5721     ## "after body" insertion mode
5722     if ($token->{tag_name} eq 'html') {
5723     if (defined $self->{inner_html_node}) {
5724 wakaba 1.79 !!!cp ('t307');
5725 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
5726 wakaba 1.52 ## Ignore the token
5727     !!!next-token;
5728 wakaba 1.126 next B;
5729 wakaba 1.52 } else {
5730 wakaba 1.79 !!!cp ('t308');
5731 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5732 wakaba 1.52 !!!next-token;
5733 wakaba 1.126 next B;
5734 wakaba 1.52 }
5735     } else {
5736 wakaba 1.79 !!!cp ('t309');
5737 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
5738 wakaba 1.52
5739 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5740 wakaba 1.52 ## reprocess
5741 wakaba 1.126 next B;
5742 wakaba 1.52 }
5743 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5744     !!!cp ('t309.2');
5745     ## Stop parsing
5746     last B;
5747 wakaba 1.52 } else {
5748     die "$0: $token->{type}: Unknown token type";
5749     }
5750 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5751 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5752 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5753     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5754    
5755     unless (length $token->{data}) {
5756 wakaba 1.79 !!!cp ('t310');
5757 wakaba 1.52 !!!next-token;
5758 wakaba 1.126 next B;
5759 wakaba 1.52 }
5760     }
5761    
5762     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5763 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5764 wakaba 1.79 !!!cp ('t311');
5765 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
5766 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
5767 wakaba 1.79 !!!cp ('t312');
5768 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5769 wakaba 1.52 } else { # "after html frameset"
5770 wakaba 1.79 !!!cp ('t313');
5771 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5772 wakaba 1.52
5773 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5774 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
5775 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
5776 wakaba 1.52 }
5777    
5778     ## Ignore the token.
5779     if (length $token->{data}) {
5780 wakaba 1.79 !!!cp ('t314');
5781 wakaba 1.52 ## reprocess the rest of characters
5782     } else {
5783 wakaba 1.79 !!!cp ('t315');
5784 wakaba 1.52 !!!next-token;
5785     }
5786 wakaba 1.126 next B;
5787 wakaba 1.52 }
5788    
5789     die qq[$0: Character "$token->{data}"];
5790 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5791 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5792 wakaba 1.79 !!!cp ('t316');
5793 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5794 wakaba 1.1
5795 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5796 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5797 wakaba 1.79 } else {
5798     !!!cp ('t317');
5799     }
5800 wakaba 1.1
5801 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5802 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5803 wakaba 1.79 !!!cp ('t318');
5804 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5805 wakaba 1.125 !!!nack ('t318.1');
5806 wakaba 1.52 !!!next-token;
5807 wakaba 1.126 next B;
5808 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
5809 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5810 wakaba 1.79 !!!cp ('t319');
5811 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5812 wakaba 1.52 pop @{$self->{open_elements}};
5813 wakaba 1.125 !!!ack ('t319.1');
5814 wakaba 1.52 !!!next-token;
5815 wakaba 1.126 next B;
5816 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
5817 wakaba 1.79 !!!cp ('t320');
5818 wakaba 1.52 ## NOTE: As if in body.
5819 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5820 wakaba 1.126 next B;
5821 wakaba 1.52 } else {
5822 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5823 wakaba 1.79 !!!cp ('t321');
5824 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
5825 wakaba 1.52 } else {
5826 wakaba 1.79 !!!cp ('t322');
5827 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
5828 wakaba 1.52 }
5829     ## Ignore the token
5830 wakaba 1.125 !!!nack ('t322.1');
5831 wakaba 1.52 !!!next-token;
5832 wakaba 1.126 next B;
5833 wakaba 1.52 }
5834 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5835 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
5836 wakaba 1.79 !!!cp ('t323');
5837 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5838 wakaba 1.1
5839 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5840 wakaba 1.84 ## Process in the "after frameset" insertion mode.
5841 wakaba 1.79 } else {
5842     !!!cp ('t324');
5843 wakaba 1.52 }
5844 wakaba 1.1
5845 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
5846 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
5847 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5848 wakaba 1.52 @{$self->{open_elements}} == 1) {
5849 wakaba 1.79 !!!cp ('t325');
5850 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5851 wakaba 1.52 ## Ignore the token
5852     !!!next-token;
5853     } else {
5854 wakaba 1.79 !!!cp ('t326');
5855 wakaba 1.52 pop @{$self->{open_elements}};
5856     !!!next-token;
5857     }
5858 wakaba 1.47
5859 wakaba 1.52 if (not defined $self->{inner_html_node} and
5860 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
5861 wakaba 1.79 !!!cp ('t327');
5862 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
5863 wakaba 1.79 } else {
5864     !!!cp ('t328');
5865 wakaba 1.52 }
5866 wakaba 1.126 next B;
5867 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
5868 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
5869 wakaba 1.79 !!!cp ('t329');
5870 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
5871 wakaba 1.52 !!!next-token;
5872 wakaba 1.126 next B;
5873 wakaba 1.52 } else {
5874 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
5875 wakaba 1.79 !!!cp ('t330');
5876 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
5877 wakaba 1.52 } else {
5878 wakaba 1.79 !!!cp ('t331');
5879 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
5880 wakaba 1.52 }
5881     ## Ignore the token
5882     !!!next-token;
5883 wakaba 1.126 next B;
5884 wakaba 1.52 }
5885 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5886 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5887 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5888     !!!cp ('t331.1');
5889 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5890 wakaba 1.104 } else {
5891     !!!cp ('t331.2');
5892     }
5893    
5894     ## Stop parsing
5895     last B;
5896 wakaba 1.52 } else {
5897     die "$0: $token->{type}: Unknown token type";
5898     }
5899 wakaba 1.47
5900 wakaba 1.52 ## ISSUE: An issue in spec here
5901     } else {
5902     die "$0: $self->{insertion_mode}: Unknown insertion mode";
5903     }
5904 wakaba 1.47
5905 wakaba 1.52 ## "in body" insertion mode
5906 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
5907 wakaba 1.52 if ($token->{tag_name} eq 'script') {
5908 wakaba 1.79 !!!cp ('t332');
5909 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5910 wakaba 1.100 $script_start_tag->();
5911 wakaba 1.126 next B;
5912 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
5913 wakaba 1.79 !!!cp ('t333');
5914 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5915 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
5916 wakaba 1.126 next B;
5917 wakaba 1.52 } elsif ({
5918     base => 1, link => 1,
5919     }->{$token->{tag_name}}) {
5920 wakaba 1.79 !!!cp ('t334');
5921 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
5922 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5923 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5924 wakaba 1.125 !!!ack ('t334.1');
5925 wakaba 1.52 !!!next-token;
5926 wakaba 1.126 next B;
5927 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
5928     ## NOTE: This is an "as if in head" code clone, only "-t" differs
5929 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5930 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
5931 wakaba 1.46
5932 wakaba 1.52 unless ($self->{confident}) {
5933     if ($token->{attributes}->{charset}) { ## TODO: And if supported
5934 wakaba 1.79 !!!cp ('t335');
5935 wakaba 1.63 $self->{change_encoding}
5936 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
5937 wakaba 1.66
5938     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5939     ->set_user_data (manakai_has_reference =>
5940     $token->{attributes}->{charset}
5941     ->{has_reference});
5942 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
5943 wakaba 1.52 ## ISSUE: Algorithm name in the spec was incorrect so that not linked to the definition.
5944 wakaba 1.63 if ($token->{attributes}->{content}->{value}
5945 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
5946     [\x09-\x0D\x20]*=
5947 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
5948     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
5949 wakaba 1.79 !!!cp ('t336');
5950 wakaba 1.63 $self->{change_encoding}
5951 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
5952 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5953     ->set_user_data (manakai_has_reference =>
5954     $token->{attributes}->{content}
5955     ->{has_reference});
5956 wakaba 1.63 }
5957 wakaba 1.52 }
5958 wakaba 1.66 } else {
5959     if ($token->{attributes}->{charset}) {
5960 wakaba 1.79 !!!cp ('t337');
5961 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
5962     ->set_user_data (manakai_has_reference =>
5963     $token->{attributes}->{charset}
5964     ->{has_reference});
5965     }
5966 wakaba 1.68 if ($token->{attributes}->{content}) {
5967 wakaba 1.79 !!!cp ('t338');
5968 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
5969     ->set_user_data (manakai_has_reference =>
5970     $token->{attributes}->{content}
5971     ->{has_reference});
5972     }
5973 wakaba 1.52 }
5974 wakaba 1.1
5975 wakaba 1.125 !!!ack ('t338.1');
5976 wakaba 1.52 !!!next-token;
5977 wakaba 1.126 next B;
5978 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
5979 wakaba 1.79 !!!cp ('t341');
5980 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
5981 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
5982 wakaba 1.126 next B;
5983 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
5984 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
5985 wakaba 1.46
5986 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
5987 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
5988 wakaba 1.79 !!!cp ('t342');
5989 wakaba 1.52 ## Ignore the token
5990     } else {
5991     my $body_el = $self->{open_elements}->[1]->[0];
5992     for my $attr_name (keys %{$token->{attributes}}) {
5993     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
5994 wakaba 1.79 !!!cp ('t343');
5995 wakaba 1.52 $body_el->set_attribute_ns
5996     (undef, [undef, $attr_name],
5997     $token->{attributes}->{$attr_name}->{value});
5998     }
5999     }
6000     }
6001 wakaba 1.125 !!!nack ('t343.1');
6002 wakaba 1.52 !!!next-token;
6003 wakaba 1.126 next B;
6004 wakaba 1.52 } elsif ({
6005     address => 1, blockquote => 1, center => 1, dir => 1,
6006 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6007     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6008 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6009     pre => 1, listing => 1,
6010 wakaba 1.109 form => 1,
6011     table => 1,
6012     hr => 1,
6013 wakaba 1.52 }->{$token->{tag_name}}) {
6014 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6015     !!!cp ('t350');
6016 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6017 wakaba 1.109 ## Ignore the token
6018 wakaba 1.125 !!!nack ('t350.1');
6019 wakaba 1.109 !!!next-token;
6020 wakaba 1.126 next B;
6021 wakaba 1.109 }
6022    
6023 wakaba 1.52 ## has a p element in scope
6024     INSCOPE: for (reverse @{$self->{open_elements}}) {
6025 wakaba 1.123 if ($_->[1] & P_EL) {
6026 wakaba 1.79 !!!cp ('t344');
6027 wakaba 1.125 !!!back-token; # <form>
6028 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6029     line => $token->{line}, column => $token->{column}};
6030 wakaba 1.126 next B;
6031 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6032 wakaba 1.79 !!!cp ('t345');
6033 wakaba 1.52 last INSCOPE;
6034     }
6035     } # INSCOPE
6036    
6037 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6038 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6039 wakaba 1.125 !!!nack ('t346.1');
6040 wakaba 1.52 !!!next-token;
6041 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6042 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6043     unless (length $token->{data}) {
6044 wakaba 1.79 !!!cp ('t346');
6045 wakaba 1.1 !!!next-token;
6046 wakaba 1.79 } else {
6047     !!!cp ('t349');
6048 wakaba 1.52 }
6049 wakaba 1.79 } else {
6050     !!!cp ('t348');
6051 wakaba 1.52 }
6052 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6053     !!!cp ('t347.1');
6054     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6055    
6056 wakaba 1.125 !!!nack ('t347.2');
6057 wakaba 1.109 !!!next-token;
6058     } elsif ($token->{tag_name} eq 'table') {
6059     !!!cp ('t382');
6060     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6061    
6062     $self->{insertion_mode} = IN_TABLE_IM;
6063    
6064 wakaba 1.125 !!!nack ('t382.1');
6065 wakaba 1.109 !!!next-token;
6066     } elsif ($token->{tag_name} eq 'hr') {
6067     !!!cp ('t386');
6068     pop @{$self->{open_elements}};
6069    
6070 wakaba 1.125 !!!nack ('t386.1');
6071 wakaba 1.109 !!!next-token;
6072 wakaba 1.52 } else {
6073 wakaba 1.125 !!!nack ('t347.1');
6074 wakaba 1.52 !!!next-token;
6075     }
6076 wakaba 1.126 next B;
6077 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6078 wakaba 1.52 ## has a p element in scope
6079     INSCOPE: for (reverse @{$self->{open_elements}}) {
6080 wakaba 1.123 if ($_->[1] & P_EL) {
6081 wakaba 1.79 !!!cp ('t353');
6082 wakaba 1.125 !!!back-token; # <x>
6083 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6084     line => $token->{line}, column => $token->{column}};
6085 wakaba 1.126 next B;
6086 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6087 wakaba 1.79 !!!cp ('t354');
6088 wakaba 1.52 last INSCOPE;
6089     }
6090     } # INSCOPE
6091    
6092     ## Step 1
6093     my $i = -1;
6094     my $node = $self->{open_elements}->[$i];
6095 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6096     dt => {dt => 1, dd => 1},
6097     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6098 wakaba 1.52 LI: {
6099     ## Step 2
6100 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6101 wakaba 1.52 if ($i != -1) {
6102 wakaba 1.79 !!!cp ('t355');
6103 wakaba 1.122 !!!parse-error (type => 'not closed',
6104     value => $self->{open_elements}->[-1]->[0]
6105     ->manakai_local_name,
6106     token => $token);
6107 wakaba 1.79 } else {
6108     !!!cp ('t356');
6109 wakaba 1.52 }
6110     splice @{$self->{open_elements}}, $i;
6111     last LI;
6112 wakaba 1.79 } else {
6113     !!!cp ('t357');
6114 wakaba 1.52 }
6115    
6116     ## Step 3
6117 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6118 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6119 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6120     $node->[1] & SCOPING_EL) and
6121     not ($node->[1] & ADDRESS_EL) and
6122     not ($node->[1] & DIV_EL)) {
6123 wakaba 1.79 !!!cp ('t358');
6124 wakaba 1.52 last LI;
6125     }
6126    
6127 wakaba 1.79 !!!cp ('t359');
6128 wakaba 1.52 ## Step 4
6129     $i--;
6130     $node = $self->{open_elements}->[$i];
6131     redo LI;
6132     } # LI
6133    
6134 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6135 wakaba 1.125 !!!nack ('t359.1');
6136 wakaba 1.52 !!!next-token;
6137 wakaba 1.126 next B;
6138 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6139     ## has a p element in scope
6140     INSCOPE: for (reverse @{$self->{open_elements}}) {
6141 wakaba 1.123 if ($_->[1] & P_EL) {
6142 wakaba 1.79 !!!cp ('t367');
6143 wakaba 1.125 !!!back-token; # <plaintext>
6144 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6145     line => $token->{line}, column => $token->{column}};
6146 wakaba 1.126 next B;
6147 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6148 wakaba 1.79 !!!cp ('t368');
6149 wakaba 1.52 last INSCOPE;
6150 wakaba 1.46 }
6151 wakaba 1.52 } # INSCOPE
6152    
6153 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6154 wakaba 1.52
6155     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6156    
6157 wakaba 1.125 !!!nack ('t368.1');
6158 wakaba 1.52 !!!next-token;
6159 wakaba 1.126 next B;
6160 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6161     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6162     my $node = $active_formatting_elements->[$i];
6163 wakaba 1.123 if ($node->[1] & A_EL) {
6164 wakaba 1.79 !!!cp ('t371');
6165 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6166 wakaba 1.52
6167 wakaba 1.125 !!!back-token; # <a>
6168 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6169     line => $token->{line}, column => $token->{column}};
6170 wakaba 1.113 $formatting_end_tag->($token);
6171 wakaba 1.52
6172     AFE2: for (reverse 0..$#$active_formatting_elements) {
6173     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
6174 wakaba 1.79 !!!cp ('t372');
6175 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
6176     last AFE2;
6177 wakaba 1.1 }
6178 wakaba 1.52 } # AFE2
6179     OE: for (reverse 0..$#{$self->{open_elements}}) {
6180     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
6181 wakaba 1.79 !!!cp ('t373');
6182 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
6183     last OE;
6184 wakaba 1.1 }
6185 wakaba 1.52 } # OE
6186     last AFE;
6187     } elsif ($node->[0] eq '#marker') {
6188 wakaba 1.79 !!!cp ('t374');
6189 wakaba 1.52 last AFE;
6190     }
6191     } # AFE
6192    
6193     $reconstruct_active_formatting_elements->($insert_to_current);
6194 wakaba 1.1
6195 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6196 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6197 wakaba 1.1
6198 wakaba 1.125 !!!nack ('t374.1');
6199 wakaba 1.52 !!!next-token;
6200 wakaba 1.126 next B;
6201 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6202     $reconstruct_active_formatting_elements->($insert_to_current);
6203 wakaba 1.1
6204 wakaba 1.52 ## has a |nobr| element in scope
6205     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6206     my $node = $self->{open_elements}->[$_];
6207 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6208 wakaba 1.79 !!!cp ('t376');
6209 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6210 wakaba 1.125 !!!back-token; # <nobr>
6211 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6212     line => $token->{line}, column => $token->{column}};
6213 wakaba 1.126 next B;
6214 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6215 wakaba 1.79 !!!cp ('t377');
6216 wakaba 1.52 last INSCOPE;
6217     }
6218     } # INSCOPE
6219    
6220 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6221 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6222    
6223 wakaba 1.125 !!!nack ('t377.1');
6224 wakaba 1.52 !!!next-token;
6225 wakaba 1.126 next B;
6226 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6227     ## has a button element in scope
6228     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6229     my $node = $self->{open_elements}->[$_];
6230 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6231 wakaba 1.79 !!!cp ('t378');
6232 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6233 wakaba 1.125 !!!back-token; # <button>
6234 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6235     line => $token->{line}, column => $token->{column}};
6236 wakaba 1.126 next B;
6237 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6238 wakaba 1.79 !!!cp ('t379');
6239 wakaba 1.52 last INSCOPE;
6240     }
6241     } # INSCOPE
6242    
6243     $reconstruct_active_formatting_elements->($insert_to_current);
6244    
6245 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6246 wakaba 1.85
6247     ## TODO: associate with $self->{form_element} if defined
6248    
6249 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6250 wakaba 1.1
6251 wakaba 1.125 !!!nack ('t379.1');
6252 wakaba 1.52 !!!next-token;
6253 wakaba 1.126 next B;
6254 wakaba 1.103 } elsif ({
6255 wakaba 1.109 xmp => 1,
6256     iframe => 1,
6257     noembed => 1,
6258     noframes => 1,
6259     noscript => 0, ## TODO: 1 if scripting is enabled
6260 wakaba 1.103 }->{$token->{tag_name}}) {
6261 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6262     !!!cp ('t381');
6263     $reconstruct_active_formatting_elements->($insert_to_current);
6264     } else {
6265     !!!cp ('t399');
6266     }
6267     ## NOTE: There is an "as if in body" code clone.
6268 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6269 wakaba 1.126 next B;
6270 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6271 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6272 wakaba 1.52
6273     if (defined $self->{form_element}) {
6274 wakaba 1.79 !!!cp ('t389');
6275 wakaba 1.52 ## Ignore the token
6276 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
6277 wakaba 1.52 !!!next-token;
6278 wakaba 1.126 next B;
6279 wakaba 1.52 } else {
6280     my $at = $token->{attributes};
6281     my $form_attrs;
6282     $form_attrs->{action} = $at->{action} if $at->{action};
6283     my $prompt_attr = $at->{prompt};
6284     $at->{name} = {name => 'name', value => 'isindex'};
6285     delete $at->{action};
6286     delete $at->{prompt};
6287     my @tokens = (
6288 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6289 wakaba 1.114 attributes => $form_attrs,
6290     line => $token->{line}, column => $token->{column}},
6291     {type => START_TAG_TOKEN, tag_name => 'hr',
6292     line => $token->{line}, column => $token->{column}},
6293     {type => START_TAG_TOKEN, tag_name => 'p',
6294     line => $token->{line}, column => $token->{column}},
6295     {type => START_TAG_TOKEN, tag_name => 'label',
6296     line => $token->{line}, column => $token->{column}},
6297 wakaba 1.52 );
6298     if ($prompt_attr) {
6299 wakaba 1.79 !!!cp ('t390');
6300 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6301 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6302     };
6303 wakaba 1.1 } else {
6304 wakaba 1.79 !!!cp ('t391');
6305 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6306 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6307 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6308     }; # SHOULD
6309 wakaba 1.52 ## TODO: make this configurable
6310 wakaba 1.1 }
6311 wakaba 1.52 push @tokens,
6312 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6313     line => $token->{line}, column => $token->{column}},
6314 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6315 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6316     line => $token->{line}, column => $token->{column}},
6317     {type => END_TAG_TOKEN, tag_name => 'p',
6318     line => $token->{line}, column => $token->{column}},
6319     {type => START_TAG_TOKEN, tag_name => 'hr',
6320     line => $token->{line}, column => $token->{column}},
6321     {type => END_TAG_TOKEN, tag_name => 'form',
6322     line => $token->{line}, column => $token->{column}};
6323 wakaba 1.125 !!!nack ('t391.1'); ## NOTE: Not acknowledged.
6324 wakaba 1.52 !!!back-token (@tokens);
6325 wakaba 1.125 !!!next-token;
6326 wakaba 1.126 next B;
6327 wakaba 1.52 }
6328     } elsif ($token->{tag_name} eq 'textarea') {
6329     my $tag_name = $token->{tag_name};
6330     my $el;
6331 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
6332 wakaba 1.52
6333     ## TODO: $self->{form_element} if defined
6334     $self->{content_model} = RCDATA_CONTENT_MODEL;
6335     delete $self->{escape}; # MUST
6336    
6337     $insert->($el);
6338    
6339     my $text = '';
6340 wakaba 1.125 !!!nack ('t392.1');
6341 wakaba 1.52 !!!next-token;
6342 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6343 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6344 wakaba 1.51 unless (length $token->{data}) {
6345 wakaba 1.79 !!!cp ('t392');
6346 wakaba 1.51 !!!next-token;
6347 wakaba 1.79 } else {
6348     !!!cp ('t393');
6349 wakaba 1.51 }
6350 wakaba 1.79 } else {
6351     !!!cp ('t394');
6352 wakaba 1.51 }
6353 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6354 wakaba 1.79 !!!cp ('t395');
6355 wakaba 1.52 $text .= $token->{data};
6356     !!!next-token;
6357     }
6358     if (length $text) {
6359 wakaba 1.79 !!!cp ('t396');
6360 wakaba 1.52 $el->manakai_append_text ($text);
6361     }
6362    
6363     $self->{content_model} = PCDATA_CONTENT_MODEL;
6364 wakaba 1.51
6365 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6366 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6367 wakaba 1.79 !!!cp ('t397');
6368 wakaba 1.52 ## Ignore the token
6369     } else {
6370 wakaba 1.79 !!!cp ('t398');
6371 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
6372 wakaba 1.51 }
6373 wakaba 1.52 !!!next-token;
6374 wakaba 1.126 next B;
6375     } elsif ($token->{tag_name} eq 'math' or
6376     $token->{tag_name} eq 'svg') {
6377     $reconstruct_active_formatting_elements->($insert_to_current);
6378    
6379     !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token);
6380    
6381     if ($self->{self_closing}) {
6382     pop @{$self->{open_elements}};
6383     !!!ack ('t398.1');
6384     } else {
6385     !!!cp ('t398.2');
6386     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
6387     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
6388     ## mode, "in body" (not "in foreign content") secondary insertion
6389     ## mode, maybe.
6390     }
6391    
6392     !!!next-token;
6393     next B;
6394 wakaba 1.52 } elsif ({
6395     caption => 1, col => 1, colgroup => 1, frame => 1,
6396     frameset => 1, head => 1, option => 1, optgroup => 1,
6397     tbody => 1, td => 1, tfoot => 1, th => 1,
6398     thead => 1, tr => 1,
6399     }->{$token->{tag_name}}) {
6400 wakaba 1.79 !!!cp ('t401');
6401 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
6402 wakaba 1.52 ## Ignore the token
6403 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
6404 wakaba 1.52 !!!next-token;
6405 wakaba 1.126 next B;
6406 wakaba 1.52
6407     ## ISSUE: An issue on HTML5 new elements in the spec.
6408     } else {
6409 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6410     !!!cp ('t384');
6411 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6412 wakaba 1.110 $token->{tag_name} = 'img';
6413     } else {
6414     !!!cp ('t385');
6415     }
6416    
6417     ## NOTE: There is an "as if <br>" code clone.
6418 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6419    
6420 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6421 wakaba 1.109
## Post-insertion handling for a start tag that reached the generic
## "in body" branch.  The element for |$token| has already been inserted
## (it is the current node, |$self->{open_elements}->[-1]|); what remains
## depends on the tag name.  Each branch ends with |!!!ack| or |!!!nack|
## (presumably: acknowledge / do not acknowledge the token's
## self-closing flag -- confirm against the macro definitions).
if ({
  applet => 1, marquee => 1, object => 1,
}->{$token->{tag_name}}) {
  !!!cp ('t380');
  ## These elements get a marker entry in the list of active
  ## formatting elements.
  push @$active_formatting_elements, ['#marker', ''];
  !!!nack ('t380.1');
} elsif ({
  ## Formatting elements.  NOTE: |strike| was previously misspelled
  ## as |strile| here, so |<strike>| was never added to the list of
  ## active formatting elements and fell through to the default branch.
  b => 1, big => 1, em => 1, font => 1, i => 1,
  s => 1, small => 1, strike => 1,
  strong => 1, tt => 1, u => 1,
}->{$token->{tag_name}}) {
  !!!cp ('t375');
  ## Record the just-inserted element as an active formatting element.
  push @$active_formatting_elements, $self->{open_elements}->[-1];
  !!!nack ('t375.1');
} elsif ($token->{tag_name} eq 'input') {
  !!!cp ('t388');
  ## TODO: associate with $self->{form_element} if defined
  ## The element is removed from the stack of open elements right away.
  pop @{$self->{open_elements}};
  !!!ack ('t388.2');
} elsif ({
  area => 1, basefont => 1, bgsound => 1, br => 1,
  embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
  #image => 1,
}->{$token->{tag_name}}) {
  !!!cp ('t388.1');
  ## Same as |input|: pop immediately after insertion.
  pop @{$self->{open_elements}};
  !!!ack ('t388.3');
} elsif ($token->{tag_name} eq 'select') {
  ## TODO: associate with $self->{form_element} if defined

  ## Choose the select insertion mode according to whether the current
  ## insertion mode is one of the table-related modes.
  if ($self->{insertion_mode} & TABLE_IMS or
      $self->{insertion_mode} & BODY_TABLE_IMS or
      $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
    !!!cp ('t400.1');
    $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
  } else {
    !!!cp ('t400.2');
    $self->{insertion_mode} = IN_SELECT_IM;
  }
  !!!nack ('t400.3');
} else {
  ## Any other element: nothing further to do after insertion.
  !!!nack ('t402');
}
6465 wakaba 1.51
6466 wakaba 1.52 !!!next-token;
6467 wakaba 1.126 next B;
6468 wakaba 1.52 }
6469 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6470 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6471 wakaba 1.107 ## has a |body| element in scope
6472     my $i;
6473 wakaba 1.111 INSCOPE: {
6474     for (reverse @{$self->{open_elements}}) {
6475 wakaba 1.123 if ($_->[1] & BODY_EL) {
6476 wakaba 1.111 !!!cp ('t405');
6477     $i = $_;
6478     last INSCOPE;
6479 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6480 wakaba 1.111 !!!cp ('t405.1');
6481     last;
6482     }
6483 wakaba 1.52 }
6484 wakaba 1.111
6485     !!!parse-error (type => 'start tag not allowed',
6486 wakaba 1.113 value => $token->{tag_name}, token => $token);
6487 wakaba 1.107 ## NOTE: Ignore the token.
6488 wakaba 1.52 !!!next-token;
6489 wakaba 1.126 next B;
6490 wakaba 1.111 } # INSCOPE
6491 wakaba 1.107
6492     for (@{$self->{open_elements}}) {
6493 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
6494 wakaba 1.107 !!!cp ('t403');
6495 wakaba 1.122 !!!parse-error (type => 'not closed',
6496     value => $_->[0]->manakai_local_name,
6497     token => $token);
6498 wakaba 1.107 last;
6499     } else {
6500     !!!cp ('t404');
6501     }
6502     }
6503    
6504     $self->{insertion_mode} = AFTER_BODY_IM;
6505     !!!next-token;
6506 wakaba 1.126 next B;
6507 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6508 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
6509     ## up-to-date, though it has same effect as speced.
6510 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
6511     $self->{open_elements}->[1]->[1] & BODY_EL) {
6512 wakaba 1.52 ## ISSUE: There is an issue in the spec.
6513 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
6514 wakaba 1.79 !!!cp ('t406');
6515 wakaba 1.122 !!!parse-error (type => 'not closed',
6516     value => $self->{open_elements}->[1]->[0]
6517     ->manakai_local_name,
6518     token => $token);
6519 wakaba 1.79 } else {
6520     !!!cp ('t407');
6521 wakaba 1.1 }
6522 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6523 wakaba 1.52 ## reprocess
6524 wakaba 1.126 next B;
6525 wakaba 1.51 } else {
6526 wakaba 1.79 !!!cp ('t408');
6527 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6528 wakaba 1.52 ## Ignore the token
6529     !!!next-token;
6530 wakaba 1.126 next B;
6531 wakaba 1.51 }
6532 wakaba 1.52 } elsif ({
6533     address => 1, blockquote => 1, center => 1, dir => 1,
6534     div => 1, dl => 1, fieldset => 1, listing => 1,
6535     menu => 1, ol => 1, pre => 1, ul => 1,
6536     dd => 1, dt => 1, li => 1,
6537 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6538 wakaba 1.52 }->{$token->{tag_name}}) {
6539     ## has an element in scope
6540     my $i;
6541     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6542     my $node = $self->{open_elements}->[$_];
6543 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6544 wakaba 1.79 !!!cp ('t410');
6545 wakaba 1.52 $i = $_;
6546 wakaba 1.87 last INSCOPE;
6547 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6548 wakaba 1.79 !!!cp ('t411');
6549 wakaba 1.52 last INSCOPE;
6550 wakaba 1.51 }
6551 wakaba 1.52 } # INSCOPE
6552 wakaba 1.89
6553     unless (defined $i) { # has an element in scope
6554     !!!cp ('t413');
6555 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6556 wakaba 1.89 } else {
6557     ## Step 1. generate implied end tags
6558     while ({
6559     dd => ($token->{tag_name} ne 'dd'),
6560     dt => ($token->{tag_name} ne 'dt'),
6561     li => ($token->{tag_name} ne 'li'),
6562     p => 1,
6563 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
6564 wakaba 1.89 !!!cp ('t409');
6565     pop @{$self->{open_elements}};
6566     }
6567    
6568     ## Step 2.
6569 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6570     ne $token->{tag_name}) {
6571 wakaba 1.79 !!!cp ('t412');
6572 wakaba 1.122 !!!parse-error (type => 'not closed',
6573     value => $self->{open_elements}->[-1]->[0]
6574     ->manakai_local_name,
6575     token => $token);
6576 wakaba 1.51 } else {
6577 wakaba 1.89 !!!cp ('t414');
6578 wakaba 1.51 }
6579 wakaba 1.89
6580     ## Step 3.
6581 wakaba 1.52 splice @{$self->{open_elements}}, $i;
6582 wakaba 1.89
6583     ## Step 4.
6584     $clear_up_to_marker->()
6585     if {
6586 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6587 wakaba 1.89 }->{$token->{tag_name}};
6588 wakaba 1.51 }
6589 wakaba 1.52 !!!next-token;
6590 wakaba 1.126 next B;
6591 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
6592 wakaba 1.92 undef $self->{form_element};
6593    
6594 wakaba 1.52 ## has an element in scope
6595 wakaba 1.92 my $i;
6596 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6597     my $node = $self->{open_elements}->[$_];
6598 wakaba 1.123 if ($node->[1] & FORM_EL) {
6599 wakaba 1.79 !!!cp ('t418');
6600 wakaba 1.92 $i = $_;
6601 wakaba 1.52 last INSCOPE;
6602 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6603 wakaba 1.79 !!!cp ('t419');
6604 wakaba 1.52 last INSCOPE;
6605     }
6606     } # INSCOPE
6607 wakaba 1.92
6608     unless (defined $i) { # has an element in scope
6609 wakaba 1.79 !!!cp ('t421');
6610 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6611 wakaba 1.92 } else {
6612     ## Step 1. generate implied end tags
6613 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6614 wakaba 1.92 !!!cp ('t417');
6615     pop @{$self->{open_elements}};
6616     }
6617    
6618     ## Step 2.
6619 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6620     ne $token->{tag_name}) {
6621 wakaba 1.92 !!!cp ('t417.1');
6622 wakaba 1.122 !!!parse-error (type => 'not closed',
6623     value => $self->{open_elements}->[-1]->[0]
6624     ->manakai_local_name,
6625     token => $token);
6626 wakaba 1.92 } else {
6627     !!!cp ('t420');
6628     }
6629    
6630     ## Step 3.
6631     splice @{$self->{open_elements}}, $i;
6632 wakaba 1.52 }
6633    
6634     !!!next-token;
6635 wakaba 1.126 next B;
6636 wakaba 1.52 } elsif ({
6637     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6638     }->{$token->{tag_name}}) {
6639     ## has an element in scope
6640     my $i;
6641     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6642     my $node = $self->{open_elements}->[$_];
6643 wakaba 1.123 if ($node->[1] & HEADING_EL) {
6644 wakaba 1.79 !!!cp ('t423');
6645 wakaba 1.52 $i = $_;
6646     last INSCOPE;
6647 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6648 wakaba 1.79 !!!cp ('t424');
6649 wakaba 1.52 last INSCOPE;
6650 wakaba 1.51 }
6651 wakaba 1.52 } # INSCOPE
6652 wakaba 1.93
6653     unless (defined $i) { # has an element in scope
6654     !!!cp ('t425.1');
6655 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6656 wakaba 1.79 } else {
6657 wakaba 1.93 ## Step 1. generate implied end tags
6658 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6659 wakaba 1.93 !!!cp ('t422');
6660     pop @{$self->{open_elements}};
6661     }
6662    
6663     ## Step 2.
6664 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6665     ne $token->{tag_name}) {
6666 wakaba 1.93 !!!cp ('t425');
6667 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6668 wakaba 1.93 } else {
6669     !!!cp ('t426');
6670     }
6671    
6672     ## Step 3.
6673     splice @{$self->{open_elements}}, $i;
6674 wakaba 1.36 }
6675 wakaba 1.52
6676     !!!next-token;
6677 wakaba 1.126 next B;
6678 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
6679     ## has an element in scope
6680     my $i;
6681     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6682     my $node = $self->{open_elements}->[$_];
6683 wakaba 1.123 if ($node->[1] & P_EL) {
6684 wakaba 1.87 !!!cp ('t410.1');
6685     $i = $_;
6686 wakaba 1.88 last INSCOPE;
6687 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6688 wakaba 1.87 !!!cp ('t411.1');
6689     last INSCOPE;
6690     }
6691     } # INSCOPE
6692 wakaba 1.91
6693     if (defined $i) {
6694 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6695     ne $token->{tag_name}) {
6696 wakaba 1.87 !!!cp ('t412.1');
6697 wakaba 1.122 !!!parse-error (type => 'not closed',
6698     value => $self->{open_elements}->[-1]->[0]
6699     ->manakai_local_name,
6700     token => $token);
6701 wakaba 1.87 } else {
6702 wakaba 1.91 !!!cp ('t414.1');
6703 wakaba 1.87 }
6704 wakaba 1.91
6705 wakaba 1.87 splice @{$self->{open_elements}}, $i;
6706     } else {
6707 wakaba 1.91 !!!cp ('t413.1');
6708 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6709 wakaba 1.91
6710 wakaba 1.87 !!!cp ('t415.1');
6711     ## As if <p>, then reprocess the current token
6712     my $el;
6713 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
6714 wakaba 1.87 $insert->($el);
6715 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
6716 wakaba 1.87 }
6717 wakaba 1.91
6718 wakaba 1.87 !!!next-token;
6719 wakaba 1.126 next B;
6720 wakaba 1.52 } elsif ({
6721     a => 1,
6722     b => 1, big => 1, em => 1, font => 1, i => 1,
6723     nobr => 1, s => 1, small => 1, strile => 1,
6724     strong => 1, tt => 1, u => 1,
6725     }->{$token->{tag_name}}) {
6726 wakaba 1.79 !!!cp ('t427');
6727 wakaba 1.113 $formatting_end_tag->($token);
6728 wakaba 1.126 next B;
6729 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
6730 wakaba 1.79 !!!cp ('t428');
6731 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
6732 wakaba 1.52
6733     ## As if <br>
6734     $reconstruct_active_formatting_elements->($insert_to_current);
6735    
6736     my $el;
6737 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
6738 wakaba 1.52 $insert->($el);
6739    
6740     ## Ignore the token.
6741     !!!next-token;
6742 wakaba 1.126 next B;
6743 wakaba 1.52 } elsif ({
6744     caption => 1, col => 1, colgroup => 1, frame => 1,
6745     frameset => 1, head => 1, option => 1, optgroup => 1,
6746     tbody => 1, td => 1, tfoot => 1, th => 1,
6747     thead => 1, tr => 1,
6748     area => 1, basefont => 1, bgsound => 1,
6749     embed => 1, hr => 1, iframe => 1, image => 1,
6750     img => 1, input => 1, isindex => 1, noembed => 1,
6751     noframes => 1, param => 1, select => 1, spacer => 1,
6752     table => 1, textarea => 1, wbr => 1,
6753     noscript => 0, ## TODO: if scripting is enabled
6754     }->{$token->{tag_name}}) {
6755 wakaba 1.79 !!!cp ('t429');
6756 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6757 wakaba 1.52 ## Ignore the token
6758     !!!next-token;
6759 wakaba 1.126 next B;
6760 wakaba 1.52
6761     ## ISSUE: Issue on HTML5 new elements in spec
6762    
6763     } else {
6764     ## Step 1
6765     my $node_i = -1;
6766     my $node = $self->{open_elements}->[$node_i];
6767 wakaba 1.51
6768 wakaba 1.52 ## Step 2
6769     S2: {
6770 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6771 wakaba 1.52 ## Step 1
6772     ## generate implied end tags
6773 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6774 wakaba 1.79 !!!cp ('t430');
6775 wakaba 1.83 ## ISSUE: Can this case be reached?
6776 wakaba 1.86 pop @{$self->{open_elements}};
6777 wakaba 1.52 }
6778    
6779     ## Step 2
6780 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6781     ne $token->{tag_name}) {
6782 wakaba 1.79 !!!cp ('t431');
6783 wakaba 1.58 ## NOTE: <x><y></x>
6784 wakaba 1.122 !!!parse-error (type => 'not closed',
6785     value => $self->{open_elements}->[-1]->[0]
6786     ->manakai_local_name,
6787     token => $token);
6788 wakaba 1.79 } else {
6789     !!!cp ('t432');
6790 wakaba 1.52 }
6791    
6792     ## Step 3
6793     splice @{$self->{open_elements}}, $node_i;
6794 wakaba 1.51
6795 wakaba 1.1 !!!next-token;
6796 wakaba 1.52 last S2;
6797 wakaba 1.1 } else {
6798 wakaba 1.52 ## Step 3
6799 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6800 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6801 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6802     $node->[1] & SCOPING_EL)) {
6803 wakaba 1.79 !!!cp ('t433');
6804 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6805 wakaba 1.52 ## Ignore the token
6806     !!!next-token;
6807     last S2;
6808     }
6809 wakaba 1.79
6810     !!!cp ('t434');
6811 wakaba 1.1 }
6812 wakaba 1.52
6813     ## Step 4
6814     $node_i--;
6815     $node = $self->{open_elements}->[$node_i];
6816    
6817     ## Step 5;
6818     redo S2;
6819     } # S2
6820 wakaba 1.126 next B;
6821 wakaba 1.1 }
6822     }
6823 wakaba 1.126 next B;
6824     } continue { # B
6825     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
6826     ## NOTE: The code below is executed in cases where it does not have
6827     ## to be, but it is harmless even in those cases.
6828     ## has an element in scope
6829     INSCOPE: {
6830     for (reverse 0..$#{$self->{open_elements}}) {
6831     my $node = $self->{open_elements}->[$_];
6832     if ($node->[1] & FOREIGN_EL) {
6833     last INSCOPE;
6834     } elsif ($node->[1] & SCOPING_EL) {
6835     last;
6836     }
6837     }
6838    
6839     ## NOTE: No foreign element in scope.
6840     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
6841     } # INSCOPE
6842     }
6843 wakaba 1.1 } # B
6844    
6845     ## Stop parsing # MUST
6846    
6847     ## TODO: script stuffs
6848 wakaba 1.3 } # _tree_construct_main
6849    
## Class method: $class->set_inner_html ($node, $s, $onerror)
##
## Replaces the children of |$node| with the result of parsing the
## string |$s| as HTML.
##
##   $node    - The node whose content is replaced.  Node type 9 is
##              handled by delegating to |parse_string|; node type 1
##              is handled by the fragment steps below.  Any other
##              node type makes the method |die|.
##   $s       - The markup to parse.  It is accessed through a
##              reference to the caller's argument, so it is not copied.
##   $onerror - Optional code reference invoked for each parse error
##              with named arguments (|type|, |line|, |column|,
##              |token|, ...).  When omitted, errors are reported
##              with |warn|.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: The child list is copied first so that removing nodes
    ## does not disturb the iteration.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## A scratch document receives the parsed tree; its nodes are
    ## moved into |$node| at the end.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    my $i = 0; # offset of the next unread character in |$$s|
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    ## NOTE: This closure refers to |$p| and is stored in |$p|, so it
    ## forms a reference cycle, which is broken explicitly at the end
    ## of this branch.
    $p->{set_next_char} = sub {
      my $self = shift;

      ## Shift the history of previous characters by one.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      ## End of input: -1 is a true value, so the |return| is always
      ## reached once the assignment has been done.
      $self->{next_char} = -1 and return if $i >= length $$s;
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR LF and a lone CR are both reported as a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## Default error handler: prefer the position recorded in the
    ## token, if any, over the current input position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    ## NOTE: This closure also refers to |$p| (second reference cycle).
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model depends on the local name of the
    ## context element; anything not listed is parsed as PCDATA.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## Walk from the context node up through its ancestors and
    ## remember the nearest XHTML |form| element as the form pointer.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## NOTE(review): |!!!next-token| is presumably expanded in terms
      ## of a lexical |$self|, hence the alias -- confirm against the
      ## macro definition.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    ## Move the parsed children from the scratch root into |$node|.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break both reference cycles (closures stored in |$p| that
    ## refer to |$p|) so that the parser object can be freed.
    delete $p->{parse_error}; # delete loop
    delete $p->{set_next_char}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
7022    
7023     } # tree construction stage
7024 wakaba 1.1
## Exception class that inherits from |Error| (which the file loads
## with |use Error qw(:try)|).  It declares no methods of its own.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

## True value returned to |require| / |use| when the module is loaded.
1;
7029 wakaba 1.128 # $Date: 2008/04/12 15:25:52 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24