/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.168 - (hide annotations) (download) (as text)
Sat Sep 13 10:49:21 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.167: +326 -235 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	13 Sep 2008 10:47:42 -0000
	* content-model-2.dat: A test case for NCR in charset=""
	is added.

2008-09-13  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	13 Sep 2008 10:48:59 -0000
	* HTML.pm.src: Finally we get rid of all the inner loops.  Remove
	entity related tokenizer states in favor of new states
	implementing the consume character reference algorithm.

2008-09-13  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src: "Consume a character reference" algorithm is
	* HTML.pm.src: Make |PUBLIC| and |SYSTEM| keyword tokenizing

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.168 our $VERSION=do{my @r=(q$Revision: 1.167 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.139 require IO::Handle;
12    
13 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
14     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
15     my $SVG_NS = q<http://www.w3.org/2000/svg>;
16     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
17     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
18     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
19    
## Element category flags.  Every category owns exactly one bit, so a
## set of categories is the bitwise OR of its members and membership
## can be tested with a bitwise AND.
sub A_EL () { 1 << 0 }
sub ADDRESS_EL () { 1 << 1 }
sub BODY_EL () { 1 << 2 }
sub BUTTON_EL () { 1 << 3 }
sub CAPTION_EL () { 1 << 4 }
sub DD_EL () { 1 << 5 }
sub DIV_EL () { 1 << 6 }
sub DT_EL () { 1 << 7 }
sub FORM_EL () { 1 << 8 }
sub FORMATTING_EL () { 1 << 9 }
sub FRAMESET_EL () { 1 << 10 }
sub HEADING_EL () { 1 << 11 }
sub HTML_EL () { 1 << 12 }
sub LI_EL () { 1 << 13 }
sub NOBR_EL () { 1 << 14 }
sub OPTION_EL () { 1 << 15 }
sub OPTGROUP_EL () { 1 << 16 }
sub P_EL () { 1 << 17 }
sub SELECT_EL () { 1 << 18 }
sub TABLE_EL () { 1 << 19 }
sub TABLE_CELL_EL () { 1 << 20 }
sub TABLE_ROW_EL () { 1 << 21 }
sub TABLE_ROW_GROUP_EL () { 1 << 22 }
sub MISC_SCOPING_EL () { 1 << 23 }
sub MISC_SPECIAL_EL () { 1 << 24 }
sub FOREIGN_EL () { 1 << 25 }
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 26 }
sub MML_AXML_EL () { 1 << 27 }
sub RUBY_EL () { 1 << 28 }
sub RUBY_COMPONENT_EL () { 1 << 29 }

## Composite category sets, built from the single-bit flags above.

sub TABLE_ROWS_EL () { TABLE_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL }

## NOTE: Used in "generate implied end tags" algorithm.
## NOTE: There is code where a modified version of END_TAG_OPTIONAL_EL
## is used in the "generate implied end tags" implementation (search
## for the function mae).
sub END_TAG_OPTIONAL_EL () {
  DD_EL | DT_EL | LI_EL | P_EL | RUBY_COMPONENT_EL
}

## NOTE: Used in </body> and EOF algorithms.
sub ALL_END_TAG_OPTIONAL_EL () {
  DD_EL | DT_EL | LI_EL | P_EL |
  BODY_EL | HTML_EL | TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
}

sub SCOPING_EL () {
  BUTTON_EL | CAPTION_EL | HTML_EL | TABLE_EL | TABLE_CELL_EL |
  MISC_SCOPING_EL
}

sub TABLE_SCOPING_EL () { HTML_EL | TABLE_EL }

sub TABLE_ROWS_SCOPING_EL () { HTML_EL | TABLE_ROW_GROUP_EL }

sub TABLE_ROW_SCOPING_EL () { HTML_EL | TABLE_ROW_EL }

sub SPECIAL_EL () {
  ADDRESS_EL | BODY_EL | DIV_EL |
  DD_EL | DT_EL | LI_EL | P_EL |
  FORM_EL | FRAMESET_EL | HEADING_EL | OPTION_EL | OPTGROUP_EL |
  SELECT_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL | MISC_SPECIAL_EL
}
127    
## Maps an HTML element name to its category flag(s), expressed with the
## |*_EL| bit constants.  Elements carrying more than one category
## (e.g. |a|, |nobr|) have their flags OR-ed together.
128     my $el_category = {
129     a => A_EL | FORMATTING_EL,
130     address => ADDRESS_EL,
131     applet => MISC_SCOPING_EL,
132     area => MISC_SPECIAL_EL,
133     b => FORMATTING_EL,
134     base => MISC_SPECIAL_EL,
135     basefont => MISC_SPECIAL_EL,
136     bgsound => MISC_SPECIAL_EL,
137     big => FORMATTING_EL,
138     blockquote => MISC_SPECIAL_EL,
139     body => BODY_EL,
140     br => MISC_SPECIAL_EL,
141     button => BUTTON_EL,
142     caption => CAPTION_EL,
143     center => MISC_SPECIAL_EL,
144     col => MISC_SPECIAL_EL,
145     colgroup => MISC_SPECIAL_EL,
146     dd => DD_EL,
147     dir => MISC_SPECIAL_EL,
148     div => DIV_EL,
149     dl => MISC_SPECIAL_EL,
150     dt => DT_EL,
151     em => FORMATTING_EL,
152     embed => MISC_SPECIAL_EL,
153     fieldset => MISC_SPECIAL_EL,
154     font => FORMATTING_EL,
155     form => FORM_EL,
156     frame => MISC_SPECIAL_EL,
157     frameset => FRAMESET_EL,
158     h1 => HEADING_EL,
159     h2 => HEADING_EL,
160     h3 => HEADING_EL,
161     h4 => HEADING_EL,
162     h5 => HEADING_EL,
163     h6 => HEADING_EL,
164     head => MISC_SPECIAL_EL,
165     hr => MISC_SPECIAL_EL,
166     html => HTML_EL,
167     i => FORMATTING_EL,
168     iframe => MISC_SPECIAL_EL,
169     img => MISC_SPECIAL_EL,
170     input => MISC_SPECIAL_EL,
171     isindex => MISC_SPECIAL_EL,
172     li => LI_EL,
173     link => MISC_SPECIAL_EL,
174     listing => MISC_SPECIAL_EL,
175     marquee => MISC_SCOPING_EL,
176     menu => MISC_SPECIAL_EL,
177     meta => MISC_SPECIAL_EL,
178     nobr => NOBR_EL | FORMATTING_EL,
179     noembed => MISC_SPECIAL_EL,
180     noframes => MISC_SPECIAL_EL,
181     noscript => MISC_SPECIAL_EL,
182     object => MISC_SCOPING_EL,
183     ol => MISC_SPECIAL_EL,
184     optgroup => OPTGROUP_EL,
185     option => OPTION_EL,
186     p => P_EL,
187     param => MISC_SPECIAL_EL,
188     plaintext => MISC_SPECIAL_EL,
189     pre => MISC_SPECIAL_EL,
190 wakaba 1.151 rp => RUBY_COMPONENT_EL,
191     rt => RUBY_COMPONENT_EL,
192     ruby => RUBY_EL,
193 wakaba 1.123 s => FORMATTING_EL,
194     script => MISC_SPECIAL_EL,
195     select => SELECT_EL,
196     small => FORMATTING_EL,
197     spacer => MISC_SPECIAL_EL,
198     strike => FORMATTING_EL,
199     strong => FORMATTING_EL,
200     style => MISC_SPECIAL_EL,
201     table => TABLE_EL,
202     tbody => TABLE_ROW_GROUP_EL,
203     td => TABLE_CELL_EL,
204     textarea => MISC_SPECIAL_EL,
205     tfoot => TABLE_ROW_GROUP_EL,
206     th => TABLE_CELL_EL,
207     thead => TABLE_ROW_GROUP_EL,
208     title => MISC_SPECIAL_EL,
209     tr => TABLE_ROW_EL,
210     tt => FORMATTING_EL,
211     u => FORMATTING_EL,
212     ul => MISC_SPECIAL_EL,
213     wbr => MISC_SPECIAL_EL,
214     };
215    
## Category flags for foreign (non-HTML) elements: a two-level table
## keyed first by namespace URL (MathML, SVG) and then by element name.
216 wakaba 1.126 my $el_category_f = {
217     $MML_NS => {
218     'annotation-xml' => MML_AXML_EL,
219     mi => FOREIGN_FLOW_CONTENT_EL,
220     mo => FOREIGN_FLOW_CONTENT_EL,
221     mn => FOREIGN_FLOW_CONTENT_EL,
222     ms => FOREIGN_FLOW_CONTENT_EL,
223     mtext => FOREIGN_FLOW_CONTENT_EL,
224     },
225     $SVG_NS => {
226 wakaba 1.131 foreignObject => FOREIGN_FLOW_CONTENT_EL,
227 wakaba 1.126 desc => FOREIGN_FLOW_CONTENT_EL,
228     title => FOREIGN_FLOW_CONTENT_EL,
229     },
230     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
231     };
232    
## Maps an all-lowercase SVG attribute name to its mixed-case spelling.
## NOTE(review): presumably used to restore the case of attribute names
## that were lowercased during tokenization -- confirm at the use site.
233 wakaba 1.131 my $svg_attr_name = {
234 wakaba 1.146 attributename => 'attributeName',
235 wakaba 1.131 attributetype => 'attributeType',
236     basefrequency => 'baseFrequency',
237     baseprofile => 'baseProfile',
238     calcmode => 'calcMode',
239     clippathunits => 'clipPathUnits',
240     contentscripttype => 'contentScriptType',
241     contentstyletype => 'contentStyleType',
242     diffuseconstant => 'diffuseConstant',
243     edgemode => 'edgeMode',
244     externalresourcesrequired => 'externalResourcesRequired',
245     filterres => 'filterRes',
246     filterunits => 'filterUnits',
247     glyphref => 'glyphRef',
248     gradienttransform => 'gradientTransform',
249     gradientunits => 'gradientUnits',
250     kernelmatrix => 'kernelMatrix',
251     kernelunitlength => 'kernelUnitLength',
252     keypoints => 'keyPoints',
253     keysplines => 'keySplines',
254     keytimes => 'keyTimes',
255     lengthadjust => 'lengthAdjust',
256     limitingconeangle => 'limitingConeAngle',
257     markerheight => 'markerHeight',
258     markerunits => 'markerUnits',
259     markerwidth => 'markerWidth',
260     maskcontentunits => 'maskContentUnits',
261     maskunits => 'maskUnits',
262     numoctaves => 'numOctaves',
263     pathlength => 'pathLength',
264     patterncontentunits => 'patternContentUnits',
265     patterntransform => 'patternTransform',
266     patternunits => 'patternUnits',
267     pointsatx => 'pointsAtX',
268     pointsaty => 'pointsAtY',
269     pointsatz => 'pointsAtZ',
270     preservealpha => 'preserveAlpha',
271     preserveaspectratio => 'preserveAspectRatio',
272     primitiveunits => 'primitiveUnits',
273     refx => 'refX',
274     refy => 'refY',
275     repeatcount => 'repeatCount',
276     repeatdur => 'repeatDur',
277     requiredextensions => 'requiredExtensions',
278 wakaba 1.146 requiredfeatures => 'requiredFeatures',
279 wakaba 1.131 specularconstant => 'specularConstant',
280     specularexponent => 'specularExponent',
281     spreadmethod => 'spreadMethod',
282     startoffset => 'startOffset',
283     stddeviation => 'stdDeviation',
284     stitchtiles => 'stitchTiles',
285     surfacescale => 'surfaceScale',
286     systemlanguage => 'systemLanguage',
287     tablevalues => 'tableValues',
288     targetx => 'targetX',
289     targety => 'targetY',
290     textlength => 'textLength',
291     viewbox => 'viewBox',
292     viewtarget => 'viewTarget',
293     xchannelselector => 'xChannelSelector',
294     ychannelselector => 'yChannelSelector',
295     zoomandpan => 'zoomAndPan',
296     };
297    
## Maps a qualified attribute name as written in the source to
## |[namespace URL, [prefix, local name]]|.  The prefix is |undef| for
## the unprefixed |xmlns| attribute.
298     my $foreign_attr_xname = {
299     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
300     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
301     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
302     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
303     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
304     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
305     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
306     'xml:base' => [$XML_NS, ['xml', 'base']],
307     'xml:lang' => [$XML_NS, ['xml', 'lang']],
308     'xml:space' => [$XML_NS, ['xml', 'space']],
309     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
310     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
311     };
312    
313     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
314    
## Maps each code point in the range 0x80..0x9F to the code point that
## replaces it; entries without a meaningful replacement map to U+FFFD.
## NOTE(review): the values look like the Windows-1252 assignments for
## these bytes, presumably applied to numeric character references --
## confirm at the use site.
315 wakaba 1.4 my $c1_entity_char = {
316 wakaba 1.10 0x80 => 0x20AC,
317     0x81 => 0xFFFD,
318     0x82 => 0x201A,
319     0x83 => 0x0192,
320     0x84 => 0x201E,
321     0x85 => 0x2026,
322     0x86 => 0x2020,
323     0x87 => 0x2021,
324     0x88 => 0x02C6,
325     0x89 => 0x2030,
326     0x8A => 0x0160,
327     0x8B => 0x2039,
328     0x8C => 0x0152,
329     0x8D => 0xFFFD,
330     0x8E => 0x017D,
331     0x8F => 0xFFFD,
332     0x90 => 0xFFFD,
333     0x91 => 0x2018,
334     0x92 => 0x2019,
335     0x93 => 0x201C,
336     0x94 => 0x201D,
337     0x95 => 0x2022,
338     0x96 => 0x2013,
339     0x97 => 0x2014,
340     0x98 => 0x02DC,
341     0x99 => 0x2122,
342     0x9A => 0x0161,
343     0x9B => 0x203A,
344     0x9C => 0x0153,
345     0x9D => 0xFFFD,
346     0x9E => 0x017E,
347     0x9F => 0x0178,
348 wakaba 1.4 }; # $c1_entity_char
349 wakaba 1.1
## Parses an HTML document supplied as a string of bytes.
##
## Arguments: the invocant, the charset name (may be |undef|), and the
## byte string (or a reference to it).  Every further argument is
## forwarded unchanged to |parse_byte_stream|, whose result is returned.
##
## Dies if the string cannot be opened as an in-memory file handle
## (previously such a failure was ignored and the parser silently read
## from an unopened handle).
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);
  open my $input, '<', $bytes_ref
      or die "parse_byte_string: cannot open the byte string for reading: $!";
  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
356    
## Parses an HTML document read from a byte stream handle.
##
## The character encoding is determined by the HTML5 encoding sniffing
## algorithm (explicit charset name, BOM, universal charset detection,
## and finally the |windows-1252| default).  The decoded character
## stream is then handed to |parse_char_stream|.  If the parser asks for
## an encoding change (|Whatpm::HTML::RestartParser| is thrown by the
## |change_encoding| callback), the document is parsed again with the
## decoder selected by that callback.
##
## Returns whatever |parse_char_stream| returns.
sub parse_byte_stream ($$$$;$$) {
  # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
  my $self = ref $_[0] ? shift : shift->new;
  my $charset_name = shift;
  my $byte_stream = $_[0];

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type})\n";
  };
  $self->{parse_error} = $onerror; # updated later by parse_char_string

  my $get_wrapper = $_[3] || sub ($) {
    return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
  };

  ## HTML5 encoding sniffing algorithm
  require Message::Charset::Info;
  my $charset;
  my $buffer;
  my ($char_stream, $e_status);

  ## Records the name of the selected charset in |$self->{input_encoding}|
  ## and reports how trustworthy the chosen decoder implementation is.
  ## It reads the current values of |$charset| and |$e_status|, so it
  ## can be called again after |change_encoding| has replaced them.
  my $report_decoder_status = sub {
    if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
      $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
      !!!parse-error (type => 'chardecode:fallback',
                      #text => $self->{input_encoding},
                      level => $self->{level}->{uncertain},
                      line => 1, column => 1,
                      layer => 'encode');
    } elsif (not ($e_status &
                  Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
      $self->{input_encoding} = $charset->get_iana_name;
      !!!parse-error (type => 'chardecode:no error',
                      text => $self->{input_encoding},
                      level => $self->{level}->{uncertain},
                      line => 1, column => 1,
                      layer => 'encode');
    } else {
      $self->{input_encoding} = $charset->get_iana_name;
    }
  };

  SNIFFING: {
    ## NOTE: By setting |allow_fallback| option true when the
    ## |get_decode_handle| method is invoked, we ignore what the HTML5
    ## spec requires, i.e. unsupported encoding should be ignored.
    ## TODO: We should not do this unless the parser is invoked
    ## in the conformance checking mode, in which this behavior
    ## would be useful.

    ## Step 1
    if (defined $charset_name) {
      $charset = Message::Charset::Info->get_by_html_name ($charset_name);
          ## TODO: Is this ok?  Transfer protocol's parameter should be
          ## interpreted in its semantics?

      ## ISSUE: Unsupported encoding is not ignored according to the spec.
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1);
      if ($char_stream) {
        $self->{confident} = 1;
        last SNIFFING;
      } else {
        ## TODO: unsupported error
      }
    }

    ## Step 2
    my $byte_buffer = '';
    for (1..1024) {
      my $char = $byte_stream->getc;
      last unless defined $char;
      $byte_buffer .= $char;
    } ## TODO: timeout

    ## Step 3: a byte order mark at the very beginning of the input
    ## decides the encoding with certainty.
    for my $bom (["\xFE\xFF" => 'utf-16be'],
                 ["\xFF\xFE" => 'utf-16le'],
                 ["\xEF\xBB\xBF" => 'utf-8']) {
      my ($signature, $bom_charset_name) = @$bom;
      next unless index ($byte_buffer, $signature) == 0;

      $charset = Message::Charset::Info->get_by_html_name ($bom_charset_name);
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      $self->{confident} = 1;
      last SNIFFING;
    }

    ## Step 4
    ## TODO: <meta charset>

    ## Step 5
    ## TODO: from history

    ## Step 6
    require Whatpm::Charset::UniversalCharDet;
    $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
        ($byte_buffer);
    if (defined $charset_name) {
      $charset = Message::Charset::Info->get_by_html_name ($charset_name);

      ## ISSUE: Unsupported encoding is not ignored according to the spec.
      require Whatpm::Charset::DecodeHandle;
      $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
          ($byte_stream);
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($buffer, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      if ($char_stream) {
        $buffer->{buffer} = $byte_buffer;
        !!!parse-error (type => 'sniffing:chardet',
                        text => $charset_name,
                        level => $self->{level}->{info},
                        layer => 'encode',
                        line => 1, column => 1);
        $self->{confident} = 0;
        last SNIFFING;
      }
    }

    ## Step 7: default
    ## TODO: Make this configurable.
    $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
        ## NOTE: We choose |windows-1252| here, since |utf-8| should be
        ## detectable in the step 6.
    require Whatpm::Charset::DecodeHandle;
    $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
        ($byte_stream);
    ($char_stream, $e_status)
        = $charset->get_decode_handle ($buffer,
                                       allow_error_reporting => 1,
                                       allow_fallback => 1,
                                       byte_buffer => \$byte_buffer);
    $buffer->{buffer} = $byte_buffer;
    !!!parse-error (type => 'sniffing:default',
                    text => 'windows-1252',
                    level => $self->{level}->{info},
                    line => 1, column => 1,
                    layer => 'encode');
    $self->{confident} = 0;
  } # SNIFFING

  $report_decoder_status->();

  ## Callback invoked by the parser when the document declares a
  ## character encoding.  It replaces |$charset|, |$char_stream| and
  ## |$e_status| and, unless the declared encoding matches the one in
  ## use, throws |Whatpm::HTML::RestartParser|.
  $self->{change_encoding} = sub {
    my $self = shift;
    $charset_name = shift;
    my $token = shift;

    $charset = Message::Charset::Info->get_by_html_name ($charset_name);
    ($char_stream, $e_status) = $charset->get_decode_handle
        ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
         byte_buffer => \ $buffer->{buffer});

    if ($char_stream) { # if supported
      ## "Change the encoding" algorithm:

      ## Step 1
      if ($charset->{category} &
          Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
        $charset = Message::Charset::Info->get_by_html_name ('utf-8');
        ($char_stream, $e_status) = $charset->get_decode_handle
            ($byte_stream,
             byte_buffer => \ $buffer->{buffer});
      }
      $charset_name = $charset->get_iana_name;

      ## Step 2
      if (defined $self->{input_encoding} and
          $self->{input_encoding} eq $charset_name) {
        !!!parse-error (type => 'charset label:matching',
                        text => $charset_name,
                        level => $self->{level}->{info});
        $self->{confident} = 1;
        return;
      }

      !!!parse-error (type => 'charset label detected',
                      text => $self->{input_encoding},
                      value => $charset_name,
                      level => $self->{level}->{warn},
                      token => $token);

      ## Step 3
      # if (can) {
        ## change the encoding on the fly.
        #$self->{confident} = 1;
        #return;
      # }

      ## Step 4
      throw Whatpm::HTML::RestartParser ();
    }
  }; # $self->{change_encoding}

  ## Error handler installed on the (wrapped) character stream: reports
  ## the decoding error and substitutes U+FFFD for the offending octets.
  my $char_onerror = sub {
    my (undef, $type, %opt) = @_;
    !!!parse-error (layer => 'encode',
                    %opt, type => $type,
                    line => $self->{line}, column => $self->{column} + 1);
    if ($opt{octets}) {
      ${$opt{octets}} = "\x{FFFD}"; # replacement character
    }
  };

  my $wrapped_char_stream = $get_wrapper->($char_stream);
  $wrapped_char_stream->onerror ($char_onerror);

  my @args = @_; shift @args; # $s
  my $return;
  try {
    $return = $self->parse_char_stream ($wrapped_char_stream, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## NOTE: Invoked after {change_encoding}.

    $report_decoder_status->();
    $self->{confident} = 1;

    $wrapped_char_stream = $get_wrapper->($char_stream);
    $wrapped_char_stream->onerror ($char_onerror);

    $return = $self->parse_char_stream ($wrapped_char_stream, @args);
  };
  return $return;
} # parse_byte_stream
608 wakaba 1.63
609 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
610     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
611     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
612     ## because the core part of our HTML parser expects a string of characters,
613     ## not a string of bytes or code units or anything which might contain a BOM.
614     ## Therefore, any parser interface that accepts a string of bytes,
615     ## such as |parse_byte_string| in this module, must ensure that it does
616     ## strip the BOM and never strips any ZWNBSP.
617    
## Parses an HTML document supplied as a string of characters.
##
## Arguments: the invocant, the string (or a reference to it), the
## document, and optionally an error handler and a wrapper factory.
## If the wrapper factory (fourth argument after the invocant) is given,
## it is called with the input handle and its result is used as the
## input instead.  Everything after the string is forwarded to
## |parse_char_stream|, whose result is returned.
##
## Dies if the in-memory handle cannot be opened.
sub parse_char_string ($$$;$$) {
  #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
  my $self = shift;
  require utf8;
  my $s = ref $_[0] ? $_[0] : \($_[0]);

  my $input;
  my $octets;
  if (utf8::is_utf8 ($$s)) {
    ## An in-memory handle operates on bytes.  Opening one directly on
    ## a UTF-8 flagged scalar depends on the interpreter's internal
    ## representation (and fails outright for code points above 0xFF
    ## on newer perls), so encode a copy explicitly and let the |:utf8|
    ## layer decode it back to characters.
    $octets = $$s;
    utf8::encode ($octets);
    open $input, '<:utf8', \$octets
        or die "parse_char_string: cannot open the string for reading: $!";
  } else {
    open $input, '<', $s
        or die "parse_char_string: cannot open the string for reading: $!";
  }

  if ($_[3]) {
    $input = $_[3]->($input);
  }
  return $self->parse_char_stream ($input, @_[1..$#_]);
} # parse_char_string
*parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
630 wakaba 1.63
## Parses an HTML document read from a character stream handle.
##
## Arguments: the invocant (a parser object, or a class name, in which
## case a new parser is created), the input handle (anything with a
## |getc| method returning one character or |undef| at the end), the
## document object to fill, and optionally an error handler.
##
## All existing children of the document are removed first.  Returns
## the document.
sub parse_char_stream ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $input = $_[0];
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  $self->{confident} = 1 unless exists $self->{confident};
  $self->{document}->input_encoding ($self->{input_encoding})
      if defined $self->{input_encoding};

  my $i = 0; # NOTE(review): not referenced in the visible code; kept as is.
  $self->{line_prev} = $self->{line} = 1;
  $self->{column_prev} = $self->{column} = 0;

  ## A character pushed back while a previous parse was running (which
  ## may have been aborted, e.g. for an encoding change) must not leak
  ## into this input.
  delete $self->{next_next_char};

  ## Reads the next input character into |$self->{next_char}| (-1 at
  ## the end of the input), maintaining the history of previous
  ## characters and the line/column counters.  It is invoked with the
  ## parser object as its first argument.
  $self->{set_next_char} = sub {
    my $self = shift;

    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    my $char;
    if (defined $self->{next_next_char}) {
      ## A character read ahead while handling CR is consumed first.
      $char = $self->{next_next_char};
      delete $self->{next_next_char};
    } else {
      $char = $input->getc;
    }
    $self->{next_char} = -1 and return unless defined $char;
    $self->{next_char} = ord $char;

    ($self->{line_prev}, $self->{column_prev})
        = ($self->{line}, $self->{column});
    $self->{column}++;

    if ($self->{next_char} == 0x000A) { # LF
      !!!cp ('j1');
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} == 0x000D) { # CR
      !!!cp ('j2');
      ## CR LF and a lone CR are both normalized to a single LF; a
      ## character following a lone CR is kept for the next call.
      my $next = $input->getc;
      if (defined $next and $next ne "\x0A") {
        $self->{next_next_char} = $next;
      }
      $self->{next_char} = 0x000A; # LF # MUST
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} > 0x10FFFF) {
      !!!cp ('j3');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} == 0x0000) { # NULL
      !!!cp ('j4');
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} <= 0x0008 or
             (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
             (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
             (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
             (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
             ## U+xxFFFE and U+xxFFFF of every plane.  Values above
             ## U+10FFFF never reach this branch, so this is the same
             ## set as an explicit list of those 34 code points.
             ($self->{next_char} & 0xFFFE) == 0xFFFE) {
      !!!cp ('j5');
      if ($self->{next_char} < 0x10000) {
        !!!parse-error (type => 'control char',
                        text => (sprintf 'U+%04X', $self->{next_char}));
      } else {
        !!!parse-error (type => 'control char',
                        text => (sprintf 'U-%08X', $self->{next_char}));
      }
    }
  };
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
    my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  ## The current position is passed first so that explicit |line| and
  ## |column| arguments of the caller take precedence.
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_char_stream
734 wakaba 1.1
## Creates a new parser object.
##
## The object starts with the table of error level codes and with
## default callbacks: |set_next_char| always reports the end of input
## (-1), while |parse_error|, |change_encoding| and
## |application_cache_selection| do nothing.
sub new ($) {
  my $class = shift;
  my $self = bless {
    level => {must => 'm',
              should => 's',
              warn => 'w',
              info => 'i',
              uncertain => 'u'},
  }, $class;

  ## The default |set_next_char| callback is stored in the object and
  ## refers back to it.  Hold only a weak reference in the closure so
  ## that an unused parser is not kept alive by a reference cycle.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_next_char} = sub {
    $weak_self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
761    
762 wakaba 1.40 sub CM_ENTITY () { 0b001 } # & markup in data
763     sub CM_LIMITED_MARKUP () { 0b010 } # < markup in data (limited)
764     sub CM_FULL_MARKUP () { 0b100 } # < markup in data (any)
765    
766     sub PLAINTEXT_CONTENT_MODEL () { 0 }
767     sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
768     sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP }
769     sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP }
770    
## Tokenizer states: the values stored in |$self->{state}| and tested by
## |_get_next_token|.  The values 1 and 12 are no longer assigned; see the
## commented-out entries below.
771 wakaba 1.57 sub DATA_STATE () { 0 } # initial state, set by |_initialize_tokenizer|
772 wakaba 1.168 #sub ENTITY_DATA_STATE () { 1 } # retired; see the NOTE before |ENTITY_STATE|
773 wakaba 1.57 sub TAG_OPEN_STATE () { 2 } # entered from the data state on "<"
774     sub CLOSE_TAG_OPEN_STATE () { 3 } # entered from the tag open state on "/"
775     sub TAG_NAME_STATE () { 4 }
776     sub BEFORE_ATTRIBUTE_NAME_STATE () { 5 }
777     sub ATTRIBUTE_NAME_STATE () { 6 }
778     sub AFTER_ATTRIBUTE_NAME_STATE () { 7 }
779     sub BEFORE_ATTRIBUTE_VALUE_STATE () { 8 }
780     sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
781     sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
782     sub ATTRIBUTE_VALUE_UNQUOTED_STATE () { 11 }
783 wakaba 1.168 #sub ENTITY_IN_ATTRIBUTE_VALUE_STATE () { 12 } # retired; see the NOTE before |ENTITY_STATE|
784 wakaba 1.57 sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
785     sub COMMENT_START_STATE () { 14 }
786     sub COMMENT_START_DASH_STATE () { 15 }
787     sub COMMENT_STATE () { 16 }
788     sub COMMENT_END_STATE () { 17 }
789     sub COMMENT_END_DASH_STATE () { 18 }
790     sub BOGUS_COMMENT_STATE () { 19 }
791     sub DOCTYPE_STATE () { 20 }
792     sub BEFORE_DOCTYPE_NAME_STATE () { 21 }
793     sub DOCTYPE_NAME_STATE () { 22 }
794     sub AFTER_DOCTYPE_NAME_STATE () { 23 }
795     sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 24 }
796     sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
797     sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
798     sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 27 }
799     sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 28 }
800     sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
801     sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
802     sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
803     sub BOGUS_DOCTYPE_STATE () { 32 }
804 wakaba 1.72 sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
805 wakaba 1.125 sub SELF_CLOSING_START_TAG_STATE () { 34 } # entered on "/" while inside a tag
806 wakaba 1.165 sub CDATA_SECTION_STATE () { 35 }
807 wakaba 1.164 sub MD_HYPHEN_STATE () { 36 } # "markup declaration open state" in the spec
808     sub MD_DOCTYPE_STATE () { 37 } # "markup declaration open state" in the spec
809     sub MD_CDATA_STATE () { 38 } # "markup declaration open state" in the spec
810     sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
811 wakaba 1.165 sub CDATA_SECTION_MSE1_STATE () { 40 } # "CDATA section state" in the spec
812     sub CDATA_SECTION_MSE2_STATE () { 41 } # "CDATA section state" in the spec
813 wakaba 1.166 sub PUBLIC_STATE () { 42 } # "after DOCTYPE name state" in the spec
814     sub SYSTEM_STATE () { 43 } # "after DOCTYPE name state" in the spec
815 wakaba 1.168 ## NOTE: "Entity data state", "entity in attribute value state", and
816     ## "consume a character reference" algorithm are jointly implemented
817     ## using the following six states:
818     sub ENTITY_STATE () { 44 } # entered from the data state on "&"
819     sub ENTITY_HASH_STATE () { 45 }
820     sub NCR_NUM_STATE () { 46 }
821     sub HEXREF_X_STATE () { 47 }
822     sub HEXREF_HEX_STATE () { 48 }
823     sub ENTITY_NAME_STATE () { 49 }
824 wakaba 1.57
## Token types: the possible values of a token's |->{type}| member.
825 wakaba 1.55 sub DOCTYPE_TOKEN () { 1 }
826     sub COMMENT_TOKEN () { 2 }
827     sub START_TAG_TOKEN () { 3 }
828     sub END_TAG_TOKEN () { 4 }
829     sub END_OF_FILE_TOKEN () { 5 } # emitted by the data state at end of input
830     sub CHARACTER_TOKEN () { 6 } # its text is carried in |->{data}|
831    
## Insertion mode group flags.  Each constant is a single bit, starting at
## 0b100, so the two low-order bits stay free; the |*_IM| constants defined
## later in this file OR these flags together, some with a small value in
## those low-order bits.
## NOTE(review): "IMS" presumably stands for "insertion modes" -- confirm.
832 wakaba 1.54 sub AFTER_HTML_IMS () { 0b100 }
833     sub HEAD_IMS () { 0b1000 }
834     sub BODY_IMS () { 0b10000 }
835 wakaba 1.56 sub BODY_TABLE_IMS () { 0b100000 }
836 wakaba 1.54 sub TABLE_IMS () { 0b1000000 }
837 wakaba 1.56 sub ROW_IMS () { 0b10000000 }
838 wakaba 1.54 sub BODY_AFTER_IMS () { 0b100000000 }
839     sub FRAME_IMS () { 0b1000000000 }
840 wakaba 1.101 sub SELECT_IMS () { 0b10000000000 }
841 wakaba 1.126 sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 }
842     ## NOTE: "in foreign content" insertion mode is special; it is combined
843     ## with the secondary insertion mode. In this parser, they are stored
844     ## together in the bit-or'ed form.
845 wakaba 1.54
846 wakaba 1.84 ## NOTE: "initial" and "before html" insertion modes have no constants.
847    
848     ## NOTE: "after after body" insertion mode (the |AFTER_HTML_IMS| and |BODY_AFTER_IMS| flags combined).
849 wakaba 1.54 sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
850 wakaba 1.84
851     ## NOTE: "after after frameset" insertion mode (the |AFTER_HTML_IMS| and |FRAME_IMS| flags combined).
852 wakaba 1.54 sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
853 wakaba 1.84
## Individual insertion modes.  Most combine one or more |*_IMS| group flags
## with a value in the two low-order bits that tells apart the modes sharing
## the same flags; |IN_COLUMN_GROUP_IM| carries no group flag at all.
854 wakaba 1.54 sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
855     sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
856     sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
857     sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 }
858     sub IN_BODY_IM () { BODY_IMS } # group flag only
859 wakaba 1.56 sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 0b01 }
860     sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }
861     sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 0b01 }
862     sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
863 wakaba 1.54 sub IN_TABLE_IM () { TABLE_IMS } # group flag only
864     sub AFTER_BODY_IM () { BODY_AFTER_IMS } # group flag only
865     sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 }
866     sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }
867 wakaba 1.101 sub IN_SELECT_IM () { SELECT_IMS | 0b01 }
868     sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }
869 wakaba 1.54 sub IN_COLUMN_GROUP_IM () { 0b10 } # low-order bits only, no group flag
870    
871 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec.
872    
## Resets the tokenizer part of the parser object: data state, PCDATA content
## model, no token or attribute under construction, the self-closing flag
## cleared, and an empty queue of pending tokens.  It also invokes the
## |!!!next-input-character| macro once (presumably to load the first input
## character into |$self->{next_char}| -- confirm against the macro
## definition).
## Argument: the parser object (|$self|).
873     sub _initialize_tokenizer ($) {
874     my $self = shift;
875 wakaba 1.57 $self->{state} = DATA_STATE; # MUST: start in the data state
876 wakaba 1.163 #$self->{state_keyword}; # initialized when used (e.g. reset to '' before entering |CDATA_PCDATA_CLOSE_TAG_STATE|)
877 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # initial content model (original note: MUST "be")
878 wakaba 1.165 undef $self->{current_token}; # no tag/comment token under construction
879 wakaba 1.1 undef $self->{current_attribute}; # no attribute under construction
880     undef $self->{last_emitted_start_tag_name}; # no start tag emitted yet
881     undef $self->{last_attribute_value_state};
882 wakaba 1.125 delete $self->{self_closing}; # clear the "self-closing flag"
883 wakaba 1.1 $self->{char} = []; # NOTE(review): presumably a buffer used by the input macro -- confirm
884 wakaba 1.76 # $self->{next_char} is not assigned directly here; see the macro on the next line.
885 wakaba 1.1 !!!next-input-character;
886     $self->{token} = []; # queue of tokens that |_get_next_token| returns before tokenizing further
887 wakaba 1.18 # $self->{escape} is not reset here; it is set and deleted in the data state.
888 wakaba 1.1 } # _initialize_tokenizer
889    
890     ## A token has:
891 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
892     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
893     ## ->{name} (DOCTYPE_TOKEN)
894     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
895     ## ->{public_identifier} (DOCTYPE_TOKEN)
896     ## ->{system_identifier} (DOCTYPE_TOKEN)
897 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
898 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
899 wakaba 1.66 ## ->{name}
900     ## ->{value}
901     ## ->{has_reference} == 1 or 0
902 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
903 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
904     ## The token's own |->{self_closing}| is used to save the value of
905     ## |$self->{self_closing}| while the token is pushed back to the stack.
906    
907 wakaba 1.1 ## Emitted token MUST immediately be handled by the tree construction state.
908    
909     ## Before each step, UA MAY check to see if either one of the scripts in
910     ## "list of scripts that will execute as soon as possible" or the first
911     ## script in the "list of scripts that will execute asynchronously",
912     ## has completed loading. If one has, then it MUST be executed
913     ## and removed from the list.
914    
915 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
916     ## documents and not to user agents and conformance checkers,
917     ## contains some requirements that are not detected by the
918     ## parsing algorithm:
919     ## - Some requirements on character encoding declarations. ## TODO
920     ## - "Elements MUST NOT contain content that their content model disallows."
921     ## ... Some are parse error, some are not (will be reported by c.c.).
922     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
923     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
924     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
925    
926     ## TODO: HTML5 imposes on authors two SHOULD-level requirements that cannot
927     ## be detected by the HTML5 parsing algorithm:
928     ## - Text,
929    
930 wakaba 1.1 sub _get_next_token ($) {
931     my $self = shift;
932 wakaba 1.125
933     if ($self->{self_closing}) {
934     !!!parse-error (type => 'nestc', token => $self->{current_token});
935     ## NOTE: The |self_closing| flag is only set by start tag token.
936     ## In addition, when a start tag token is emitted, it is always set to
937     ## |current_token|.
938     delete $self->{self_closing};
939     }
940    
941 wakaba 1.1 if (@{$self->{token}}) {
942 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
943 wakaba 1.1 return shift @{$self->{token}};
944     }
945    
946     A: {
947 wakaba 1.57 if ($self->{state} == DATA_STATE) {
948 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
949 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
950     not $self->{escape}) {
951 wakaba 1.77 !!!cp (1);
952 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
953     ## "entity data state". In this implementation, the tokenizer
954     ## is switched to the |ENTITY_STATE|, which is an implementation
955     ## of the "consume a character reference" algorithm.
956     $self->{entity_in_attr} = 0;
957     $self->{entity_additional} = -1;
958     $self->{state} = ENTITY_STATE;
959 wakaba 1.1 !!!next-input-character;
960     redo A;
961     } else {
962 wakaba 1.77 !!!cp (2);
963 wakaba 1.1 #
964     }
965 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
966 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
967 wakaba 1.13 unless ($self->{escape}) {
968 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
969     $self->{prev_char}->[1] == 0x0021 and # !
970     $self->{prev_char}->[2] == 0x003C) { # <
971 wakaba 1.77 !!!cp (3);
972 wakaba 1.13 $self->{escape} = 1;
973 wakaba 1.77 } else {
974     !!!cp (4);
975 wakaba 1.13 }
976 wakaba 1.77 } else {
977     !!!cp (5);
978 wakaba 1.13 }
979     }
980    
981     #
982 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
983 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
984     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
985 wakaba 1.13 not $self->{escape})) {
986 wakaba 1.77 !!!cp (6);
987 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
988 wakaba 1.1 !!!next-input-character;
989     redo A;
990     } else {
991 wakaba 1.77 !!!cp (7);
992 wakaba 1.1 #
993     }
994 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
995 wakaba 1.13 if ($self->{escape} and
996 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
997 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
998     $self->{prev_char}->[1] == 0x002D) { # -
999 wakaba 1.77 !!!cp (8);
1000 wakaba 1.13 delete $self->{escape};
1001 wakaba 1.77 } else {
1002     !!!cp (9);
1003 wakaba 1.13 }
1004 wakaba 1.77 } else {
1005     !!!cp (10);
1006 wakaba 1.13 }
1007    
1008     #
1009 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1010 wakaba 1.77 !!!cp (11);
1011 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
1012     line => $self->{line}, column => $self->{column}});
1013 wakaba 1.1 last A; ## TODO: ok?
1014 wakaba 1.77 } else {
1015     !!!cp (12);
1016 wakaba 1.1 }
1017     # Anything else
1018 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
1019 wakaba 1.112 data => chr $self->{next_char},
1020 wakaba 1.120 line => $self->{line}, column => $self->{column},
1021 wakaba 1.118 };
1022 wakaba 1.1 ## Stay in the data state
1023     !!!next-input-character;
1024    
1025     !!!emit ($token);
1026    
1027     redo A;
1028 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
1029 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1030 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
1031 wakaba 1.77 !!!cp (15);
1032 wakaba 1.1 !!!next-input-character;
1033 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1034 wakaba 1.1 redo A;
1035     } else {
1036 wakaba 1.77 !!!cp (16);
1037 wakaba 1.1 ## reconsume
1038 wakaba 1.57 $self->{state} = DATA_STATE;
1039 wakaba 1.1
1040 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1041 wakaba 1.120 line => $self->{line_prev},
1042     column => $self->{column_prev},
1043 wakaba 1.118 });
1044 wakaba 1.1
1045     redo A;
1046     }
1047 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
1048 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
1049 wakaba 1.77 !!!cp (17);
1050 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
1051 wakaba 1.1 !!!next-input-character;
1052     redo A;
1053 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1054 wakaba 1.77 !!!cp (18);
1055 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1056 wakaba 1.1 !!!next-input-character;
1057     redo A;
1058 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1059     $self->{next_char} <= 0x005A) { # A..Z
1060 wakaba 1.77 !!!cp (19);
1061 wakaba 1.1 $self->{current_token}
1062 wakaba 1.55 = {type => START_TAG_TOKEN,
1063 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
1064     line => $self->{line_prev},
1065     column => $self->{column_prev}};
1066 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1067 wakaba 1.1 !!!next-input-character;
1068     redo A;
1069 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1070     $self->{next_char} <= 0x007A) { # a..z
1071 wakaba 1.77 !!!cp (20);
1072 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
1073 wakaba 1.112 tag_name => chr ($self->{next_char}),
1074     line => $self->{line_prev},
1075     column => $self->{column_prev}};
1076 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1077 wakaba 1.1 !!!next-input-character;
1078     redo A;
1079 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1080 wakaba 1.77 !!!cp (21);
1081 wakaba 1.115 !!!parse-error (type => 'empty start tag',
1082     line => $self->{line_prev},
1083     column => $self->{column_prev});
1084 wakaba 1.57 $self->{state} = DATA_STATE;
1085 wakaba 1.1 !!!next-input-character;
1086    
1087 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
1088 wakaba 1.120 line => $self->{line_prev},
1089     column => $self->{column_prev},
1090 wakaba 1.118 });
1091 wakaba 1.1
1092     redo A;
1093 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
1094 wakaba 1.77 !!!cp (22);
1095 wakaba 1.115 !!!parse-error (type => 'pio',
1096     line => $self->{line_prev},
1097     column => $self->{column_prev});
1098 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1099 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1100 wakaba 1.120 line => $self->{line_prev},
1101     column => $self->{column_prev},
1102 wakaba 1.118 };
1103 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1104 wakaba 1.1 redo A;
1105     } else {
1106 wakaba 1.77 !!!cp (23);
1107 wakaba 1.136 !!!parse-error (type => 'bare stago',
1108     line => $self->{line_prev},
1109     column => $self->{column_prev});
1110 wakaba 1.57 $self->{state} = DATA_STATE;
1111 wakaba 1.1 ## reconsume
1112    
1113 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1114 wakaba 1.120 line => $self->{line_prev},
1115     column => $self->{column_prev},
1116 wakaba 1.118 });
1117 wakaba 1.1
1118     redo A;
1119     }
1120     } else {
1121 wakaba 1.40 die "$0: $self->{content_model} in tag open";
1122 wakaba 1.1 }
1123 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1124 wakaba 1.164 ## NOTE: The "close tag open state" in the spec is implemented as
1125     ## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|.
1126    
1127 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1128 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1129 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
1130 wakaba 1.164 $self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE;
1131     $self->{state_keyword} = '';
1132     ## Reconsume.
1133     redo A;
1134 wakaba 1.23 } else {
1135     ## No start tag token has ever been emitted
1136 wakaba 1.164 ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
1137 wakaba 1.77 !!!cp (28);
1138 wakaba 1.57 $self->{state} = DATA_STATE;
1139 wakaba 1.164 ## Reconsume.
1140 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1141 wakaba 1.120 line => $l, column => $c,
1142 wakaba 1.118 });
1143 wakaba 1.1 redo A;
1144     }
1145     }
1146 wakaba 1.164
1147 wakaba 1.76 if (0x0041 <= $self->{next_char} and
1148     $self->{next_char} <= 0x005A) { # A..Z
1149 wakaba 1.77 !!!cp (29);
1150 wakaba 1.112 $self->{current_token}
1151     = {type => END_TAG_TOKEN,
1152     tag_name => chr ($self->{next_char} + 0x0020),
1153     line => $l, column => $c};
1154 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1155 wakaba 1.1 !!!next-input-character;
1156     redo A;
1157 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1158     $self->{next_char} <= 0x007A) { # a..z
1159 wakaba 1.77 !!!cp (30);
1160 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
1161 wakaba 1.112 tag_name => chr ($self->{next_char}),
1162     line => $l, column => $c};
1163 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1164 wakaba 1.1 !!!next-input-character;
1165     redo A;
1166 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1167 wakaba 1.77 !!!cp (31);
1168 wakaba 1.115 !!!parse-error (type => 'empty end tag',
1169     line => $self->{line_prev}, ## "<" in "</>"
1170     column => $self->{column_prev} - 1);
1171 wakaba 1.57 $self->{state} = DATA_STATE;
1172 wakaba 1.1 !!!next-input-character;
1173     redo A;
1174 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1175 wakaba 1.77 !!!cp (32);
1176 wakaba 1.3 !!!parse-error (type => 'bare etago');
1177 wakaba 1.57 $self->{state} = DATA_STATE;
1178 wakaba 1.1 # reconsume
1179    
1180 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1181 wakaba 1.120 line => $l, column => $c,
1182 wakaba 1.118 });
1183 wakaba 1.1
1184     redo A;
1185     } else {
1186 wakaba 1.77 !!!cp (33);
1187 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
1188 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1189 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1190 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
1191     column => $self->{column_prev} - 1,
1192 wakaba 1.118 };
1193 wakaba 1.164 ## NOTE: $self->{next_char} is intentionally left as is.
1194     ## Although the "anything else" case of the spec does not explicitly
1195     ## state that the next input character is to be reconsumed,
1196     ## it will be included in the |data| of the comment token
1197     ## generated from the bogus end tag, as defined in the
1198     ## "bogus comment state" entry.
1199     redo A;
1200     }
1201     } elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) {
1202     my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1;
1203     if (length $ch) {
1204     my $CH = $ch;
1205     $ch =~ tr/a-z/A-Z/;
1206     my $nch = chr $self->{next_char};
1207     if ($nch eq $ch or $nch eq $CH) {
1208     !!!cp (24);
1209     ## Stay in the state.
1210     $self->{state_keyword} .= $nch;
1211     !!!next-input-character;
1212     redo A;
1213     } else {
1214     !!!cp (25);
1215     $self->{state} = DATA_STATE;
1216     ## Reconsume.
1217     !!!emit ({type => CHARACTER_TOKEN,
1218     data => '</' . $self->{state_keyword},
1219     line => $self->{line_prev},
1220     column => $self->{column_prev} - 1 - length $self->{state_keyword},
1221     });
1222     redo A;
1223     }
1224     } else { # after "<{tag-name}"
1225     unless ({
1226     0x0009 => 1, # HT
1227     0x000A => 1, # LF
1228     0x000B => 1, # VT
1229     0x000C => 1, # FF
1230     0x0020 => 1, # SP
1231     0x003E => 1, # >
1232     0x002F => 1, # /
1233     -1 => 1, # EOF
1234     }->{$self->{next_char}}) {
1235     !!!cp (26);
1236     ## Reconsume.
1237     $self->{state} = DATA_STATE;
1238     !!!emit ({type => CHARACTER_TOKEN,
1239     data => '</' . $self->{state_keyword},
1240     line => $self->{line_prev},
1241     column => $self->{column_prev} - 1 - length $self->{state_keyword},
1242     });
1243     redo A;
1244     } else {
1245     !!!cp (27);
1246     $self->{current_token}
1247     = {type => END_TAG_TOKEN,
1248     tag_name => $self->{last_emitted_start_tag_name},
1249     line => $self->{line_prev},
1250     column => $self->{column_prev} - 1 - length $self->{state_keyword}};
1251     $self->{state} = TAG_NAME_STATE;
1252     ## Reconsume.
1253     redo A;
1254     }
1255 wakaba 1.1 }
1256 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
1257 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1258     $self->{next_char} == 0x000A or # LF
1259     $self->{next_char} == 0x000B or # VT
1260     $self->{next_char} == 0x000C or # FF
1261     $self->{next_char} == 0x0020) { # SP
1262 wakaba 1.77 !!!cp (34);
1263 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1264 wakaba 1.1 !!!next-input-character;
1265     redo A;
1266 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1267 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1268 wakaba 1.77 !!!cp (35);
1269 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1270 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1271 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1272 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1273     # ## NOTE: This should never be reached.
1274     # !!! cp (36);
1275     # !!! parse-error (type => 'end tag attribute');
1276     #} else {
1277 wakaba 1.77 !!!cp (37);
1278 wakaba 1.78 #}
1279 wakaba 1.1 } else {
1280     die "$0: $self->{current_token}->{type}: Unknown token type";
1281     }
1282 wakaba 1.57 $self->{state} = DATA_STATE;
1283 wakaba 1.1 !!!next-input-character;
1284    
1285     !!!emit ($self->{current_token}); # start tag or end tag
1286    
1287     redo A;
1288 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1289     $self->{next_char} <= 0x005A) { # A..Z
1290 wakaba 1.77 !!!cp (38);
1291 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1292 wakaba 1.1 # start tag or end tag
1293     ## Stay in this state
1294     !!!next-input-character;
1295     redo A;
1296 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1297 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1298 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1299 wakaba 1.77 !!!cp (39);
1300 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1301 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1302 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1303 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1304     # ## NOTE: This state should never be reached.
1305     # !!! cp (40);
1306     # !!! parse-error (type => 'end tag attribute');
1307     #} else {
1308 wakaba 1.77 !!!cp (41);
1309 wakaba 1.78 #}
1310 wakaba 1.1 } else {
1311     die "$0: $self->{current_token}->{type}: Unknown token type";
1312     }
1313 wakaba 1.57 $self->{state} = DATA_STATE;
1314 wakaba 1.1 # reconsume
1315    
1316     !!!emit ($self->{current_token}); # start tag or end tag
1317    
1318     redo A;
1319 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1320 wakaba 1.125 !!!cp (42);
1321     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1322 wakaba 1.1 !!!next-input-character;
1323     redo A;
1324     } else {
1325 wakaba 1.77 !!!cp (44);
1326 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1327 wakaba 1.1 # start tag or end tag
1328     ## Stay in the state
1329     !!!next-input-character;
1330     redo A;
1331     }
1332 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1333 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1334     $self->{next_char} == 0x000A or # LF
1335     $self->{next_char} == 0x000B or # VT
1336     $self->{next_char} == 0x000C or # FF
1337     $self->{next_char} == 0x0020) { # SP
1338 wakaba 1.77 !!!cp (45);
1339 wakaba 1.1 ## Stay in the state
1340     !!!next-input-character;
1341     redo A;
1342 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1343 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1344 wakaba 1.77 !!!cp (46);
1345 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1346 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1347 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1348 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1349 wakaba 1.77 !!!cp (47);
1350 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1351 wakaba 1.77 } else {
1352     !!!cp (48);
1353 wakaba 1.1 }
1354     } else {
1355     die "$0: $self->{current_token}->{type}: Unknown token type";
1356     }
1357 wakaba 1.57 $self->{state} = DATA_STATE;
1358 wakaba 1.1 !!!next-input-character;
1359    
1360     !!!emit ($self->{current_token}); # start tag or end tag
1361    
1362     redo A;
1363 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1364     $self->{next_char} <= 0x005A) { # A..Z
1365 wakaba 1.77 !!!cp (49);
1366 wakaba 1.119 $self->{current_attribute}
1367     = {name => chr ($self->{next_char} + 0x0020),
1368     value => '',
1369     line => $self->{line}, column => $self->{column}};
1370 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1371 wakaba 1.1 !!!next-input-character;
1372     redo A;
1373 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1374 wakaba 1.125 !!!cp (50);
1375     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1376 wakaba 1.1 !!!next-input-character;
1377     redo A;
1378 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1379 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1380 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1381 wakaba 1.77 !!!cp (52);
1382 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1383 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1384 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1385 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1386 wakaba 1.77 !!!cp (53);
1387 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1388 wakaba 1.77 } else {
1389     !!!cp (54);
1390 wakaba 1.1 }
1391     } else {
1392     die "$0: $self->{current_token}->{type}: Unknown token type";
1393     }
1394 wakaba 1.57 $self->{state} = DATA_STATE;
1395 wakaba 1.1 # reconsume
1396    
1397     !!!emit ($self->{current_token}); # start tag or end tag
1398    
1399     redo A;
1400     } else {
1401 wakaba 1.72 if ({
1402     0x0022 => 1, # "
1403     0x0027 => 1, # '
1404     0x003D => 1, # =
1405 wakaba 1.76 }->{$self->{next_char}}) {
1406 wakaba 1.77 !!!cp (55);
1407 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1408 wakaba 1.77 } else {
1409     !!!cp (56);
1410 wakaba 1.72 }
1411 wakaba 1.119 $self->{current_attribute}
1412     = {name => chr ($self->{next_char}),
1413     value => '',
1414     line => $self->{line}, column => $self->{column}};
1415 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1416 wakaba 1.1 !!!next-input-character;
1417     redo A;
1418     }
1419 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1420 wakaba 1.1 my $before_leave = sub {
1421     if (exists $self->{current_token}->{attributes} # start tag or end tag
1422     ->{$self->{current_attribute}->{name}}) { # MUST
1423 wakaba 1.77 !!!cp (57);
1424 wakaba 1.153 !!!parse-error (type => 'duplicate attribute', text => $self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1425 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1426     } else {
1427 wakaba 1.77 !!!cp (58);
1428 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1429     = $self->{current_attribute};
1430     }
1431     }; # $before_leave
1432    
1433 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1434     $self->{next_char} == 0x000A or # LF
1435     $self->{next_char} == 0x000B or # VT
1436     $self->{next_char} == 0x000C or # FF
1437     $self->{next_char} == 0x0020) { # SP
1438 wakaba 1.77 !!!cp (59);
1439 wakaba 1.1 $before_leave->();
1440 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1441 wakaba 1.1 !!!next-input-character;
1442     redo A;
1443 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1444 wakaba 1.77 !!!cp (60);
1445 wakaba 1.1 $before_leave->();
1446 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1447 wakaba 1.1 !!!next-input-character;
1448     redo A;
1449 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1450 wakaba 1.1 $before_leave->();
1451 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1452 wakaba 1.77 !!!cp (61);
1453 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1454 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1455 wakaba 1.77 !!!cp (62);
1456 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1457 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1458 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1459 wakaba 1.1 }
1460     } else {
1461     die "$0: $self->{current_token}->{type}: Unknown token type";
1462     }
1463 wakaba 1.57 $self->{state} = DATA_STATE;
1464 wakaba 1.1 !!!next-input-character;
1465    
1466     !!!emit ($self->{current_token}); # start tag or end tag
1467    
1468     redo A;
1469 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1470     $self->{next_char} <= 0x005A) { # A..Z
1471 wakaba 1.77 !!!cp (63);
1472 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1473 wakaba 1.1 ## Stay in the state
1474     !!!next-input-character;
1475     redo A;
1476 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1477 wakaba 1.125 !!!cp (64);
1478 wakaba 1.1 $before_leave->();
1479 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1480 wakaba 1.1 !!!next-input-character;
1481     redo A;
1482 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1483 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1484 wakaba 1.1 $before_leave->();
1485 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1486 wakaba 1.77 !!!cp (66);
1487 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1488 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1489 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1490 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1491 wakaba 1.77 !!!cp (67);
1492 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1493 wakaba 1.77 } else {
1494 wakaba 1.78 ## NOTE: This state should never be reached.
1495 wakaba 1.77 !!!cp (68);
1496 wakaba 1.1 }
1497     } else {
1498     die "$0: $self->{current_token}->{type}: Unknown token type";
1499     }
1500 wakaba 1.57 $self->{state} = DATA_STATE;
1501 wakaba 1.1 # reconsume
1502    
1503     !!!emit ($self->{current_token}); # start tag or end tag
1504    
1505     redo A;
1506     } else {
1507 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1508     $self->{next_char} == 0x0027) { # '
1509 wakaba 1.77 !!!cp (69);
1510 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1511 wakaba 1.77 } else {
1512     !!!cp (70);
1513 wakaba 1.72 }
1514 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1515 wakaba 1.1 ## Stay in the state
1516     !!!next-input-character;
1517     redo A;
1518     }
1519 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1520 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1521     $self->{next_char} == 0x000A or # LF
1522     $self->{next_char} == 0x000B or # VT
1523     $self->{next_char} == 0x000C or # FF
1524     $self->{next_char} == 0x0020) { # SP
1525 wakaba 1.77 !!!cp (71);
1526 wakaba 1.1 ## Stay in the state
1527     !!!next-input-character;
1528     redo A;
1529 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1530 wakaba 1.77 !!!cp (72);
1531 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1532 wakaba 1.1 !!!next-input-character;
1533     redo A;
1534 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1535 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1536 wakaba 1.77 !!!cp (73);
1537 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1538 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1539 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1540 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1541 wakaba 1.77 !!!cp (74);
1542 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1543 wakaba 1.77 } else {
1544 wakaba 1.78 ## NOTE: This state should never be reached.
1545 wakaba 1.77 !!!cp (75);
1546 wakaba 1.1 }
1547     } else {
1548     die "$0: $self->{current_token}->{type}: Unknown token type";
1549     }
1550 wakaba 1.57 $self->{state} = DATA_STATE;
1551 wakaba 1.1 !!!next-input-character;
1552    
1553     !!!emit ($self->{current_token}); # start tag or end tag
1554    
1555     redo A;
1556 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1557     $self->{next_char} <= 0x005A) { # A..Z
1558 wakaba 1.77 !!!cp (76);
1559 wakaba 1.119 $self->{current_attribute}
1560     = {name => chr ($self->{next_char} + 0x0020),
1561     value => '',
1562     line => $self->{line}, column => $self->{column}};
1563 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1564 wakaba 1.1 !!!next-input-character;
1565     redo A;
1566 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1567 wakaba 1.125 !!!cp (77);
1568     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1569 wakaba 1.1 !!!next-input-character;
1570     redo A;
1571 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1572 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1573 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1574 wakaba 1.77 !!!cp (79);
1575 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1576 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1577 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1578 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1579 wakaba 1.77 !!!cp (80);
1580 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1581 wakaba 1.77 } else {
1582 wakaba 1.78 ## NOTE: This state should never be reached.
1583 wakaba 1.77 !!!cp (81);
1584 wakaba 1.1 }
1585     } else {
1586     die "$0: $self->{current_token}->{type}: Unknown token type";
1587     }
1588 wakaba 1.57 $self->{state} = DATA_STATE;
1589 wakaba 1.1 # reconsume
1590    
1591     !!!emit ($self->{current_token}); # start tag or end tag
1592    
1593     redo A;
1594     } else {
1595 wakaba 1.156 if ($self->{next_char} == 0x0022 or # "
1596     $self->{next_char} == 0x0027) { # '
1597     !!!cp (78);
1598     !!!parse-error (type => 'bad attribute name');
1599     } else {
1600     !!!cp (82);
1601     }
1602 wakaba 1.119 $self->{current_attribute}
1603     = {name => chr ($self->{next_char}),
1604     value => '',
1605     line => $self->{line}, column => $self->{column}};
1606 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1607 wakaba 1.1 !!!next-input-character;
1608     redo A;
1609     }
1610 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1611 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1612     $self->{next_char} == 0x000A or # LF
1613     $self->{next_char} == 0x000B or # VT
1614     $self->{next_char} == 0x000C or # FF
1615     $self->{next_char} == 0x0020) { # SP
1616 wakaba 1.77 !!!cp (83);
1617 wakaba 1.1 ## Stay in the state
1618     !!!next-input-character;
1619     redo A;
1620 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1621 wakaba 1.77 !!!cp (84);
1622 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1623 wakaba 1.1 !!!next-input-character;
1624     redo A;
1625 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1626 wakaba 1.77 !!!cp (85);
1627 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1628 wakaba 1.1 ## reconsume
1629     redo A;
1630 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1631 wakaba 1.77 !!!cp (86);
1632 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1633 wakaba 1.1 !!!next-input-character;
1634     redo A;
1635 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1636 wakaba 1.156 !!!parse-error (type => 'empty unquoted attribute value');
1637 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1638 wakaba 1.77 !!!cp (87);
1639 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1640 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1641 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1642 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1643 wakaba 1.77 !!!cp (88);
1644 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1645 wakaba 1.77 } else {
1646 wakaba 1.78 ## NOTE: This state should never be reached.
1647 wakaba 1.77 !!!cp (89);
1648 wakaba 1.1 }
1649     } else {
1650     die "$0: $self->{current_token}->{type}: Unknown token type";
1651     }
1652 wakaba 1.57 $self->{state} = DATA_STATE;
1653 wakaba 1.1 !!!next-input-character;
1654    
1655     !!!emit ($self->{current_token}); # start tag or end tag
1656    
1657     redo A;
1658 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1659 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1660 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1661 wakaba 1.77 !!!cp (90);
1662 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1663 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1664 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1665 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1666 wakaba 1.77 !!!cp (91);
1667 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1668 wakaba 1.77 } else {
1669 wakaba 1.78 ## NOTE: This state should never be reached.
1670 wakaba 1.77 !!!cp (92);
1671 wakaba 1.1 }
1672     } else {
1673     die "$0: $self->{current_token}->{type}: Unknown token type";
1674     }
1675 wakaba 1.57 $self->{state} = DATA_STATE;
1676 wakaba 1.1 ## reconsume
1677    
1678     !!!emit ($self->{current_token}); # start tag or end tag
1679    
1680     redo A;
1681     } else {
1682 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1683 wakaba 1.77 !!!cp (93);
1684 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1685 wakaba 1.77 } else {
1686     !!!cp (94);
1687 wakaba 1.72 }
1688 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1689 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1690 wakaba 1.1 !!!next-input-character;
1691     redo A;
1692     }
1693 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1694 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1695 wakaba 1.77 !!!cp (95);
1696 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1697 wakaba 1.1 !!!next-input-character;
1698     redo A;
1699 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1700 wakaba 1.77 !!!cp (96);
1701 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1702 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
1703     ## "entity in attribute value state". In this implementation, the
1704     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1705     ## implementation of the "consume a character reference" algorithm.
1706     $self->{entity_in_attr} = 1;
1707     $self->{entity_additional} = 0x0022; # "
1708     $self->{state} = ENTITY_STATE;
1709 wakaba 1.1 !!!next-input-character;
1710     redo A;
1711 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1712 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1713 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1714 wakaba 1.77 !!!cp (97);
1715 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1716 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1717 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1718 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1719 wakaba 1.77 !!!cp (98);
1720 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1721 wakaba 1.77 } else {
1722 wakaba 1.78 ## NOTE: This state should never be reached.
1723 wakaba 1.77 !!!cp (99);
1724 wakaba 1.1 }
1725     } else {
1726     die "$0: $self->{current_token}->{type}: Unknown token type";
1727     }
1728 wakaba 1.57 $self->{state} = DATA_STATE;
1729 wakaba 1.1 ## reconsume
1730    
1731     !!!emit ($self->{current_token}); # start tag or end tag
1732    
1733     redo A;
1734     } else {
1735 wakaba 1.77 !!!cp (100);
1736 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1737 wakaba 1.1 ## Stay in the state
1738     !!!next-input-character;
1739     redo A;
1740     }
1741 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1742 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1743 wakaba 1.77 !!!cp (101);
1744 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1745 wakaba 1.1 !!!next-input-character;
1746     redo A;
1747 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1748 wakaba 1.77 !!!cp (102);
1749 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1750 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
1751     ## "entity in attribute value state". In this implementation, the
1752     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1753     ## implementation of the "consume a character reference" algorithm.
1754     $self->{entity_in_attr} = 1;
1755     $self->{entity_additional} = 0x0027; # '
1756     $self->{state} = ENTITY_STATE;
1757 wakaba 1.1 !!!next-input-character;
1758     redo A;
1759 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1760 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1761 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1762 wakaba 1.77 !!!cp (103);
1763 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1764 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1765 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1766 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1767 wakaba 1.77 !!!cp (104);
1768 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1769 wakaba 1.77 } else {
1770 wakaba 1.78 ## NOTE: This state should never be reached.
1771 wakaba 1.77 !!!cp (105);
1772 wakaba 1.1 }
1773     } else {
1774     die "$0: $self->{current_token}->{type}: Unknown token type";
1775     }
1776 wakaba 1.57 $self->{state} = DATA_STATE;
1777 wakaba 1.1 ## reconsume
1778    
1779     !!!emit ($self->{current_token}); # start tag or end tag
1780    
1781     redo A;
1782     } else {
1783 wakaba 1.77 !!!cp (106);
1784 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1785 wakaba 1.1 ## Stay in the state
1786     !!!next-input-character;
1787     redo A;
1788     }
1789 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1790 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1791     $self->{next_char} == 0x000A or # LF
1792     $self->{next_char} == 0x000B or # VT
1793     $self->{next_char} == 0x000C or # FF
1794     $self->{next_char} == 0x0020) { # SP
1795 wakaba 1.77 !!!cp (107);
1796 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1797 wakaba 1.1 !!!next-input-character;
1798     redo A;
1799 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1800 wakaba 1.77 !!!cp (108);
1801 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1802 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
1803     ## "entity in attribute value state". In this implementation, the
1804     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1805     ## implementation of the "consume a character reference" algorithm.
1806     $self->{entity_in_attr} = 1;
1807     $self->{entity_additional} = -1;
1808     $self->{state} = ENTITY_STATE;
1809 wakaba 1.1 !!!next-input-character;
1810     redo A;
1811 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1812 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1813 wakaba 1.77 !!!cp (109);
1814 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1815 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1816 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1817 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1818 wakaba 1.77 !!!cp (110);
1819 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1820 wakaba 1.77 } else {
1821 wakaba 1.78 ## NOTE: This state should never be reached.
1822 wakaba 1.77 !!!cp (111);
1823 wakaba 1.1 }
1824     } else {
1825     die "$0: $self->{current_token}->{type}: Unknown token type";
1826     }
1827 wakaba 1.57 $self->{state} = DATA_STATE;
1828 wakaba 1.1 !!!next-input-character;
1829    
1830     !!!emit ($self->{current_token}); # start tag or end tag
1831    
1832     redo A;
1833 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1834 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1835 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1836 wakaba 1.77 !!!cp (112);
1837 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1838 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1839 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1840 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1841 wakaba 1.77 !!!cp (113);
1842 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1843 wakaba 1.77 } else {
1844 wakaba 1.78 ## NOTE: This state should never be reached.
1845 wakaba 1.77 !!!cp (114);
1846 wakaba 1.1 }
1847     } else {
1848     die "$0: $self->{current_token}->{type}: Unknown token type";
1849     }
1850 wakaba 1.57 $self->{state} = DATA_STATE;
1851 wakaba 1.1 ## reconsume
1852    
1853     !!!emit ($self->{current_token}); # start tag or end tag
1854    
1855     redo A;
1856     } else {
1857 wakaba 1.72 if ({
1858     0x0022 => 1, # "
1859     0x0027 => 1, # '
1860     0x003D => 1, # =
1861 wakaba 1.76 }->{$self->{next_char}}) {
1862 wakaba 1.77 !!!cp (115);
1863 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1864 wakaba 1.77 } else {
1865     !!!cp (116);
1866 wakaba 1.72 }
1867 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1868 wakaba 1.1 ## Stay in the state
1869     !!!next-input-character;
1870     redo A;
1871     }
1872 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1873 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1874     $self->{next_char} == 0x000A or # LF
1875     $self->{next_char} == 0x000B or # VT
1876     $self->{next_char} == 0x000C or # FF
1877     $self->{next_char} == 0x0020) { # SP
1878 wakaba 1.77 !!!cp (118);
1879 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1880     !!!next-input-character;
1881     redo A;
1882 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1883 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1884 wakaba 1.77 !!!cp (119);
1885 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1886     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1887     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1888     if ($self->{current_token}->{attributes}) {
1889 wakaba 1.77 !!!cp (120);
1890 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1891 wakaba 1.77 } else {
1892 wakaba 1.78 ## NOTE: This state should never be reached.
1893 wakaba 1.77 !!!cp (121);
1894 wakaba 1.72 }
1895     } else {
1896     die "$0: $self->{current_token}->{type}: Unknown token type";
1897     }
1898     $self->{state} = DATA_STATE;
1899     !!!next-input-character;
1900    
1901     !!!emit ($self->{current_token}); # start tag or end tag
1902    
1903     redo A;
1904 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1905 wakaba 1.125 !!!cp (122);
1906     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1907 wakaba 1.72 !!!next-input-character;
1908 wakaba 1.125 redo A;
1909 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1910     !!!parse-error (type => 'unclosed tag');
1911     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1912     !!!cp (122.3);
1913     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1914     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1915     if ($self->{current_token}->{attributes}) {
1916     !!!cp (122.1);
1917     !!!parse-error (type => 'end tag attribute');
1918     } else {
1919     ## NOTE: This state should never be reached.
1920     !!!cp (122.2);
1921     }
1922     } else {
1923     die "$0: $self->{current_token}->{type}: Unknown token type";
1924     }
1925     $self->{state} = DATA_STATE;
1926     ## Reconsume.
1927     !!!emit ($self->{current_token}); # start tag or end tag
1928     redo A;
1929 wakaba 1.125 } else {
1930     !!!cp ('124.1');
1931     !!!parse-error (type => 'no space between attributes');
1932     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1933     ## reconsume
1934     redo A;
1935     }
1936     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1937     if ($self->{next_char} == 0x003E) { # >
1938     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1939     !!!cp ('124.2');
1940     !!!parse-error (type => 'nestc', token => $self->{current_token});
1941     ## TODO: Different type than slash in start tag
1942     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1943     if ($self->{current_token}->{attributes}) {
1944     !!!cp ('124.4');
1945     !!!parse-error (type => 'end tag attribute');
1946     } else {
1947     !!!cp ('124.5');
1948     }
1949     ## TODO: Test |<title></title/>|
1950 wakaba 1.72 } else {
1951 wakaba 1.125 !!!cp ('124.3');
1952     $self->{self_closing} = 1;
1953 wakaba 1.72 }
1954 wakaba 1.125
1955     $self->{state} = DATA_STATE;
1956     !!!next-input-character;
1957    
1958     !!!emit ($self->{current_token}); # start tag or end tag
1959    
1960 wakaba 1.72 redo A;
1961 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1962     !!!parse-error (type => 'unclosed tag');
1963     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1964     !!!cp (124.7);
1965     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1966     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1967     if ($self->{current_token}->{attributes}) {
1968     !!!cp (124.5);
1969     !!!parse-error (type => 'end tag attribute');
1970     } else {
1971     ## NOTE: This state should never be reached.
1972     !!!cp (124.6);
1973     }
1974     } else {
1975     die "$0: $self->{current_token}->{type}: Unknown token type";
1976     }
1977     $self->{state} = DATA_STATE;
1978     ## Reconsume.
1979     !!!emit ($self->{current_token}); # start tag or end tag
1980     redo A;
1981 wakaba 1.72 } else {
1982 wakaba 1.125 !!!cp ('124.4');
1983     !!!parse-error (type => 'nestc');
1984     ## TODO: This error type is wrong.
1985 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1986 wakaba 1.125 ## Reconsume.
1987 wakaba 1.72 redo A;
1988     }
1989 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1990 wakaba 1.1 ## (only happens in the PCDATA state)
1991 wakaba 1.167
1992     ## NOTE: Unlike spec's "bogus comment state", this implementation
1993     ## consumes characters on a one-by-one basis.
1994 wakaba 1.1
1995 wakaba 1.167 if ($self->{next_char} == 0x003E) { # >
1996     !!!cp (124);
1997     $self->{state} = DATA_STATE;
1998     !!!next-input-character;
1999 wakaba 1.1
2000 wakaba 1.167 !!!emit ($self->{current_token}); # comment
2001     redo A;
2002     } elsif ($self->{next_char} == -1) {
2003     !!!cp (125);
2004     $self->{state} = DATA_STATE;
2005     ## reconsume
2006 wakaba 1.1
2007 wakaba 1.167 !!!emit ($self->{current_token}); # comment
2008     redo A;
2009     } else {
2010     !!!cp (126);
2011     $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2012     ## Stay in the state.
2013     !!!next-input-character;
2014     redo A;
2015     }
2016 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2017 wakaba 1.1 ## (only happens in the PCDATA state)
2018    
2019 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2020 wakaba 1.163 !!!cp (133);
2021     $self->{state} = MD_HYPHEN_STATE;
2022 wakaba 1.1 !!!next-input-character;
2023 wakaba 1.163 redo A;
2024 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
2025     $self->{next_char} == 0x0064) { # d
2026 wakaba 1.163 ## ASCII case-insensitive.
2027     !!!cp (130);
2028     $self->{state} = MD_DOCTYPE_STATE;
2029     $self->{state_keyword} = chr $self->{next_char};
2030 wakaba 1.1 !!!next-input-character;
2031 wakaba 1.163 redo A;
2032 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2033     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
2034     $self->{next_char} == 0x005B) { # [
2035 wakaba 1.163 !!!cp (135.4);
2036     $self->{state} = MD_CDATA_STATE;
2037     $self->{state_keyword} = '[';
2038 wakaba 1.127 !!!next-input-character;
2039 wakaba 1.163 redo A;
2040 wakaba 1.77 } else {
2041     !!!cp (136);
2042 wakaba 1.1 }
2043    
2044 wakaba 1.163 !!!parse-error (type => 'bogus comment',
2045     line => $self->{line_prev},
2046     column => $self->{column_prev} - 1);
2047     ## Reconsume.
2048 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
2049 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2050 wakaba 1.163 line => $self->{line_prev},
2051     column => $self->{column_prev} - 1,
2052 wakaba 1.118 };
2053 wakaba 1.1 redo A;
2054 wakaba 1.163 } elsif ($self->{state} == MD_HYPHEN_STATE) {
2055     if ($self->{next_char} == 0x002D) { # -
2056     !!!cp (127);
2057     $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2058     line => $self->{line_prev},
2059     column => $self->{column_prev} - 2,
2060     };
2061     $self->{state} = COMMENT_START_STATE;
2062     !!!next-input-character;
2063     redo A;
2064     } else {
2065     !!!cp (128);
2066     !!!parse-error (type => 'bogus comment',
2067     line => $self->{line_prev},
2068     column => $self->{column_prev} - 2);
2069     $self->{state} = BOGUS_COMMENT_STATE;
2070     ## Reconsume.
2071     $self->{current_token} = {type => COMMENT_TOKEN,
2072     data => '-',
2073     line => $self->{line_prev},
2074     column => $self->{column_prev} - 2,
2075     };
2076     redo A;
2077     }
2078     } elsif ($self->{state} == MD_DOCTYPE_STATE) {
2079     ## ASCII case-insensitive.
2080     if ($self->{next_char} == [
2081     undef,
2082     0x004F, # O
2083     0x0043, # C
2084     0x0054, # T
2085     0x0059, # Y
2086     0x0050, # P
2087     ]->[length $self->{state_keyword}] or
2088     $self->{next_char} == [
2089     undef,
2090     0x006F, # o
2091     0x0063, # c
2092     0x0074, # t
2093     0x0079, # y
2094     0x0070, # p
2095     ]->[length $self->{state_keyword}]) {
2096     !!!cp (131);
2097     ## Stay in the state.
2098     $self->{state_keyword} .= chr $self->{next_char};
2099     !!!next-input-character;
2100     redo A;
2101     } elsif ((length $self->{state_keyword}) == 6 and
2102     ($self->{next_char} == 0x0045 or # E
2103     $self->{next_char} == 0x0065)) { # e
2104     !!!cp (129);
2105     $self->{state} = DOCTYPE_STATE;
2106     $self->{current_token} = {type => DOCTYPE_TOKEN,
2107     quirks => 1,
2108     line => $self->{line_prev},
2109     column => $self->{column_prev} - 7,
2110     };
2111     !!!next-input-character;
2112     redo A;
2113     } else {
2114     !!!cp (132);
2115     !!!parse-error (type => 'bogus comment',
2116     line => $self->{line_prev},
2117     column => $self->{column_prev} - 1 - length $self->{state_keyword});
2118     $self->{state} = BOGUS_COMMENT_STATE;
2119     ## Reconsume.
2120     $self->{current_token} = {type => COMMENT_TOKEN,
2121     data => $self->{state_keyword},
2122     line => $self->{line_prev},
2123     column => $self->{column_prev} - 1 - length $self->{state_keyword},
2124     };
2125     redo A;
2126     }
2127     } elsif ($self->{state} == MD_CDATA_STATE) {
2128     if ($self->{next_char} == {
2129     '[' => 0x0043, # C
2130     '[C' => 0x0044, # D
2131     '[CD' => 0x0041, # A
2132     '[CDA' => 0x0054, # T
2133     '[CDAT' => 0x0041, # A
2134     }->{$self->{state_keyword}}) {
2135     !!!cp (135.1);
2136     ## Stay in the state.
2137     $self->{state_keyword} .= chr $self->{next_char};
2138     !!!next-input-character;
2139     redo A;
2140     } elsif ($self->{state_keyword} eq '[CDATA' and
2141     $self->{next_char} == 0x005B) { # [
2142     !!!cp (135.2);
2143 wakaba 1.165 $self->{current_token} = {type => CHARACTER_TOKEN,
2144     data => '',
2145     line => $self->{line_prev},
2146     column => $self->{column_prev} - 7};
2147     $self->{state} = CDATA_SECTION_STATE;
2148 wakaba 1.163 !!!next-input-character;
2149     redo A;
2150     } else {
2151     !!!cp (135.3);
2152     !!!parse-error (type => 'bogus comment',
2153     line => $self->{line_prev},
2154     column => $self->{column_prev} - 1 - length $self->{state_keyword});
2155     $self->{state} = BOGUS_COMMENT_STATE;
2156     ## Reconsume.
2157     $self->{current_token} = {type => COMMENT_TOKEN,
2158     data => $self->{state_keyword},
2159     line => $self->{line_prev},
2160     column => $self->{column_prev} - 1 - length $self->{state_keyword},
2161     };
2162     redo A;
2163     }
2164 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
2165 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2166 wakaba 1.77 !!!cp (137);
2167 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
2168 wakaba 1.23 !!!next-input-character;
2169     redo A;
2170 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2171 wakaba 1.77 !!!cp (138);
2172 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2173 wakaba 1.57 $self->{state} = DATA_STATE;
2174 wakaba 1.23 !!!next-input-character;
2175    
2176     !!!emit ($self->{current_token}); # comment
2177    
2178     redo A;
2179 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2180 wakaba 1.77 !!!cp (139);
2181 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2182 wakaba 1.57 $self->{state} = DATA_STATE;
2183 wakaba 1.23 ## reconsume
2184    
2185     !!!emit ($self->{current_token}); # comment
2186    
2187     redo A;
2188     } else {
2189 wakaba 1.77 !!!cp (140);
2190 wakaba 1.23 $self->{current_token}->{data} # comment
2191 wakaba 1.76 .= chr ($self->{next_char});
2192 wakaba 1.57 $self->{state} = COMMENT_STATE;
2193 wakaba 1.23 !!!next-input-character;
2194     redo A;
2195     }
2196 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
2197 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2198 wakaba 1.77 !!!cp (141);
2199 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2200 wakaba 1.23 !!!next-input-character;
2201     redo A;
2202 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2203 wakaba 1.77 !!!cp (142);
2204 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2205 wakaba 1.57 $self->{state} = DATA_STATE;
2206 wakaba 1.23 !!!next-input-character;
2207    
2208     !!!emit ($self->{current_token}); # comment
2209    
2210     redo A;
2211 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2212 wakaba 1.77 !!!cp (143);
2213 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2214 wakaba 1.57 $self->{state} = DATA_STATE;
2215 wakaba 1.23 ## reconsume
2216    
2217     !!!emit ($self->{current_token}); # comment
2218    
2219     redo A;
2220     } else {
2221 wakaba 1.77 !!!cp (144);
2222 wakaba 1.23 $self->{current_token}->{data} # comment
2223 wakaba 1.76 .= '-' . chr ($self->{next_char});
2224 wakaba 1.57 $self->{state} = COMMENT_STATE;
2225 wakaba 1.23 !!!next-input-character;
2226     redo A;
2227     }
2228 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
2229 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2230 wakaba 1.77 !!!cp (145);
2231 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
2232 wakaba 1.1 !!!next-input-character;
2233     redo A;
2234 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2235 wakaba 1.77 !!!cp (146);
2236 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2237 wakaba 1.57 $self->{state} = DATA_STATE;
2238 wakaba 1.1 ## reconsume
2239    
2240     !!!emit ($self->{current_token}); # comment
2241    
2242     redo A;
2243     } else {
2244 wakaba 1.77 !!!cp (147);
2245 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2246 wakaba 1.1 ## Stay in the state
2247     !!!next-input-character;
2248     redo A;
2249     }
2250 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2251 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2252 wakaba 1.77 !!!cp (148);
2253 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2254 wakaba 1.1 !!!next-input-character;
2255     redo A;
2256 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2257 wakaba 1.77 !!!cp (149);
2258 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2259 wakaba 1.57 $self->{state} = DATA_STATE;
2260 wakaba 1.1 ## reconsume
2261    
2262     !!!emit ($self->{current_token}); # comment
2263    
2264     redo A;
2265     } else {
2266 wakaba 1.77 !!!cp (150);
2267 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2268 wakaba 1.57 $self->{state} = COMMENT_STATE;
2269 wakaba 1.1 !!!next-input-character;
2270     redo A;
2271     }
2272 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
2273 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2274 wakaba 1.77 !!!cp (151);
2275 wakaba 1.57 $self->{state} = DATA_STATE;
2276 wakaba 1.1 !!!next-input-character;
2277    
2278     !!!emit ($self->{current_token}); # comment
2279    
2280     redo A;
2281 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
2282 wakaba 1.77 !!!cp (152);
2283 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2284     line => $self->{line_prev},
2285     column => $self->{column_prev});
2286 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
2287     ## Stay in the state
2288     !!!next-input-character;
2289     redo A;
2290 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2291 wakaba 1.77 !!!cp (153);
2292 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2293 wakaba 1.57 $self->{state} = DATA_STATE;
2294 wakaba 1.1 ## reconsume
2295    
2296     !!!emit ($self->{current_token}); # comment
2297    
2298     redo A;
2299     } else {
2300 wakaba 1.77 !!!cp (154);
2301 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2302     line => $self->{line_prev},
2303     column => $self->{column_prev});
2304 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2305 wakaba 1.57 $self->{state} = COMMENT_STATE;
2306 wakaba 1.1 !!!next-input-character;
2307     redo A;
2308     }
2309 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
2310 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2311     $self->{next_char} == 0x000A or # LF
2312     $self->{next_char} == 0x000B or # VT
2313     $self->{next_char} == 0x000C or # FF
2314     $self->{next_char} == 0x0020) { # SP
2315 wakaba 1.77 !!!cp (155);
2316 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2317 wakaba 1.1 !!!next-input-character;
2318     redo A;
2319     } else {
2320 wakaba 1.77 !!!cp (156);
2321 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
2322 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2323 wakaba 1.1 ## reconsume
2324     redo A;
2325     }
2326 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2327 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2328     $self->{next_char} == 0x000A or # LF
2329     $self->{next_char} == 0x000B or # VT
2330     $self->{next_char} == 0x000C or # FF
2331     $self->{next_char} == 0x0020) { # SP
2332 wakaba 1.77 !!!cp (157);
2333 wakaba 1.1 ## Stay in the state
2334     !!!next-input-character;
2335     redo A;
2336 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2337 wakaba 1.77 !!!cp (158);
2338 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2339 wakaba 1.57 $self->{state} = DATA_STATE;
2340 wakaba 1.1 !!!next-input-character;
2341    
2342 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2343 wakaba 1.1
2344     redo A;
2345 wakaba 1.77 } elsif ($self->{next_char} == -1) {
2346     !!!cp (159);
2347 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2348 wakaba 1.57 $self->{state} = DATA_STATE;
2349 wakaba 1.1 ## reconsume
2350    
2351 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2352 wakaba 1.1
2353     redo A;
2354     } else {
2355 wakaba 1.77 !!!cp (160);
2356 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
2357     delete $self->{current_token}->{quirks};
2358 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
2359 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
2360 wakaba 1.1 !!!next-input-character;
2361     redo A;
2362     }
2363 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2364 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
2365 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2366     $self->{next_char} == 0x000A or # LF
2367     $self->{next_char} == 0x000B or # VT
2368     $self->{next_char} == 0x000C or # FF
2369     $self->{next_char} == 0x0020) { # SP
2370 wakaba 1.77 !!!cp (161);
2371 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2372 wakaba 1.1 !!!next-input-character;
2373     redo A;
2374 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2375 wakaba 1.77 !!!cp (162);
2376 wakaba 1.57 $self->{state} = DATA_STATE;
2377 wakaba 1.1 !!!next-input-character;
2378    
2379     !!!emit ($self->{current_token}); # DOCTYPE
2380    
2381     redo A;
2382 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2383 wakaba 1.77 !!!cp (163);
2384 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2385 wakaba 1.57 $self->{state} = DATA_STATE;
2386 wakaba 1.1 ## reconsume
2387    
2388 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2389 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2390 wakaba 1.1
2391     redo A;
2392     } else {
2393 wakaba 1.77 !!!cp (164);
2394 wakaba 1.1 $self->{current_token}->{name}
2395 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
2396 wakaba 1.1 ## Stay in the state
2397     !!!next-input-character;
2398     redo A;
2399     }
2400 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2401 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2402     $self->{next_char} == 0x000A or # LF
2403     $self->{next_char} == 0x000B or # VT
2404     $self->{next_char} == 0x000C or # FF
2405     $self->{next_char} == 0x0020) { # SP
2406 wakaba 1.77 !!!cp (165);
2407 wakaba 1.1 ## Stay in the state
2408     !!!next-input-character;
2409     redo A;
2410 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2411 wakaba 1.77 !!!cp (166);
2412 wakaba 1.57 $self->{state} = DATA_STATE;
2413 wakaba 1.1 !!!next-input-character;
2414    
2415     !!!emit ($self->{current_token}); # DOCTYPE
2416    
2417     redo A;
2418 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2419 wakaba 1.77 !!!cp (167);
2420 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2421 wakaba 1.57 $self->{state} = DATA_STATE;
2422 wakaba 1.1 ## reconsume
2423    
2424 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2425 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2426    
2427     redo A;
2428 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2429     $self->{next_char} == 0x0070) { # p
2430 wakaba 1.166 $self->{state} = PUBLIC_STATE;
2431     $self->{state_keyword} = chr $self->{next_char};
2432 wakaba 1.18 !!!next-input-character;
2433 wakaba 1.166 redo A;
2434 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2435     $self->{next_char} == 0x0073) { # s
2436 wakaba 1.166 $self->{state} = SYSTEM_STATE;
2437     $self->{state_keyword} = chr $self->{next_char};
2438 wakaba 1.18 !!!next-input-character;
2439 wakaba 1.166 redo A;
2440 wakaba 1.18 } else {
2441 wakaba 1.77 !!!cp (180);
2442 wakaba 1.166 !!!parse-error (type => 'string after DOCTYPE name');
2443     $self->{current_token}->{quirks} = 1;
2444    
2445     $self->{state} = BOGUS_DOCTYPE_STATE;
2446 wakaba 1.18 !!!next-input-character;
2447 wakaba 1.166 redo A;
2448 wakaba 1.18 }
2449 wakaba 1.166 } elsif ($self->{state} == PUBLIC_STATE) {
2450     ## ASCII case-insensitive
2451     if ($self->{next_char} == [
2452     undef,
2453     0x0055, # U
2454     0x0042, # B
2455     0x004C, # L
2456     0x0049, # I
2457     ]->[length $self->{state_keyword}] or
2458     $self->{next_char} == [
2459     undef,
2460     0x0075, # u
2461     0x0062, # b
2462     0x006C, # l
2463     0x0069, # i
2464     ]->[length $self->{state_keyword}]) {
2465     !!!cp (175);
2466     ## Stay in the state.
2467     $self->{state_keyword} .= chr $self->{next_char};
2468     !!!next-input-character;
2469     redo A;
2470     } elsif ((length $self->{state_keyword}) == 5 and
2471     ($self->{next_char} == 0x0043 or # C
2472     $self->{next_char} == 0x0063)) { # c
2473     !!!cp (168);
2474     $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2475     !!!next-input-character;
2476     redo A;
2477     } else {
2478     !!!cp (169);
2479     !!!parse-error (type => 'string after DOCTYPE name',
2480     line => $self->{line_prev},
2481     column => $self->{column_prev} + 1 - length $self->{state_keyword});
2482     $self->{current_token}->{quirks} = 1;
2483 wakaba 1.18
2484 wakaba 1.166 $self->{state} = BOGUS_DOCTYPE_STATE;
2485     ## Reconsume.
2486     redo A;
2487     }
2488     } elsif ($self->{state} == SYSTEM_STATE) {
2489     ## ASCII case-insensitive
2490     if ($self->{next_char} == [
2491     undef,
2492     0x0059, # Y
2493     0x0053, # S
2494     0x0054, # T
2495     0x0045, # E
2496     ]->[length $self->{state_keyword}] or
2497     $self->{next_char} == [
2498     undef,
2499     0x0079, # y
2500     0x0073, # s
2501     0x0074, # t
2502     0x0065, # e
2503     ]->[length $self->{state_keyword}]) {
2504     !!!cp (170);
2505     ## Stay in the state.
2506     $self->{state_keyword} .= chr $self->{next_char};
2507     !!!next-input-character;
2508     redo A;
2509     } elsif ((length $self->{state_keyword}) == 5 and
2510     ($self->{next_char} == 0x004D or # M
2511     $self->{next_char} == 0x006D)) { # m
2512     !!!cp (171);
2513     $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2514     !!!next-input-character;
2515     redo A;
2516     } else {
2517     !!!cp (172);
2518     !!!parse-error (type => 'string after DOCTYPE name',
2519     line => $self->{line_prev},
2520     column => $self->{column_prev} + 1 - length $self->{state_keyword});
2521     $self->{current_token}->{quirks} = 1;
2522 wakaba 1.73
2523 wakaba 1.166 $self->{state} = BOGUS_DOCTYPE_STATE;
2524     ## Reconsume.
2525     redo A;
2526     }
2527 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2528 wakaba 1.18 if ({
2529     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2530     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2531 wakaba 1.76 }->{$self->{next_char}}) {
2532 wakaba 1.77 !!!cp (181);
2533 wakaba 1.18 ## Stay in the state
2534     !!!next-input-character;
2535     redo A;
2536 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2537 wakaba 1.77 !!!cp (182);
2538 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2539 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2540 wakaba 1.18 !!!next-input-character;
2541     redo A;
2542 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2543 wakaba 1.77 !!!cp (183);
2544 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2545 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2546 wakaba 1.18 !!!next-input-character;
2547     redo A;
2548 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2549 wakaba 1.77 !!!cp (184);
2550 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2551    
2552 wakaba 1.57 $self->{state} = DATA_STATE;
2553 wakaba 1.18 !!!next-input-character;
2554    
2555 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2556 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2557    
2558     redo A;
2559 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2560 wakaba 1.77 !!!cp (185);
2561 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2562    
2563 wakaba 1.57 $self->{state} = DATA_STATE;
2564 wakaba 1.18 ## reconsume
2565    
2566 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2567 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2568    
2569     redo A;
2570     } else {
2571 wakaba 1.77 !!!cp (186);
2572 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2573 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2574 wakaba 1.73
2575 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2576 wakaba 1.18 !!!next-input-character;
2577     redo A;
2578     }
2579 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2580 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2581 wakaba 1.77 !!!cp (187);
2582 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2583 wakaba 1.18 !!!next-input-character;
2584     redo A;
2585 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2586 wakaba 1.77 !!!cp (188);
2587 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2588    
2589     $self->{state} = DATA_STATE;
2590     !!!next-input-character;
2591    
2592 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2593 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2594    
2595     redo A;
2596 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2597 wakaba 1.77 !!!cp (189);
2598 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2599    
2600 wakaba 1.57 $self->{state} = DATA_STATE;
2601 wakaba 1.18 ## reconsume
2602    
2603 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2604 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2605    
2606     redo A;
2607     } else {
2608 wakaba 1.77 !!!cp (190);
2609 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2610 wakaba 1.76 .= chr $self->{next_char};
2611 wakaba 1.18 ## Stay in the state
2612     !!!next-input-character;
2613     redo A;
2614     }
2615 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2616 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2617 wakaba 1.77 !!!cp (191);
2618 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2619 wakaba 1.18 !!!next-input-character;
2620     redo A;
2621 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2622 wakaba 1.77 !!!cp (192);
2623 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2624    
2625     $self->{state} = DATA_STATE;
2626     !!!next-input-character;
2627    
2628 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2629 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2630    
2631     redo A;
2632 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2633 wakaba 1.77 !!!cp (193);
2634 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2635    
2636 wakaba 1.57 $self->{state} = DATA_STATE;
2637 wakaba 1.18 ## reconsume
2638    
2639 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2640 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2641    
2642     redo A;
2643     } else {
2644 wakaba 1.77 !!!cp (194);
2645 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2646 wakaba 1.76 .= chr $self->{next_char};
2647 wakaba 1.18 ## Stay in the state
2648     !!!next-input-character;
2649     redo A;
2650     }
2651 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2652 wakaba 1.18 if ({
2653     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2654     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2655 wakaba 1.76 }->{$self->{next_char}}) {
2656 wakaba 1.77 !!!cp (195);
2657 wakaba 1.18 ## Stay in the state
2658     !!!next-input-character;
2659     redo A;
2660 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2661 wakaba 1.77 !!!cp (196);
2662 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2663 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2664 wakaba 1.18 !!!next-input-character;
2665     redo A;
2666 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2667 wakaba 1.77 !!!cp (197);
2668 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2669 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2670 wakaba 1.18 !!!next-input-character;
2671     redo A;
2672 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2673 wakaba 1.77 !!!cp (198);
2674 wakaba 1.57 $self->{state} = DATA_STATE;
2675 wakaba 1.18 !!!next-input-character;
2676    
2677     !!!emit ($self->{current_token}); # DOCTYPE
2678    
2679     redo A;
2680 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2681 wakaba 1.77 !!!cp (199);
2682 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2683    
2684 wakaba 1.57 $self->{state} = DATA_STATE;
2685 wakaba 1.26 ## reconsume
2686 wakaba 1.18
2687 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2688 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2689    
2690     redo A;
2691     } else {
2692 wakaba 1.77 !!!cp (200);
2693 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2694 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2695 wakaba 1.73
2696 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2697 wakaba 1.18 !!!next-input-character;
2698     redo A;
2699     }
2700 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2701 wakaba 1.18 if ({
2702     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2703     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2704 wakaba 1.76 }->{$self->{next_char}}) {
2705 wakaba 1.77 !!!cp (201);
2706 wakaba 1.18 ## Stay in the state
2707     !!!next-input-character;
2708     redo A;
2709 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2710 wakaba 1.77 !!!cp (202);
2711 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2712 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2713 wakaba 1.18 !!!next-input-character;
2714     redo A;
2715 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2716 wakaba 1.77 !!!cp (203);
2717 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2718 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2719 wakaba 1.18 !!!next-input-character;
2720     redo A;
2721 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2722 wakaba 1.77 !!!cp (204);
2723 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2724 wakaba 1.57 $self->{state} = DATA_STATE;
2725 wakaba 1.18 !!!next-input-character;
2726    
2727 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2728 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2729    
2730     redo A;
2731 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2732 wakaba 1.77 !!!cp (205);
2733 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2734    
2735 wakaba 1.57 $self->{state} = DATA_STATE;
2736 wakaba 1.26 ## reconsume
2737 wakaba 1.18
2738 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2739 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2740    
2741     redo A;
2742     } else {
2743 wakaba 1.77 !!!cp (206);
2744 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2745 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2746 wakaba 1.73
2747 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2748 wakaba 1.18 !!!next-input-character;
2749     redo A;
2750     }
2751 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2752 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2753 wakaba 1.77 !!!cp (207);
2754 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2755 wakaba 1.18 !!!next-input-character;
2756     redo A;
2757 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2758 wakaba 1.77 !!!cp (208);
2759 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2760 wakaba 1.69
2761     $self->{state} = DATA_STATE;
2762     !!!next-input-character;
2763    
2764 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2765 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2766    
2767     redo A;
2768 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2769 wakaba 1.77 !!!cp (209);
2770 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2771    
2772 wakaba 1.57 $self->{state} = DATA_STATE;
2773 wakaba 1.18 ## reconsume
2774    
2775 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2776 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2777    
2778     redo A;
2779     } else {
2780 wakaba 1.77 !!!cp (210);
2781 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2782 wakaba 1.76 .= chr $self->{next_char};
2783 wakaba 1.18 ## Stay in the state
2784     !!!next-input-character;
2785     redo A;
2786     }
2787 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2788 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2789 wakaba 1.77 !!!cp (211);
2790 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2791 wakaba 1.18 !!!next-input-character;
2792     redo A;
2793 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2794 wakaba 1.77 !!!cp (212);
2795 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2796 wakaba 1.69
2797     $self->{state} = DATA_STATE;
2798     !!!next-input-character;
2799    
2800 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2801 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2802    
2803     redo A;
2804 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2805 wakaba 1.77 !!!cp (213);
2806 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2807    
2808 wakaba 1.57 $self->{state} = DATA_STATE;
2809 wakaba 1.18 ## reconsume
2810    
2811 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2812 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2813    
2814     redo A;
2815     } else {
2816 wakaba 1.77 !!!cp (214);
2817 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2818 wakaba 1.76 .= chr $self->{next_char};
2819 wakaba 1.18 ## Stay in the state
2820     !!!next-input-character;
2821     redo A;
2822     }
2823 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2824 wakaba 1.18 if ({
2825     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2826     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2827 wakaba 1.76 }->{$self->{next_char}}) {
2828 wakaba 1.77 !!!cp (215);
2829 wakaba 1.18 ## Stay in the state
2830     !!!next-input-character;
2831     redo A;
2832 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2833 wakaba 1.77 !!!cp (216);
2834 wakaba 1.57 $self->{state} = DATA_STATE;
2835 wakaba 1.18 !!!next-input-character;
2836    
2837     !!!emit ($self->{current_token}); # DOCTYPE
2838    
2839     redo A;
2840 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2841 wakaba 1.77 !!!cp (217);
2842 wakaba 1.150 !!!parse-error (type => 'unclosed DOCTYPE');
2843 wakaba 1.57 $self->{state} = DATA_STATE;
2844 wakaba 1.26 ## reconsume
2845 wakaba 1.18
2846 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2847 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2848    
2849     redo A;
2850     } else {
2851 wakaba 1.77 !!!cp (218);
2852 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2853 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2854 wakaba 1.73
2855 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2856 wakaba 1.1 !!!next-input-character;
2857     redo A;
2858     }
2859 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2860 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2861 wakaba 1.77 !!!cp (219);
2862 wakaba 1.57 $self->{state} = DATA_STATE;
2863 wakaba 1.1 !!!next-input-character;
2864    
2865     !!!emit ($self->{current_token}); # DOCTYPE
2866    
2867     redo A;
2868 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2869 wakaba 1.77 !!!cp (220);
2870 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2871 wakaba 1.57 $self->{state} = DATA_STATE;
2872 wakaba 1.1 ## reconsume
2873    
2874     !!!emit ($self->{current_token}); # DOCTYPE
2875    
2876     redo A;
2877     } else {
2878 wakaba 1.77 !!!cp (221);
2879 wakaba 1.1 ## Stay in the state
2880     !!!next-input-character;
2881     redo A;
2882     }
2883 wakaba 1.165 } elsif ($self->{state} == CDATA_SECTION_STATE) {
2884     ## NOTE: "CDATA section state" in the spec is jointly implemented
2885     ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
2886     ## and |CDATA_SECTION_MSE2_STATE|.
2887 wakaba 1.127
2888 wakaba 1.165 if ($self->{next_char} == 0x005D) { # ]
2889     !!!cp (221.1);
2890     $self->{state} = CDATA_SECTION_MSE1_STATE;
2891     !!!next-input-character;
2892     redo A;
2893     } elsif ($self->{next_char} == -1) {
2894     $self->{state} = DATA_STATE;
2895     !!!next-input-character;
2896     if (length $self->{current_token}->{data}) { # character
2897     !!!cp (221.2);
2898     !!!emit ($self->{current_token}); # character
2899     } else {
2900     !!!cp (221.3);
2901     ## No token to emit. $self->{current_token} is discarded.
2902     }
2903     redo A;
2904     } else {
2905     !!!cp (221.4);
2906     $self->{current_token}->{data} .= chr $self->{next_char};
2907     ## Stay in the state.
2908     !!!next-input-character;
2909     redo A;
2910     }
2911 wakaba 1.127
2912 wakaba 1.165 ## ISSUE: "text tokens" in spec.
2913     } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
2914     if ($self->{next_char} == 0x005D) { # ]
2915     !!!cp (221.5);
2916     $self->{state} = CDATA_SECTION_MSE2_STATE;
2917     !!!next-input-character;
2918     redo A;
2919     } else {
2920     !!!cp (221.6);
2921     $self->{current_token}->{data} .= ']';
2922     $self->{state} = CDATA_SECTION_STATE;
2923     ## Reconsume.
2924     redo A;
2925     }
2926     } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
2927     if ($self->{next_char} == 0x003E) { # >
2928     $self->{state} = DATA_STATE;
2929     !!!next-input-character;
2930     if (length $self->{current_token}->{data}) { # character
2931     !!!cp (221.7);
2932     !!!emit ($self->{current_token}); # character
2933 wakaba 1.127 } else {
2934 wakaba 1.165 !!!cp (221.8);
2935     ## No token to emit. $self->{current_token} is discarded.
2936 wakaba 1.127 }
2937 wakaba 1.165 redo A;
2938     } elsif ($self->{next_char} == 0x005D) { # ]
2939     !!!cp (221.9); # character
2940     $self->{current_token}->{data} .= ']'; ## Add first "]" of "]]]".
2941     ## Stay in the state.
2942 wakaba 1.127 !!!next-input-character;
2943 wakaba 1.165 redo A;
2944 wakaba 1.127 } else {
2945 wakaba 1.165 !!!cp (221.11);
2946     $self->{current_token}->{data} .= ']]'; # character
2947     $self->{state} = CDATA_SECTION_STATE;
2948     ## Reconsume.
2949     redo A;
2950 wakaba 1.127 }
2951 wakaba 1.167 } elsif ($self->{state} == ENTITY_STATE) {
2952 wakaba 1.168 if ({
2953     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2954     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &
2955     $self->{entity_additional} => 1,
2956     }->{$self->{next_char}}) {
2957     !!!cp (1001);
2958     ## Don't consume
2959     ## No error
2960     ## Return nothing.
2961     #
2962     } elsif ($self->{next_char} == 0x0023) { # #
2963     $self->{state} = ENTITY_HASH_STATE;
2964     $self->{state_keyword} = '#';
2965     !!!next-input-character;
2966     redo A;
2967     } elsif ((0x0041 <= $self->{next_char} and
2968     $self->{next_char} <= 0x005A) or # A..Z
2969     (0x0061 <= $self->{next_char} and
2970     $self->{next_char} <= 0x007A)) { # a..z
2971     require Whatpm::_NamedEntityList;
2972     $self->{state} = ENTITY_NAME_STATE;
2973     $self->{state_keyword} = chr $self->{next_char};
2974     $self->{entity__value} = $self->{state_keyword};
2975     $self->{entity__match} = 0;
2976     !!!next-input-character;
2977     redo A;
2978     } else {
2979     !!!cp (1027);
2980     !!!parse-error (type => 'bare ero');
2981     ## Return nothing.
2982     #
2983     }
2984 wakaba 1.20
2985 wakaba 1.168 ## NOTE: No character is consumed by the "consume a character
2986     ## reference" algorithm. In other words, there is an "&" character
2987     ## that does not introduce a character reference, which would be
2988     ## appended to the parent element or the attribute value in the later
2989     ## processing of the tokenizer.
2990 wakaba 1.112
2991 wakaba 1.168 if ($self->{entity_in_attr}) {
2992     $self->{current_attribute}->{value} .= '&';
2993     $self->{state} = $self->{last_attribute_value_state};
2994     ## Reconsume.
2995     redo A;
2996     } else {
2997     $self->{state} = DATA_STATE;
2998     ## Reconsume.
2999     !!!emit ({type => CHARACTER_TOKEN, data => '&',
3000     line => $self->{line_prev},
3001     column => $self->{column_prev},
3002     });
3003     redo A;
3004     }
3005     } elsif ($self->{state} == ENTITY_HASH_STATE) {
3006     if ($self->{next_char} == 0x0078 or # x
3007     $self->{next_char} == 0x0058) { # X
3008     $self->{state} = HEXREF_X_STATE;
3009     $self->{state_keyword} .= chr $self->{next_char};
3010     !!!next-input-character;
3011     redo A;
3012     } elsif (0x0030 <= $self->{next_char} and
3013     $self->{next_char} <= 0x0039) { # 0..9
3014     $self->{state} = NCR_NUM_STATE;
3015     $self->{state_keyword} = $self->{next_char} - 0x0030;
3016     !!!next-input-character;
3017     redo A;
3018     } else {
3019     !!!cp (1019);
3020     !!!parse-error (type => 'bare nero',
3021     line => $self->{line_prev},
3022     column => $self->{column_prev} - 1);
3023    
3024     ## NOTE: According to the spec algorithm, nothing is returned,
3025     ## and then "&#" is appended to the parent element or the attribute
3026     ## value in the later processing.
3027    
3028     if ($self->{entity_in_attr}) {
3029     $self->{current_attribute}->{value} .= '&#';
3030     $self->{state} = $self->{last_attribute_value_state};
3031     ## Reconsume.
3032 wakaba 1.167 redo A;
3033 wakaba 1.1 } else {
3034 wakaba 1.168 $self->{state} = DATA_STATE;
3035     ## Reconsume.
3036     !!!emit ({type => CHARACTER_TOKEN,
3037     data => '&#',
3038     line => $self->{line_prev},
3039     column => $self->{column_prev} - 1,
3040     });
3041     redo A;
3042 wakaba 1.1 }
3043 wakaba 1.168 }
3044     } elsif ($self->{state} == NCR_NUM_STATE) {
3045     if (0x0030 <= $self->{next_char} and
3046     $self->{next_char} <= 0x0039) { # 0..9
3047 wakaba 1.78 !!!cp (1012);
3048 wakaba 1.168 $self->{state_keyword} *= 10;
3049     $self->{state_keyword} += $self->{next_char} - 0x0030;
3050 wakaba 1.1
3051 wakaba 1.168 ## Stay in the state.
3052 wakaba 1.1 !!!next-input-character;
3053 wakaba 1.168 redo A;
3054     } elsif ($self->{next_char} == 0x003B) { # ;
3055 wakaba 1.78 !!!cp (1013);
3056 wakaba 1.1 !!!next-input-character;
3057 wakaba 1.168 #
3058 wakaba 1.1 } else {
3059 wakaba 1.78 !!!cp (1014);
3060 wakaba 1.168 !!!parse-error (type => 'no refc');
3061     ## Reconsume.
3062     #
3063 wakaba 1.1 }
3064    
3065 wakaba 1.168 my $code = $self->{state_keyword};
3066     my $l = $self->{line_prev};
3067     my $c = $self->{column_prev};
3068 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
3069 wakaba 1.78 !!!cp (1015);
3070 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3071     text => (sprintf 'U+%04X', $code),
3072     line => $l, column => $c);
3073 wakaba 1.26 $code = 0xFFFD;
3074     } elsif ($code > 0x10FFFF) {
3075 wakaba 1.78 !!!cp (1016);
3076 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3077     text => (sprintf 'U-%08X', $code),
3078     line => $l, column => $c);
3079 wakaba 1.26 $code = 0xFFFD;
3080     } elsif ($code == 0x000D) {
3081 wakaba 1.78 !!!cp (1017);
3082 wakaba 1.153 !!!parse-error (type => 'CR character reference',
3083     line => $l, column => $c);
3084 wakaba 1.26 $code = 0x000A;
3085 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
3086 wakaba 1.78 !!!cp (1018);
3087 wakaba 1.153 !!!parse-error (type => 'C1 character reference',
3088     text => (sprintf 'U+%04X', $code),
3089     line => $l, column => $c);
3090 wakaba 1.4 $code = $c1_entity_char->{$code};
3091 wakaba 1.1 }
3092 wakaba 1.168
3093     if ($self->{entity_in_attr}) {
3094     $self->{current_attribute}->{value} .= chr $code;
3095     $self->{current_attribute}->{has_reference} = 1;
3096     $self->{state} = $self->{last_attribute_value_state};
3097     ## Reconsume.
3098     redo A;
3099     } else {
3100     $self->{state} = DATA_STATE;
3101     ## Reconsume.
3102     !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3103     has_reference => 1,
3104     line => $l, column => $c,
3105     });
3106     redo A;
3107     }
3108     } elsif ($self->{state} == HEXREF_X_STATE) {
3109     if ((0x0030 <= $self->{next_char} and $self->{next_char} <= 0x0039) or
3110     (0x0041 <= $self->{next_char} and $self->{next_char} <= 0x0046) or
3111     (0x0061 <= $self->{next_char} and $self->{next_char} <= 0x0066)) {
3112     # 0..9, A..F, a..f
3113     $self->{state} = HEXREF_HEX_STATE;
3114     $self->{state_keyword} = 0;
3115     ## Reconsume.
3116     redo A;
3117     } else {
3118     !!!cp (1005);
3119     !!!parse-error (type => 'bare hcro',
3120     line => $self->{line_prev},
3121     column => $self->{column_prev} - 2);
3122    
3123     ## NOTE: According to the spec algorithm, nothing is returned,
3124     ## and then "&#" followed by "X" or "x" is appended to the parent
3125     ## element or the attribute value in the later processing.
3126    
3127     if ($self->{entity_in_attr}) {
3128     $self->{current_attribute}->{value} .= '&' . $self->{state_keyword};
3129     $self->{state} = $self->{last_attribute_value_state};
3130     ## Reconsume.
3131     redo A;
3132     } else {
3133     $self->{state} = DATA_STATE;
3134     ## Reconsume.
3135     !!!emit ({type => CHARACTER_TOKEN,
3136     data => '&' . $self->{state_keyword},
3137     line => $self->{line_prev},
3138     column => $self->{column_prev} - length $self->{state_keyword},
3139     });
3140     redo A;
3141     }
3142     }
3143     } elsif ($self->{state} == HEXREF_HEX_STATE) {
3144     if (0x0030 <= $self->{next_char} and $self->{next_char} <= 0x0039) {
3145     # 0..9
3146     !!!cp (1002);
3147     $self->{state_keyword} *= 0x10;
3148     $self->{state_keyword} += $self->{next_char} - 0x0030;
3149     ## Stay in the state.
3150     !!!next-input-character;
3151     redo A;
3152     } elsif (0x0061 <= $self->{next_char} and
3153     $self->{next_char} <= 0x0066) { # a..f
3154     !!!cp (1003);
3155     $self->{state_keyword} *= 0x10;
3156     $self->{state_keyword} += $self->{next_char} - 0x0060 + 9;
3157     ## Stay in the state.
3158     !!!next-input-character;
3159     redo A;
3160     } elsif (0x0041 <= $self->{next_char} and
3161     $self->{next_char} <= 0x0046) { # A..F
3162     !!!cp (1004);
3163     $self->{state_keyword} *= 0x10;
3164     $self->{state_keyword} += $self->{next_char} - 0x0040 + 9;
3165     ## Stay in the state.
3166     !!!next-input-character;
3167     redo A;
3168     } elsif ($self->{next_char} == 0x003B) { # ;
3169     !!!cp (1006);
3170     !!!next-input-character;
3171     #
3172     } else {
3173     !!!cp (1007);
3174     !!!parse-error (type => 'no refc',
3175     line => $self->{line},
3176     column => $self->{column});
3177     ## Reconsume.
3178     #
3179     }
3180    
3181     my $code = $self->{state_keyword};
3182     my $l = $self->{line_prev};
3183     my $c = $self->{column_prev};
3184     if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
3185     !!!cp (1008);
3186     !!!parse-error (type => 'invalid character reference',
3187     text => (sprintf 'U+%04X', $code),
3188     line => $l, column => $c);
3189     $code = 0xFFFD;
3190     } elsif ($code > 0x10FFFF) {
3191     !!!cp (1009);
3192     !!!parse-error (type => 'invalid character reference',
3193     text => (sprintf 'U-%08X', $code),
3194     line => $l, column => $c);
3195     $code = 0xFFFD;
3196     } elsif ($code == 0x000D) {
3197     !!!cp (1010);
3198     !!!parse-error (type => 'CR character reference', line => $l, column => $c);
3199     $code = 0x000A;
3200     } elsif (0x80 <= $code and $code <= 0x9F) {
3201     !!!cp (1011);
3202     !!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c);
3203     $code = $c1_entity_char->{$code};
3204     }
3205    
3206     if ($self->{entity_in_attr}) {
3207     $self->{current_attribute}->{value} .= chr $code;
3208     $self->{current_attribute}->{has_reference} = 1;
3209     $self->{state} = $self->{last_attribute_value_state};
3210     ## Reconsume.
3211     redo A;
3212     } else {
3213     $self->{state} = DATA_STATE;
3214     ## Reconsume.
3215     !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3216     has_reference => 1,
3217     line => $l, column => $c,
3218     });
3219     redo A;
3220     }
3221     } elsif ($self->{state} == ENTITY_NAME_STATE) {
3222     if (length $self->{state_keyword} < 30 and
3223     ## NOTE: Some number greater than the maximum length of entity name
3224     ((0x0041 <= $self->{next_char} and # a
3225     $self->{next_char} <= 0x005A) or # x
3226     (0x0061 <= $self->{next_char} and # a
3227     $self->{next_char} <= 0x007A) or # z
3228     (0x0030 <= $self->{next_char} and # 0
3229     $self->{next_char} <= 0x0039) or # 9
3230     $self->{next_char} == 0x003B)) { # ;
3231     our $EntityChar;
3232     $self->{state_keyword} .= chr $self->{next_char};
3233     if (defined $EntityChar->{$self->{state_keyword}}) {
3234     if ($self->{next_char} == 0x003B) { # ;
3235     !!!cp (1020);
3236     $self->{entity__value} = $EntityChar->{$self->{state_keyword}};
3237     $self->{entity__match} = 1;
3238     !!!next-input-character;
3239     #
3240     } else {
3241     !!!cp (1021);
3242     $self->{entity__value} = $EntityChar->{$self->{state_keyword}};
3243     $self->{entity__match} = -1;
3244     ## Stay in the state.
3245     !!!next-input-character;
3246     redo A;
3247     }
3248     } else {
3249     !!!cp (1022);
3250     $self->{entity__value} .= chr $self->{next_char};
3251     $self->{entity__match} *= 2;
3252     ## Stay in the state.
3253 wakaba 1.16 !!!next-input-character;
3254 wakaba 1.168 redo A;
3255     }
3256     }
3257    
3258     my $data;
3259     my $has_ref;
3260     if ($self->{entity__match} > 0) {
3261     !!!cp (1023);
3262     $data = $self->{entity__value};
3263     $has_ref = 1;
3264     #
3265     } elsif ($self->{entity__match} < 0) {
3266     !!!parse-error (type => 'no refc');
3267     if ($self->{entity_in_attr} and $self->{entity__match} < -1) {
3268     !!!cp (1024);
3269     $data = '&' . $self->{state_keyword};
3270     #
3271 wakaba 1.37 } else {
3272 wakaba 1.168 !!!cp (1025);
3273     $data = $self->{entity__value};
3274     $has_ref = 1;
3275     #
3276 wakaba 1.16 }
3277 wakaba 1.1 } else {
3278 wakaba 1.168 !!!cp (1026);
3279     !!!parse-error (type => 'bare ero',
3280     line => $self->{line_prev},
3281     column => $self->{column_prev});
3282     $data = '&' . $self->{state_keyword};
3283     #
3284 wakaba 1.1 }
3285 wakaba 1.168
3286     ## NOTE: In these cases, when a character reference is found,
3287     ## it is consumed and a character token is returned, or, otherwise,
3288     ## nothing is consumed and returned, according to the spec algorithm.
3289     ## In this implementation, anything that has been examined by the
3290     ## tokenizer is appended to the parent element or the attribute value
3291     ## as string, either literal string when no character reference or
3292     ## entity-replaced string otherwise, in this stage, since any characters
3293     ## that would not be consumed are appended in the data state or in an
3294     ## appropriate attribute value state anyway.
3295    
3296     if ($self->{entity_in_attr}) {
3297     $self->{current_attribute}->{value} .= $data;
3298     $self->{current_attribute}->{has_reference} = 1 if $has_ref;
3299     $self->{state} = $self->{last_attribute_value_state};
3300     ## Reconsume.
3301 wakaba 1.167 redo A;
3302 wakaba 1.37 } else {
3303 wakaba 1.168 $self->{state} = DATA_STATE;
3304     ## Reconsume.
3305     !!!emit ({type => CHARACTER_TOKEN,
3306     data => $data, has_reference => $has_ref,
3307     line => $self->{line_prev},
3308     column => $self->{column_prev} + 1 - length $self->{state_keyword},
3309     });
3310 wakaba 1.167 redo A;
3311 wakaba 1.37 }
3312 wakaba 1.1 } else {
3313 wakaba 1.167 die "$0: $self->{state}: Unknown state";
3314     }
3315     } # A
3316    
3317     die "$0: _get_next_token: unexpected case";
3318     } # _get_next_token
3319 wakaba 1.1
## Prepare the target document for the tree construction stage.
##
## NOTE: |$self->{document}| MUST be set before this method is called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  ## Relax DOM error checking while the parser is building the tree.
  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  $doc->manakai_is_html (1); # MUST

  ## Initial source position recorded on the document node.
  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
3330    
## Undo the document set-up done for tree construction: switch the
## document's strict error checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
3336    
3337     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
3338    
3339 wakaba 1.3 { # tree construction stage
3340     my $token;
3341    
## Entry point of the tree construction stage: fetch the first token,
## reset the per-parse state and run the insertion mode handlers in
## order ("initial", "before html", then everything else).
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA renders
  ## |$self->{document}| to the user, or when it begins accepting
  ## user input.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Reset the parser state used by the insertion mode handlers.
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
3370    
## Process tokens in the "initial" insertion mode.
##
## Depending on the type of the current token (|$token|, shared by the
## tree construction subs):
##
##   - DOCTYPE: report non-HTML5 doctypes, append a document type
##     definition node to the document, choose the document's
##     compatibility mode from the doctype name and the public and
##     system identifiers, fetch the next token and return.
##   - Start tag, end tag, end-of-file: report "no DOCTYPE", set quirks
##     mode and return with the token left for reprocessing.
##   - Character: leading white space is dropped; if nothing remains
##     the next token is fetched and the mode is re-entered, otherwise
##     handled as the missing DOCTYPE case.
##   - Comment: appended to the document; the mode is re-entered.
##
## Dies on an unknown token type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{system_identifier}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{public_identifier}) {
        if ($token->{public_identifier} eq 'XSLT-compat') {
          !!!cp ('t1.2');
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else {
        !!!cp ('t3');
        #
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are compared ASCII case-insensitively, so
        ## the identifier is upper-cased and the table below is written
        ## in upper case.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        ## Public identifier prefixes that select quirks mode.
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        ## The identifier itself (not the table reference) has to be
        ## tested against each known prefix.
        my $match;
        for my $known_prefix (@$prefix) {
          if (substr ($pubid, 0, length $known_prefix) eq $known_prefix) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4\.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4\.01 TRANSITIONAL//]) {
          ## NOTE(review): HTML5 appears to select quirks mode when the
          ## system identifier is *missing* and limited quirks mode
          ## when it is present, i.e. the opposite of the two branches
          ## below - confirm against the spec and the test data before
          ## changing.
          if (defined $token->{system_identifier}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1\.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1\.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{system_identifier}) {
        ## System identifiers are compared in lower case.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
3577    
## Process tokens in the "before html" insertion mode.
##
## DOCTYPE tokens are reported and ignored, comments are appended to
## the document, and leading white space in character tokens is
## dropped.  A start tag |html| creates the root element from the
## token itself; any other remaining token causes an |html| element
## without attributes to be created and the token to be left for
## reprocessing.  In both cases the new element is pushed onto the
## stack of open elements before returning.
##
## Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  ## NOTE: "before html" insertion mode.

  B: {
    my $type = $token->{type};

    if ($type == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($type == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment_node = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment_node);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($type == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Leading white space is ignored.

        if (length $token->{data}) {
          !!!cp ('t22');
        } else {
          !!!cp ('t21');
          ## Nothing is left in the token; stay in the insertion mode.
          !!!next-token;
          redo B;
        }
      } else {
        !!!cp ('t23');
      }

      $self->{application_cache_selection}->(undef);

      ## Fall through to the implied root element below.
    } elsif ($type == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        my $manifest_attr = $token->{attributes}->{manifest};
        if ($manifest_attr) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($manifest_attr->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        ## Fall through to the implied root element below.
      }
    } elsif ($type == END_TAG_TOKEN or $type == END_OF_FILE_TOKEN) {
      !!!cp ('t26');
      ## Fall through to the implied root element below.
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## No explicit |html| start tag: create the root element without
    ## attributes.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3672    
## Reset |$self->{insertion_mode}| appropriately: walk the stack of
## open elements from the most recently pushed entry towards the
## bottom and pick the insertion mode implied by the first entry that
## determines one.
##
## When the bottom of the stack is reached, |$self->{inner_html_node}|
## is examined in place of that entry; the method dies if it is not
## defined.
sub _reset_insertion_mode ($) {
  my ($self) = @_;
  my $open_elements = $self->{open_elements};

  ## Step 1
  my $last;

  ## Step 2
  my $index = -1;
  my $node = $open_elements->[$index];

  ## Step 3
  S3: {
    if ($open_elements->[0]->[0] eq $node->[0]) {
      ## The bottommost entry of the stack has been reached.
      $last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $node = $self->{inner_html_node};
    }

    ## Step 4..14
    my $new_mode;
    my $category = $node->[1];
    if ($category & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML
      ## and SVG elements.  Currently the HTML syntax supports only
      ## MathML and SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($category & TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        ## No insertion mode is chosen by this step.
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      my $local_name = $node->[0]->manakai_local_name;
      my $mode_of = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      };
      $new_mode = $mode_of->{$local_name};
    }
    ## Assign-and-return idiom; |return| is reached only when the
    ## assigned mode value is true.
    $self->{insertion_mode} = $new_mode and return if defined $new_mode;

    ## Step 15
    if ($node->[1] & HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    $self->{insertion_mode} = IN_BODY_IM and return if $last;

    ## Step 17
    $index--;
    $node = $open_elements->[$index];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3759    
3760     sub _tree_construction_main ($) {
3761     my $self = shift;
3762    
3763 wakaba 1.1 my $active_formatting_elements = [];
3764    
## Reconstruct the active formatting elements.
##
## The only argument is a code reference that is called with each
## newly cloned element node in order to insert it into the tree.
## Nothing is done if the list is empty, or if its last entry is a
## marker or is already in the stack of open elements.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert_node) = @_;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($entry->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  ## Rewind to the earliest entry that has to be recreated.
  S4: {
    ## Step 4
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      ## Stop rewinding at the marker.
    } else {
      my $is_open;
      OE: for my $open (@{$self->{open_elements}}) {
        if ($entry->[0] eq $open->[0]) {
          !!!cp ('t33');
          $is_open = 1;
          last OE;
        }
      }
      unless ($is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      }
      !!!cp ('t34');
    }

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # S4

  ## Recreate every entry from the current one to the end of the list.
  S7: {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $clone;

    ## Step 11
    if (not $clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t36');
      ## Step 7'
      $i++;
      $entry = $active_formatting_elements->[$i];

      redo S7;
    }

    !!!cp ('t37');
  } # S7
}; # $reconstruct_active_formatting_elements
3844    
## Clear the list of active formatting elements up to the last marker:
## the last marker entry and everything after it are removed.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3856    
3857 wakaba 1.96 my $insert;
3858    
## Generic CDATA/RCDATA element parsing.
##
## The argument is the content model flag to use while the element's
## content is tokenized (CDATA or RCDATA).  An element is created for
## the current start tag token and handed to |$insert|; the following
## character tokens are collected into one Text node child.  A parse
## error is reported unless the next token is the matching end tag.
my $parse_rcdata = sub ($) {
  my ($content_model_flag) = @_;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($el);

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my $collected = '';
  !!!nack ('t40.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    $collected .= $token->{data};
    !!!next-token;
  }

  ## Step 5
  if (length $collected) {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($collected);
    $el->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq $start_tag_name) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
  } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#eof', token => $token);
  } else {
    die "$0: $content_model_flag in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3913 wakaba 1.1
## Handle a |script| start tag.
##
## A |script| element is created from the current token, the following
## character tokens are collected as its text using the CDATA content
## model, and a parse error is reported unless the next token is the
## |script| end tag.  The element is handed to |$insert| only when
## |$self->{inner_html_node}| is not defined.
my $script_start_tag = sub () {
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  my $script_text = '';
  !!!nack ('t45.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) {
    !!!cp ('t45');
    $script_text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $script_text) {
    !!!cp ('t46');
    $script_el->manakai_append_text ($script_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq 'script') {
    !!!cp ('t47');
    ## Ignore the token
  } else {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (not defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
3965    
3966 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3967     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3968     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3969    
3970 wakaba 1.1 my $formatting_end_tag = sub {
3971 wakaba 1.113 my $end_tag_token = shift;
3972     my $tag_name = $end_tag_token->{tag_name};
3973 wakaba 1.1
3974 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
3975 wakaba 1.102
3976 wakaba 1.1 FET: {
3977     ## Step 1
3978     my $formatting_element;
3979     my $formatting_element_i_in_active;
3980     AFE: for (reverse 0..$#$active_formatting_elements) {
3981 wakaba 1.123 if ($active_formatting_elements->[$_]->[0] eq '#marker') {
3982     !!!cp ('t52');
3983     last AFE;
3984     } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
3985     eq $tag_name) {
3986 wakaba 1.79 !!!cp ('t51');
3987 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
3988     $formatting_element_i_in_active = $_;
3989     last AFE;
3990     }
3991     } # AFE
3992     unless (defined $formatting_element) {
3993 wakaba 1.79 !!!cp ('t53');
3994 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
3995 wakaba 1.1 ## Ignore the token
3996     !!!next-token;
3997     return;
3998     }
3999     ## has an element in scope
4000     my $in_scope = 1;
4001     my $formatting_element_i_in_open;
4002 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4003     my $node = $self->{open_elements}->[$_];
4004 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
4005     if ($in_scope) {
4006 wakaba 1.79 !!!cp ('t54');
4007 wakaba 1.1 $formatting_element_i_in_open = $_;
4008     last INSCOPE;
4009     } else { # in open elements but not in scope
4010 wakaba 1.79 !!!cp ('t55');
4011 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4012     text => $token->{tag_name},
4013 wakaba 1.113 token => $end_tag_token);
4014 wakaba 1.1 ## Ignore the token
4015     !!!next-token;
4016     return;
4017     }
4018 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4019 wakaba 1.79 !!!cp ('t56');
4020 wakaba 1.1 $in_scope = 0;
4021     }
4022     } # INSCOPE
4023     unless (defined $formatting_element_i_in_open) {
4024 wakaba 1.79 !!!cp ('t57');
4025 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4026     text => $token->{tag_name},
4027 wakaba 1.113 token => $end_tag_token);
4028 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
4029     !!!next-token; ## TODO: ok?
4030     return;
4031     }
4032 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
4033 wakaba 1.79 !!!cp ('t58');
4034 wakaba 1.122 !!!parse-error (type => 'not closed',
4035 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4036 wakaba 1.122 ->manakai_local_name,
4037 wakaba 1.113 token => $end_tag_token);
4038 wakaba 1.1 }
4039    
4040     ## Step 2
4041     my $furthest_block;
4042     my $furthest_block_i_in_open;
4043 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4044     my $node = $self->{open_elements}->[$_];
4045 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
4046 wakaba 1.1 #not $phrasing_category->{$node->[1]} and
4047 wakaba 1.123 ($node->[1] & SPECIAL_EL or
4048     $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
4049 wakaba 1.79 !!!cp ('t59');
4050 wakaba 1.1 $furthest_block = $node;
4051     $furthest_block_i_in_open = $_;
4052     } elsif ($node->[0] eq $formatting_element->[0]) {
4053 wakaba 1.79 !!!cp ('t60');
4054 wakaba 1.1 last OE;
4055     }
4056     } # OE
4057    
4058     ## Step 3
4059     unless (defined $furthest_block) { # MUST
4060 wakaba 1.79 !!!cp ('t61');
4061 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
4062 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
4063     !!!next-token;
4064     return;
4065     }
4066    
4067     ## Step 4
4068 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
4069 wakaba 1.1
4070     ## Step 5
4071     my $furthest_block_parent = $furthest_block->[0]->parent_node;
4072     if (defined $furthest_block_parent) {
4073 wakaba 1.79 !!!cp ('t62');
4074 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
4075     }
4076    
4077     ## Step 6
4078     my $bookmark_prev_el
4079     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
4080     ->[0];
4081    
4082     ## Step 7
4083     my $node = $furthest_block;
4084     my $node_i_in_open = $furthest_block_i_in_open;
4085     my $last_node = $furthest_block;
4086     S7: {
4087     ## Step 1
4088     $node_i_in_open--;
4089 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
4090 wakaba 1.1
4091     ## Step 2
4092     my $node_i_in_active;
4093     S7S2: {
4094     for (reverse 0..$#$active_formatting_elements) {
4095     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4096 wakaba 1.79 !!!cp ('t63');
4097 wakaba 1.1 $node_i_in_active = $_;
4098     last S7S2;
4099     }
4100     }
4101 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
4102 wakaba 1.1 redo S7;
4103     } # S7S2
4104    
4105     ## Step 3
4106     last S7 if $node->[0] eq $formatting_element->[0];
4107    
4108     ## Step 4
4109     if ($last_node->[0] eq $furthest_block->[0]) {
4110 wakaba 1.79 !!!cp ('t64');
4111 wakaba 1.1 $bookmark_prev_el = $node->[0];
4112     }
4113    
4114     ## Step 5
4115     if ($node->[0]->has_child_nodes ()) {
4116 wakaba 1.79 !!!cp ('t65');
4117 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
4118     $active_formatting_elements->[$node_i_in_active] = $clone;
4119 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
4120 wakaba 1.1 $node = $clone;
4121     }
4122    
4123     ## Step 6
4124     $node->[0]->append_child ($last_node->[0]);
4125    
4126     ## Step 7
4127     $last_node = $node;
4128    
4129     ## Step 8
4130     redo S7;
4131     } # S7
4132    
4133     ## Step 8
4134 wakaba 1.123 if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
4135 wakaba 1.102 my $foster_parent_element;
4136     my $next_sibling;
4137 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
4138     if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4139 wakaba 1.102 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4140     if (defined $parent and $parent->node_type == 1) {
4141     !!!cp ('t65.1');
4142     $foster_parent_element = $parent;
4143     $next_sibling = $self->{open_elements}->[$_]->[0];
4144     } else {
4145     !!!cp ('t65.2');
4146     $foster_parent_element
4147     = $self->{open_elements}->[$_ - 1]->[0];
4148     }
4149     last OE;
4150     }
4151     } # OE
4152     $foster_parent_element = $self->{open_elements}->[0]->[0]
4153     unless defined $foster_parent_element;
4154     $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
4155     $open_tables->[-1]->[1] = 1; # tainted
4156     } else {
4157     !!!cp ('t65.3');
4158     $common_ancestor_node->[0]->append_child ($last_node->[0]);
4159     }
4160 wakaba 1.1
4161     ## Step 9
4162     my $clone = [$formatting_element->[0]->clone_node (0),
4163     $formatting_element->[1]];
4164    
4165     ## Step 10
4166     my @cn = @{$furthest_block->[0]->child_nodes};
4167     $clone->[0]->append_child ($_) for @cn;
4168    
4169     ## Step 11
4170     $furthest_block->[0]->append_child ($clone->[0]);
4171    
4172     ## Step 12
4173     my $i;
4174     AFE: for (reverse 0..$#$active_formatting_elements) {
4175     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
4176 wakaba 1.79 !!!cp ('t66');
4177 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
4178     $i-- and last AFE if defined $i;
4179     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
4180 wakaba 1.79 !!!cp ('t67');
4181 wakaba 1.1 $i = $_;
4182     }
4183     } # AFE
4184     splice @$active_formatting_elements, $i + 1, 0, $clone;
4185    
4186     ## Step 13
4187     undef $i;
4188 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4189     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
4190 wakaba 1.79 !!!cp ('t68');
4191 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
4192 wakaba 1.1 $i-- and last OE if defined $i;
4193 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
4194 wakaba 1.79 !!!cp ('t69');
4195 wakaba 1.1 $i = $_;
4196     }
4197     } # OE
4198 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
4199 wakaba 1.1
4200     ## Step 14
4201     redo FET;
4202     } # FET
4203     }; # $formatting_end_tag
4204    
## $insert_to_current: plain insertion routine.
## Appends the node passed as the first argument ($_[0]) as the last child
## of the node held by the top entry of the open elements stack
## ($self->{open_elements}->[-1]->[0]).
## The same closure is also stored in $insert by this statement.
4205 wakaba 1.96 $insert = my $insert_to_current = sub {
4206 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
4207 wakaba 1.1 }; # $insert_to_current
4208    
## $insert_to_foster: insertion routine that relocates ("foster parents")
## the child when the current node is a table-structure element.
##
## Argument: the node to insert (taken with |shift|).
##
## If the top entry of the open elements stack has the TABLE_ROWS_EL
## category bit set, the child is not appended to it.  Instead the stack
## is scanned from the top down for the nearest entry with the TABLE_EL
## bit, a foster parent is derived from that entry, the child is inserted
## there, and the innermost $open_tables entry is flagged as tainted.
## Otherwise the child is simply appended to the current node.
4209     my $insert_to_foster = sub {
4210 wakaba 1.95 my $child = shift;
4211 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
4212 wakaba 1.95 # MUST
4213     my $foster_parent_element;
## $next_sibling stays undef unless the table has an element parent.
4214     my $next_sibling;
## Walk the stack from the most recently opened element downwards and
## stop at the first table entry found.
4215 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
4216     if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4217 wakaba 1.3 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
## node_type == 1: the table's parent is an element node, so the child is
## inserted into that parent, immediately before the table itself.
4218 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4219 wakaba 1.79 !!!cp ('t70');
4220 wakaba 1.1 $foster_parent_element = $parent;
4221 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4222 wakaba 1.1 } else {
## The table has no element parent: use the node of the stack entry
## directly below the table as the foster parent.
## NOTE(review): if the table entry were at index 0, |$_ - 1| would be -1
## and select the top of the stack -- presumably index 0 is always the
## root element, so this cannot happen; confirm.
4223 wakaba 1.79 !!!cp ('t71');
4224 wakaba 1.1 $foster_parent_element
4225 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
4226 wakaba 1.1 }
4227     last OE;
4228     }
4229     } # OE
## No table entry on the stack: fall back to the node of the first
## (bottom) stack entry.
4230 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
4231 wakaba 1.1 unless defined $foster_parent_element;
## With an undefined $next_sibling this presumably appends at the end of
## the foster parent (DOM insertBefore semantics) -- TODO confirm against
## the DOM implementation in use.
4232     $foster_parent_element->insert_before
4233     ($child, $next_sibling);
4234 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4235     } else {
## Current node is not a table-structure element: ordinary append.
4236     !!!cp ('t72');
4237     $self->{open_elements}->[-1]->[0]->append_child ($child);
4238     }
4239 wakaba 1.1 }; # $insert_to_foster
4240    
4241 wakaba 1.126 B: while (1) {
4242 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
4243 wakaba 1.79 !!!cp ('t73');
4244 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
4245 wakaba 1.52 ## Ignore the token
4246     ## Stay in the phase
4247     !!!next-token;
4248 wakaba 1.126 next B;
4249 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
4250 wakaba 1.52 $token->{tag_name} eq 'html') {
4251 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4252 wakaba 1.79 !!!cp ('t79');
4253 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4254 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
4255     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
4256 wakaba 1.79 !!!cp ('t80');
4257 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4258 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4259 wakaba 1.79 } else {
4260     !!!cp ('t81');
4261 wakaba 1.52 }
4262    
4263 wakaba 1.84 !!!cp ('t82');
4264 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
4265 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
4266     for my $attr_name (keys %{$token->{attributes}}) {
4267     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4268 wakaba 1.79 !!!cp ('t84');
4269 wakaba 1.52 $top_el->set_attribute_ns
4270     (undef, [undef, $attr_name],
4271     $token->{attributes}->{$attr_name}->{value});
4272     }
4273     }
4274 wakaba 1.125 !!!nack ('t84.1');
4275 wakaba 1.52 !!!next-token;
4276 wakaba 1.126 next B;
4277 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
4278 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
4279 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
4280 wakaba 1.79 !!!cp ('t85');
4281 wakaba 1.52 $self->{document}->append_child ($comment);
4282 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
4283 wakaba 1.79 !!!cp ('t86');
4284 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
4285     } else {
4286 wakaba 1.79 !!!cp ('t87');
4287 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4288     }
4289     !!!next-token;
4290 wakaba 1.126 next B;
4291     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
4292     if ($token->{type} == CHARACTER_TOKEN) {
4293     !!!cp ('t87.1');
4294     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4295     !!!next-token;
4296     next B;
4297     } elsif ($token->{type} == START_TAG_TOKEN) {
4298 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
4299     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
4300 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
4301     ($token->{tag_name} eq 'svg' and
4302     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
4303     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4304     !!!cp ('t87.2');
4305     #
4306     } elsif ({
4307 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4308 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
4309     em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
4310     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
4311     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
4312     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
4313     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
4314     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
4315 wakaba 1.126 }->{$token->{tag_name}}) {
4316     !!!cp ('t87.2');
4317     !!!parse-error (type => 'not closed',
4318 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4319 wakaba 1.126 ->manakai_local_name,
4320     token => $token);
4321    
4322     pop @{$self->{open_elements}}
4323     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4324    
4325 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4326 wakaba 1.126 ## Reprocess.
4327     next B;
4328     } else {
4329 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
4330     my $tag_name = $token->{tag_name};
4331     if ($nsuri eq $SVG_NS) {
4332     $tag_name = {
4333     altglyph => 'altGlyph',
4334     altglyphdef => 'altGlyphDef',
4335     altglyphitem => 'altGlyphItem',
4336     animatecolor => 'animateColor',
4337     animatemotion => 'animateMotion',
4338     animatetransform => 'animateTransform',
4339     clippath => 'clipPath',
4340     feblend => 'feBlend',
4341     fecolormatrix => 'feColorMatrix',
4342     fecomponenttransfer => 'feComponentTransfer',
4343     fecomposite => 'feComposite',
4344     feconvolvematrix => 'feConvolveMatrix',
4345     fediffuselighting => 'feDiffuseLighting',
4346     fedisplacementmap => 'feDisplacementMap',
4347     fedistantlight => 'feDistantLight',
4348     feflood => 'feFlood',
4349     fefunca => 'feFuncA',
4350     fefuncb => 'feFuncB',
4351     fefuncg => 'feFuncG',
4352     fefuncr => 'feFuncR',
4353     fegaussianblur => 'feGaussianBlur',
4354     feimage => 'feImage',
4355     femerge => 'feMerge',
4356     femergenode => 'feMergeNode',
4357     femorphology => 'feMorphology',
4358     feoffset => 'feOffset',
4359     fepointlight => 'fePointLight',
4360     fespecularlighting => 'feSpecularLighting',
4361     fespotlight => 'feSpotLight',
4362     fetile => 'feTile',
4363     feturbulence => 'feTurbulence',
4364     foreignobject => 'foreignObject',
4365     glyphref => 'glyphRef',
4366     lineargradient => 'linearGradient',
4367     radialgradient => 'radialGradient',
4368     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
4369     textpath => 'textPath',
4370     }->{$tag_name} || $tag_name;
4371     }
4372    
4373     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
4374    
4375     ## "adjust foreign attributes" - done in insert-element-f
4376 wakaba 1.126
4377 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
4378 wakaba 1.126
4379     if ($self->{self_closing}) {
4380     pop @{$self->{open_elements}};
4381     !!!ack ('t87.3');
4382     } else {
4383     !!!cp ('t87.4');
4384     }
4385    
4386     !!!next-token;
4387     next B;
4388     }
4389     } elsif ($token->{type} == END_TAG_TOKEN) {
4390     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4391     !!!cp ('t87.5');
4392     #
4393     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4394     !!!cp ('t87.6');
4395 wakaba 1.146 !!!parse-error (type => 'not closed',
4396 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4397 wakaba 1.146 ->manakai_local_name,
4398     token => $token);
4399    
4400     pop @{$self->{open_elements}}
4401     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4402    
4403     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4404     ## Reprocess.
4405     next B;
4406 wakaba 1.126 } else {
4407     die "$0: $token->{type}: Unknown token type";
4408     }
4409     }
4410    
4411     if ($self->{insertion_mode} & HEAD_IMS) {
4412 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4413 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4414 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4415     !!!cp ('t88.2');
4416     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4417     } else {
4418     !!!cp ('t88.1');
4419     ## Ignore the token.
4420     !!!next-token;
4421 wakaba 1.126 next B;
4422 wakaba 1.99 }
4423 wakaba 1.52 unless (length $token->{data}) {
4424 wakaba 1.79 !!!cp ('t88');
4425 wakaba 1.52 !!!next-token;
4426 wakaba 1.126 next B;
4427 wakaba 1.1 }
4428     }
4429 wakaba 1.52
4430 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4431 wakaba 1.79 !!!cp ('t89');
4432 wakaba 1.52 ## As if <head>
4433 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4434 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4435 wakaba 1.123 push @{$self->{open_elements}},
4436     [$self->{head_element}, $el_category->{head}];
4437 wakaba 1.52
4438     ## Reprocess in the "in head" insertion mode...
4439     pop @{$self->{open_elements}};
4440    
4441     ## Reprocess in the "after head" insertion mode...
4442 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4443 wakaba 1.79 !!!cp ('t90');
4444 wakaba 1.52 ## As if </noscript>
4445     pop @{$self->{open_elements}};
4446 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
4447 wakaba 1.1
4448 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
4449     ## As if </head>
4450     pop @{$self->{open_elements}};
4451    
4452     ## Reprocess in the "after head" insertion mode...
4453 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4454 wakaba 1.79 !!!cp ('t91');
4455 wakaba 1.52 pop @{$self->{open_elements}};
4456    
4457     ## Reprocess in the "after head" insertion mode...
4458 wakaba 1.79 } else {
4459     !!!cp ('t92');
4460 wakaba 1.1 }
4461 wakaba 1.52
4462 wakaba 1.123 ## "after head" insertion mode
4463     ## As if <body>
4464     !!!insert-element ('body',, $token);
4465     $self->{insertion_mode} = IN_BODY_IM;
4466     ## reprocess
4467 wakaba 1.126 next B;
4468 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
4469     if ($token->{tag_name} eq 'head') {
4470     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4471     !!!cp ('t93');
4472 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4473 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
4474     ($self->{head_element});
4475     push @{$self->{open_elements}},
4476     [$self->{head_element}, $el_category->{head}];
4477     $self->{insertion_mode} = IN_HEAD_IM;
4478 wakaba 1.125 !!!nack ('t93.1');
4479 wakaba 1.123 !!!next-token;
4480 wakaba 1.126 next B;
4481 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4482 wakaba 1.139 !!!cp ('t93.2');
4483 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
4484     token => $token);
4485 wakaba 1.139 ## Ignore the token
4486     !!!nack ('t93.3');
4487     !!!next-token;
4488     next B;
4489 wakaba 1.125 } else {
4490     !!!cp ('t95');
4491 wakaba 1.153 !!!parse-error (type => 'in head:head',
4492     token => $token); # or in head noscript
4493 wakaba 1.125 ## Ignore the token
4494     !!!nack ('t95.1');
4495     !!!next-token;
4496 wakaba 1.126 next B;
4497 wakaba 1.125 }
4498     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4499 wakaba 1.126 !!!cp ('t96');
4500     ## As if <head>
4501     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4502     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4503     push @{$self->{open_elements}},
4504     [$self->{head_element}, $el_category->{head}];
4505 wakaba 1.52
4506 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
4507     ## Reprocess in the "in head" insertion mode...
4508     } else {
4509     !!!cp ('t97');
4510     }
4511 wakaba 1.52
4512 wakaba 1.49 if ($token->{tag_name} eq 'base') {
4513 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4514 wakaba 1.79 !!!cp ('t98');
4515 wakaba 1.49 ## As if </noscript>
4516     pop @{$self->{open_elements}};
4517 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'base',
4518     token => $token);
4519 wakaba 1.49
4520 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4521 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4522 wakaba 1.79 } else {
4523     !!!cp ('t99');
4524 wakaba 1.49 }
4525    
4526     ## NOTE: There is a "as if in head" code clone.
4527 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4528 wakaba 1.79 !!!cp ('t100');
4529 wakaba 1.153 !!!parse-error (type => 'after head',
4530     text => $token->{tag_name}, token => $token);
4531 wakaba 1.123 push @{$self->{open_elements}},
4532     [$self->{head_element}, $el_category->{head}];
4533 wakaba 1.79 } else {
4534     !!!cp ('t101');
4535 wakaba 1.49 }
4536 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4537 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4538 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4539 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4540 wakaba 1.125 !!!nack ('t101.1');
4541 wakaba 1.49 !!!next-token;
4542 wakaba 1.126 next B;
4543 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
4544 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4545 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4546 wakaba 1.79 !!!cp ('t102');
4547 wakaba 1.153 !!!parse-error (type => 'after head',
4548     text => $token->{tag_name}, token => $token);
4549 wakaba 1.123 push @{$self->{open_elements}},
4550     [$self->{head_element}, $el_category->{head}];
4551 wakaba 1.79 } else {
4552     !!!cp ('t103');
4553 wakaba 1.25 }
4554 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4555 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4556 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4557 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4558 wakaba 1.125 !!!ack ('t103.1');
4559 wakaba 1.1 !!!next-token;
4560 wakaba 1.126 next B;
4561 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4562     ## NOTE: There is a "as if in head" code clone.
4563 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4564 wakaba 1.79 !!!cp ('t104');
4565 wakaba 1.153 !!!parse-error (type => 'after head',
4566     text => $token->{tag_name}, token => $token);
4567 wakaba 1.123 push @{$self->{open_elements}},
4568     [$self->{head_element}, $el_category->{head}];
4569 wakaba 1.79 } else {
4570     !!!cp ('t105');
4571 wakaba 1.34 }
4572 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4573 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4574 wakaba 1.34
4575     unless ($self->{confident}) {
4576 wakaba 1.134 if ($token->{attributes}->{charset}) {
4577 wakaba 1.79 !!!cp ('t106');
4578 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4579     ## in the {change_encoding} callback.
4580 wakaba 1.63 $self->{change_encoding}
4581 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
4582     $token);
4583 wakaba 1.66
4584     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4585     ->set_user_data (manakai_has_reference =>
4586     $token->{attributes}->{charset}
4587     ->{has_reference});
4588 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4589     if ($token->{attributes}->{content}->{value}
4590 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4591 wakaba 1.70 [\x09-\x0D\x20]*=
4592 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4593 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
4594 wakaba 1.79 !!!cp ('t107');
4595 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4596     ## in the {change_encoding} callback.
4597 wakaba 1.63 $self->{change_encoding}
4598 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4599     $token);
4600 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4601     ->set_user_data (manakai_has_reference =>
4602     $token->{attributes}->{content}
4603     ->{has_reference});
4604 wakaba 1.79 } else {
4605     !!!cp ('t108');
4606 wakaba 1.63 }
4607 wakaba 1.34 }
4608 wakaba 1.66 } else {
4609     if ($token->{attributes}->{charset}) {
4610 wakaba 1.79 !!!cp ('t109');
4611 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4612     ->set_user_data (manakai_has_reference =>
4613     $token->{attributes}->{charset}
4614     ->{has_reference});
4615     }
4616 wakaba 1.68 if ($token->{attributes}->{content}) {
4617 wakaba 1.79 !!!cp ('t110');
4618 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4619     ->set_user_data (manakai_has_reference =>
4620     $token->{attributes}->{content}
4621     ->{has_reference});
4622     }
4623 wakaba 1.34 }
4624    
4625 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4626 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4627 wakaba 1.125 !!!ack ('t110.1');
4628 wakaba 1.34 !!!next-token;
4629 wakaba 1.126 next B;
4630 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
4631 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4632 wakaba 1.79 !!!cp ('t111');
4633 wakaba 1.49 ## As if </noscript>
4634     pop @{$self->{open_elements}};
4635 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'title',
4636     token => $token);
4637 wakaba 1.49
4638 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4639 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4640 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4641 wakaba 1.79 !!!cp ('t112');
4642 wakaba 1.153 !!!parse-error (type => 'after head',
4643     text => $token->{tag_name}, token => $token);
4644 wakaba 1.123 push @{$self->{open_elements}},
4645     [$self->{head_element}, $el_category->{head}];
4646 wakaba 1.79 } else {
4647     !!!cp ('t113');
4648 wakaba 1.25 }
4649 wakaba 1.49
4650     ## NOTE: There is a "as if in head" code clone.
4651 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4652     : $self->{open_elements}->[-1]->[0];
4653 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4654 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4655 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4656 wakaba 1.126 next B;
4657 wakaba 1.148 } elsif ($token->{tag_name} eq 'style' or
4658     $token->{tag_name} eq 'noframes') {
4659 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4660 wakaba 1.54 ## insertion mode IN_HEAD_IM)
4661 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4662 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4663 wakaba 1.79 !!!cp ('t114');
4664 wakaba 1.153 !!!parse-error (type => 'after head',
4665     text => $token->{tag_name}, token => $token);
4666 wakaba 1.123 push @{$self->{open_elements}},
4667     [$self->{head_element}, $el_category->{head}];
4668 wakaba 1.79 } else {
4669     !!!cp ('t115');
4670 wakaba 1.25 }
4671 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4672 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4673 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4674 wakaba 1.126 next B;
4675 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
4676 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
4677 wakaba 1.79 !!!cp ('t116');
4678 wakaba 1.25 ## NOTE: and scripting is disabled
4679 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4680 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4681 wakaba 1.125 !!!nack ('t116.1');
4682 wakaba 1.1 !!!next-token;
4683 wakaba 1.126 next B;
4684 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4685 wakaba 1.79 !!!cp ('t117');
4686 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
4687     token => $token);
4688 wakaba 1.1 ## Ignore the token
4689 wakaba 1.125 !!!nack ('t117.1');
4690 wakaba 1.41 !!!next-token;
4691 wakaba 1.126 next B;
4692 wakaba 1.1 } else {
4693 wakaba 1.79 !!!cp ('t118');
4694 wakaba 1.25 #
4695 wakaba 1.1 }
4696 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4697 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4698 wakaba 1.79 !!!cp ('t119');
4699 wakaba 1.49 ## As if </noscript>
4700     pop @{$self->{open_elements}};
4701 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'script',
4702     token => $token);
4703 wakaba 1.49
4704 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4705 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4706 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4707 wakaba 1.79 !!!cp ('t120');
4708 wakaba 1.153 !!!parse-error (type => 'after head',
4709     text => $token->{tag_name}, token => $token);
4710 wakaba 1.123 push @{$self->{open_elements}},
4711     [$self->{head_element}, $el_category->{head}];
4712 wakaba 1.79 } else {
4713     !!!cp ('t121');
4714 wakaba 1.25 }
4715 wakaba 1.49
4716 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4717 wakaba 1.100 $script_start_tag->();
4718     pop @{$self->{open_elements}} # <head>
4719 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4720 wakaba 1.126 next B;
4721 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4722 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4723 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4724 wakaba 1.79 !!!cp ('t122');
4725 wakaba 1.49 ## As if </noscript>
4726     pop @{$self->{open_elements}};
4727 wakaba 1.153 !!!parse-error (type => 'in noscript',
4728     text => $token->{tag_name}, token => $token);
4729 wakaba 1.49
4730     ## Reprocess in the "in head" insertion mode...
4731     ## As if </head>
4732     pop @{$self->{open_elements}};
4733    
4734     ## Reprocess in the "after head" insertion mode...
4735 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4736 wakaba 1.79 !!!cp ('t124');
4737 wakaba 1.49 pop @{$self->{open_elements}};
4738    
4739     ## Reprocess in the "after head" insertion mode...
4740 wakaba 1.79 } else {
4741     !!!cp ('t125');
4742 wakaba 1.49 }
4743    
4744     ## "after head" insertion mode
4745 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4746 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4747 wakaba 1.79 !!!cp ('t126');
4748 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4749     } elsif ($token->{tag_name} eq 'frameset') {
4750 wakaba 1.79 !!!cp ('t127');
4751 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4752     } else {
4753     die "$0: tag name: $self->{tag_name}";
4754     }
4755 wakaba 1.125 !!!nack ('t127.1');
4756 wakaba 1.1 !!!next-token;
4757 wakaba 1.126 next B;
4758 wakaba 1.1 } else {
4759 wakaba 1.79 !!!cp ('t128');
4760 wakaba 1.1 #
4761     }
4762 wakaba 1.49
4763 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4764 wakaba 1.79 !!!cp ('t129');
4765 wakaba 1.49 ## As if </noscript>
4766     pop @{$self->{open_elements}};
4767 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4768     text => $token->{tag_name}, token => $token);
4769 wakaba 1.49
4770     ## Reprocess in the "in head" insertion mode...
4771     ## As if </head>
4772 wakaba 1.25 pop @{$self->{open_elements}};
4773 wakaba 1.49
4774     ## Reprocess in the "after head" insertion mode...
4775 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4776 wakaba 1.79 !!!cp ('t130');
4777 wakaba 1.49 ## As if </head>
4778 wakaba 1.25 pop @{$self->{open_elements}};
4779 wakaba 1.49
4780     ## Reprocess in the "after head" insertion mode...
4781 wakaba 1.79 } else {
4782     !!!cp ('t131');
4783 wakaba 1.49 }
4784    
4785     ## "after head" insertion mode
4786     ## As if <body>
4787 wakaba 1.116 !!!insert-element ('body',, $token);
4788 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4789 wakaba 1.49 ## reprocess
4790 wakaba 1.125 !!!ack-later;
4791 wakaba 1.126 next B;
4792 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4793 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4794 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4795 wakaba 1.79 !!!cp ('t132');
4796 wakaba 1.50 ## As if <head>
4797 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4798 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4799 wakaba 1.123 push @{$self->{open_elements}},
4800     [$self->{head_element}, $el_category->{head}];
4801 wakaba 1.50
4802     ## Reprocess in the "in head" insertion mode...
4803     pop @{$self->{open_elements}};
4804 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4805 wakaba 1.50 !!!next-token;
4806 wakaba 1.126 next B;
4807 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4808 wakaba 1.79 !!!cp ('t133');
4809 wakaba 1.49 ## As if </noscript>
4810     pop @{$self->{open_elements}};
4811 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4812     text => 'head', token => $token);
4813 wakaba 1.49
4814     ## Reprocess in the "in head" insertion mode...
4815 wakaba 1.50 pop @{$self->{open_elements}};
4816 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4817 wakaba 1.50 !!!next-token;
4818 wakaba 1.126 next B;
4819 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4820 wakaba 1.79 !!!cp ('t134');
4821 wakaba 1.49 pop @{$self->{open_elements}};
4822 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4823 wakaba 1.49 !!!next-token;
4824 wakaba 1.126 next B;
4825 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4826     !!!cp ('t134.1');
4827 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
4828     token => $token);
4829 wakaba 1.139 ## Ignore the token
4830     !!!next-token;
4831     next B;
4832 wakaba 1.49 } else {
4833 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4834 wakaba 1.49 }
4835     } elsif ($token->{tag_name} eq 'noscript') {
4836 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4837 wakaba 1.79 !!!cp ('t136');
4838 wakaba 1.49 pop @{$self->{open_elements}};
4839 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4840 wakaba 1.49 !!!next-token;
4841 wakaba 1.126 next B;
4842 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4843     $self->{insertion_mode} == AFTER_HEAD_IM) {
4844 wakaba 1.79 !!!cp ('t137');
4845 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4846     text => 'noscript', token => $token);
4847 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4848     !!!next-token;
4849 wakaba 1.126 next B;
4850 wakaba 1.49 } else {
4851 wakaba 1.79 !!!cp ('t138');
4852 wakaba 1.49 #
4853     }
4854     } elsif ({
4855 wakaba 1.31 body => 1, html => 1,
4856     }->{$token->{tag_name}}) {
4857 wakaba 1.139 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4858     $self->{insertion_mode} == IN_HEAD_IM or
4859     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4860 wakaba 1.79 !!!cp ('t140');
4861 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4862     text => $token->{tag_name}, token => $token);
4863 wakaba 1.49 ## Ignore the token
4864     !!!next-token;
4865 wakaba 1.126 next B;
4866 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4867     !!!cp ('t140.1');
4868 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4869     text => $token->{tag_name}, token => $token);
4870 wakaba 1.139 ## Ignore the token
4871     !!!next-token;
4872     next B;
4873 wakaba 1.79 } else {
4874 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4875 wakaba 1.49 }
4876 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
4877     !!!cp ('t142');
4878 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4879     text => $token->{tag_name}, token => $token);
4880 wakaba 1.139 ## Ignore the token
4881     !!!next-token;
4882     next B;
4883     } elsif ($token->{tag_name} eq 'br') {
4884 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4885 wakaba 1.139 !!!cp ('t142.2');
4886     ## (before head) as if <head>, (in head) as if </head>
4887 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4888 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4889 wakaba 1.139 $self->{insertion_mode} = AFTER_HEAD_IM;
4890    
4891     ## Reprocess in the "after head" insertion mode...
4892     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4893     !!!cp ('t143.2');
4894     ## As if </head>
4895     pop @{$self->{open_elements}};
4896     $self->{insertion_mode} = AFTER_HEAD_IM;
4897    
4898     ## Reprocess in the "after head" insertion mode...
4899     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4900     !!!cp ('t143.3');
4901     ## ISSUE: Two parse errors for <head><noscript></br>
4902 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4903     text => 'br', token => $token);
4904 wakaba 1.139 ## As if </noscript>
4905     pop @{$self->{open_elements}};
4906     $self->{insertion_mode} = IN_HEAD_IM;
4907 wakaba 1.50
4908     ## Reprocess in the "in head" insertion mode...
4909 wakaba 1.139 ## As if </head>
4910     pop @{$self->{open_elements}};
4911     $self->{insertion_mode} = AFTER_HEAD_IM;
4912    
4913     ## Reprocess in the "after head" insertion mode...
4914     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4915     !!!cp ('t143.4');
4916     #
4917 wakaba 1.79 } else {
4918 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4919 wakaba 1.50 }
4920    
4921 wakaba 1.139 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4922 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4923     text => 'br', token => $token);
4924 wakaba 1.139 ## Ignore the token
4925     !!!next-token;
4926     next B;
4927 wakaba 1.25 } else {
4928 wakaba 1.139 !!!cp ('t145');
4929 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4930     text => $token->{tag_name}, token => $token);
4931 wakaba 1.139 ## Ignore the token
4932     !!!next-token;
4933     next B;
4934 wakaba 1.49 }
4935    
4936 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4937 wakaba 1.79 !!!cp ('t146');
4938 wakaba 1.49 ## As if </noscript>
4939     pop @{$self->{open_elements}};
4940 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4941     text => $token->{tag_name}, token => $token);
4942 wakaba 1.49
4943     ## Reprocess in the "in head" insertion mode...
4944     ## As if </head>
4945     pop @{$self->{open_elements}};
4946    
4947     ## Reprocess in the "after head" insertion mode...
4948 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4949 wakaba 1.79 !!!cp ('t147');
4950 wakaba 1.49 ## As if </head>
4951     pop @{$self->{open_elements}};
4952    
4953     ## Reprocess in the "after head" insertion mode...
4954 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4955 wakaba 1.82 ## ISSUE: This case cannot be reached?
4956 wakaba 1.79 !!!cp ('t148');
4957 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4958     text => $token->{tag_name}, token => $token);
4959 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4960     !!!next-token;
4961 wakaba 1.126 next B;
4962 wakaba 1.79 } else {
4963     !!!cp ('t149');
4964 wakaba 1.1 }
4965    
4966 wakaba 1.49 ## "after head" insertion mode
4967     ## As if <body>
4968 wakaba 1.116 !!!insert-element ('body',, $token);
4969 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4970 wakaba 1.52 ## reprocess
4971 wakaba 1.126 next B;
4972 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4973     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4974     !!!cp ('t149.1');
4975    
4976     ## NOTE: As if <head>
4977 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4978 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4979     ($self->{head_element});
4980 wakaba 1.123 #push @{$self->{open_elements}},
4981     # [$self->{head_element}, $el_category->{head}];
4982 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4983     ## NOTE: Reprocess.
4984    
4985     ## NOTE: As if </head>
4986     #pop @{$self->{open_elements}};
4987     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4988     ## NOTE: Reprocess.
4989    
4990     #
4991     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4992     !!!cp ('t149.2');
4993    
4994     ## NOTE: As if </head>
4995     pop @{$self->{open_elements}};
4996     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4997     ## NOTE: Reprocess.
4998    
4999     #
5000     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
5001     !!!cp ('t149.3');
5002    
5003 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
5004 wakaba 1.104
5005     ## As if </noscript>
5006     pop @{$self->{open_elements}};
5007     #$self->{insertion_mode} = IN_HEAD_IM;
5008     ## NOTE: Reprocess.
5009    
5010     ## NOTE: As if </head>
5011     pop @{$self->{open_elements}};
5012     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
5013     ## NOTE: Reprocess.
5014    
5015     #
5016     } else {
5017     !!!cp ('t149.4');
5018     #
5019     }
5020    
5021     ## NOTE: As if <body>
5022 wakaba 1.116 !!!insert-element ('body',, $token);
5023 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
5024     ## NOTE: Reprocess.
5025 wakaba 1.126 next B;
5026 wakaba 1.104 } else {
5027     die "$0: $token->{type}: Unknown token type";
5028     }
5029 wakaba 1.52
5030     ## ISSUE: An issue in the spec.
5031 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
5032 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5033 wakaba 1.79 !!!cp ('t150');
5034 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
5035     $reconstruct_active_formatting_elements->($insert_to_current);
5036    
5037     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5038    
5039     !!!next-token;
5040 wakaba 1.126 next B;
5041 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5042 wakaba 1.52 if ({
5043     caption => 1, col => 1, colgroup => 1, tbody => 1,
5044     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
5045     }->{$token->{tag_name}}) {
5046 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
5047 wakaba 1.52 ## have an element in table scope
5048 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
5049 wakaba 1.52 my $node = $self->{open_elements}->[$_];
5050 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
5051 wakaba 1.79 !!!cp ('t151');
5052 wakaba 1.108
5053     ## Close the cell
5054 wakaba 1.125 !!!back-token; # <x>
5055 wakaba 1.122 $token = {type => END_TAG_TOKEN,
5056     tag_name => $node->[0]->manakai_local_name,
5057 wakaba 1.114 line => $token->{line},
5058     column => $token->{column}};
5059 wakaba 1.126 next B;
5060 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5061 wakaba 1.79 !!!cp ('t152');
5062 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
5063     last;
5064 wakaba 1.52 }
5065 wakaba 1.108 }
5066    
5067     !!!cp ('t153');
5068     !!!parse-error (type => 'start tag not allowed',
5069 wakaba 1.153 text => $token->{tag_name}, token => $token);
5070 wakaba 1.108 ## Ignore the token
5071 wakaba 1.125 !!!nack ('t153.1');
5072 wakaba 1.108 !!!next-token;
5073 wakaba 1.126 next B;
5074 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
5075 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
5076     token => $token);
5077 wakaba 1.52
5078 wakaba 1.108 ## NOTE: As if </caption>.
5079     ## have a caption element in table scope
5080     my $i;
5081 wakaba 1.108 INSCOPE: {
5082     for (reverse 0..$#{$self->{open_elements}}) {
5083     my $node = $self->{open_elements}->[$_];
5084 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5085 wakaba 1.108 !!!cp ('t155');
5086     $i = $_;
5087     last INSCOPE;
5088 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5089 wakaba 1.108 !!!cp ('t156');
5090     last;
5091     }
5092 wakaba 1.52 }
5093 wakaba 1.108
5094     !!!cp ('t157');
5095     !!!parse-error (type => 'start tag not allowed',
5096 wakaba 1.153 text => $token->{tag_name}, token => $token);
5097 wakaba 1.108 ## Ignore the token
5098 wakaba 1.125 !!!nack ('t157.1');
5099 wakaba 1.108 !!!next-token;
5100 wakaba 1.126 next B;
5101 wakaba 1.52 } # INSCOPE
5102    
5103     ## generate implied end tags
5104 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5105     & END_TAG_OPTIONAL_EL) {
5106 wakaba 1.79 !!!cp ('t158');
5107 wakaba 1.86 pop @{$self->{open_elements}};
5108 wakaba 1.52 }
5109    
5110 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5111 wakaba 1.79 !!!cp ('t159');
5112 wakaba 1.122 !!!parse-error (type => 'not closed',
5113 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5114 wakaba 1.122 ->manakai_local_name,
5115     token => $token);
5116 wakaba 1.79 } else {
5117     !!!cp ('t160');
5118 wakaba 1.52 }
5119    
5120     splice @{$self->{open_elements}}, $i;
5121    
5122     $clear_up_to_marker->();
5123    
5124 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5125 wakaba 1.52
5126     ## reprocess
5127 wakaba 1.125 !!!ack-later;
5128 wakaba 1.126 next B;
5129 wakaba 1.52 } else {
5130 wakaba 1.79 !!!cp ('t161');
5131 wakaba 1.52 #
5132     }
5133     } else {
5134 wakaba 1.79 !!!cp ('t162');
5135 wakaba 1.52 #
5136     }
5137 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5138 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5139 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
5140 wakaba 1.43 ## have an element in table scope
5141 wakaba 1.52 my $i;
5142 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5143     my $node = $self->{open_elements}->[$_];
5144 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5145 wakaba 1.79 !!!cp ('t163');
5146 wakaba 1.52 $i = $_;
5147 wakaba 1.43 last INSCOPE;
5148 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5149 wakaba 1.79 !!!cp ('t164');
5150 wakaba 1.43 last INSCOPE;
5151     }
5152     } # INSCOPE
5153 wakaba 1.52 unless (defined $i) {
5154 wakaba 1.79 !!!cp ('t165');
5155 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5156     text => $token->{tag_name},
5157     token => $token);
5158 wakaba 1.43 ## Ignore the token
5159     !!!next-token;
5160 wakaba 1.126 next B;
5161 wakaba 1.43 }
5162    
5163 wakaba 1.52 ## generate implied end tags
5164 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5165     & END_TAG_OPTIONAL_EL) {
5166 wakaba 1.79 !!!cp ('t166');
5167 wakaba 1.86 pop @{$self->{open_elements}};
5168 wakaba 1.52 }
5169 wakaba 1.86
5170 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5171     ne $token->{tag_name}) {
5172 wakaba 1.79 !!!cp ('t167');
5173 wakaba 1.122 !!!parse-error (type => 'not closed',
5174 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5175 wakaba 1.122 ->manakai_local_name,
5176     token => $token);
5177 wakaba 1.79 } else {
5178     !!!cp ('t168');
5179 wakaba 1.52 }
5180    
5181     splice @{$self->{open_elements}}, $i;
5182    
5183     $clear_up_to_marker->();
5184    
5185 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5186 wakaba 1.52
5187     !!!next-token;
5188 wakaba 1.126 next B;
5189 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
5190 wakaba 1.79 !!!cp ('t169');
5191 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5192     text => $token->{tag_name}, token => $token);
5193 wakaba 1.52 ## Ignore the token
5194     !!!next-token;
5195 wakaba 1.126 next B;
5196 wakaba 1.52 } else {
5197 wakaba 1.79 !!!cp ('t170');
5198 wakaba 1.52 #
5199     }
5200     } elsif ($token->{tag_name} eq 'caption') {
5201 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
5202 wakaba 1.43 ## have a caption element in table scope
5203     my $i;
5204 wakaba 1.108 INSCOPE: {
5205     for (reverse 0..$#{$self->{open_elements}}) {
5206     my $node = $self->{open_elements}->[$_];
5207 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5208 wakaba 1.108 !!!cp ('t171');
5209     $i = $_;
5210     last INSCOPE;
5211 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5212 wakaba 1.108 !!!cp ('t172');
5213     last;
5214     }
5215 wakaba 1.43 }
5216 wakaba 1.108
5217     !!!cp ('t173');
5218     !!!parse-error (type => 'unmatched end tag',
5219 wakaba 1.153 text => $token->{tag_name}, token => $token);
5220 wakaba 1.108 ## Ignore the token
5221     !!!next-token;
5222 wakaba 1.126 next B;
5223 wakaba 1.43 } # INSCOPE
5224    
5225     ## generate implied end tags
5226 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5227     & END_TAG_OPTIONAL_EL) {
5228 wakaba 1.79 !!!cp ('t174');
5229 wakaba 1.86 pop @{$self->{open_elements}};
5230 wakaba 1.43 }
5231 wakaba 1.52
5232 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5233 wakaba 1.79 !!!cp ('t175');
5234 wakaba 1.122 !!!parse-error (type => 'not closed',
5235 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5236 wakaba 1.122 ->manakai_local_name,
5237     token => $token);
5238 wakaba 1.79 } else {
5239     !!!cp ('t176');
5240 wakaba 1.52 }
5241    
5242     splice @{$self->{open_elements}}, $i;
5243    
5244     $clear_up_to_marker->();
5245    
5246 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5247 wakaba 1.52
5248     !!!next-token;
5249 wakaba 1.126 next B;
5250 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
5251 wakaba 1.79 !!!cp ('t177');
5252 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5253     text => $token->{tag_name}, token => $token);
5254 wakaba 1.52 ## Ignore the token
5255     !!!next-token;
5256 wakaba 1.126 next B;
5257 wakaba 1.52 } else {
5258 wakaba 1.79 !!!cp ('t178');
5259 wakaba 1.52 #
5260     }
5261     } elsif ({
5262     table => 1, tbody => 1, tfoot => 1,
5263     thead => 1, tr => 1,
5264     }->{$token->{tag_name}} and
5265 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
5266 wakaba 1.52 ## have an element in table scope
5267     my $i;
5268     my $tn;
5269 wakaba 1.108 INSCOPE: {
5270     for (reverse 0..$#{$self->{open_elements}}) {
5271     my $node = $self->{open_elements}->[$_];
5272 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5273 wakaba 1.108 !!!cp ('t179');
5274     $i = $_;
5275    
5276     ## Close the cell
5277 wakaba 1.125 !!!back-token; # </x>
5278 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
5279     line => $token->{line},
5280     column => $token->{column}};
5281 wakaba 1.126 next B;
5282 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
5283 wakaba 1.108 !!!cp ('t180');
5284 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
5285 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
5286     ## in scope in the stack of open elements by definition.
5287 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5288 wakaba 1.108 ## ISSUE: Can this be reached?
5289     !!!cp ('t181');
5290     last;
5291     }
5292 wakaba 1.52 }
5293 wakaba 1.108
5294 wakaba 1.79 !!!cp ('t182');
5295 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
5296 wakaba 1.153 text => $token->{tag_name}, token => $token);
5297 wakaba 1.52 ## Ignore the token
5298     !!!next-token;
5299 wakaba 1.126 next B;
5300 wakaba 1.108 } # INSCOPE
5301 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
5302 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5303 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
5304     token => $token);
5305 wakaba 1.52
5306     ## As if </caption>
5307     ## have a caption element in table scope
5308     my $i;
5309     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5310     my $node = $self->{open_elements}->[$_];
5311 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5312 wakaba 1.79 !!!cp ('t184');
5313 wakaba 1.52 $i = $_;
5314     last INSCOPE;
5315 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5316 wakaba 1.79 !!!cp ('t185');
5317 wakaba 1.52 last INSCOPE;
5318     }
5319     } # INSCOPE
5320     unless (defined $i) {
5321 wakaba 1.79 !!!cp ('t186');
5322 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5323     text => 'caption', token => $token);
5324 wakaba 1.52 ## Ignore the token
5325     !!!next-token;
5326 wakaba 1.126 next B;
5327 wakaba 1.52 }
5328    
5329     ## generate implied end tags
5330 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5331 wakaba 1.79 !!!cp ('t187');
5332 wakaba 1.86 pop @{$self->{open_elements}};
5333 wakaba 1.52 }
5334    
5335 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5336 wakaba 1.79 !!!cp ('t188');
5337 wakaba 1.122 !!!parse-error (type => 'not closed',
5338 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5339 wakaba 1.122 ->manakai_local_name,
5340     token => $token);
5341 wakaba 1.79 } else {
5342     !!!cp ('t189');
5343 wakaba 1.52 }
5344    
5345     splice @{$self->{open_elements}}, $i;
5346    
5347     $clear_up_to_marker->();
5348    
5349 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5350 wakaba 1.52
5351     ## reprocess
5352 wakaba 1.126 next B;
5353 wakaba 1.52 } elsif ({
5354     body => 1, col => 1, colgroup => 1, html => 1,
5355     }->{$token->{tag_name}}) {
5356 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
5357 wakaba 1.79 !!!cp ('t190');
5358 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5359     text => $token->{tag_name}, token => $token);
5360 wakaba 1.52 ## Ignore the token
5361     !!!next-token;
5362 wakaba 1.126 next B;
5363 wakaba 1.52 } else {
5364 wakaba 1.79 !!!cp ('t191');
5365 wakaba 1.52 #
5366     }
5367     } elsif ({
5368     tbody => 1, tfoot => 1,
5369     thead => 1, tr => 1,
5370     }->{$token->{tag_name}} and
5371 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5372 wakaba 1.79 !!!cp ('t192');
5373 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5374     text => $token->{tag_name}, token => $token);
5375 wakaba 1.52 ## Ignore the token
5376     !!!next-token;
5377 wakaba 1.126 next B;
5378 wakaba 1.52 } else {
5379 wakaba 1.79 !!!cp ('t193');
5380 wakaba 1.52 #
5381     }
5382 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5383     for my $entry (@{$self->{open_elements}}) {
5384 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
5385 wakaba 1.104 !!!cp ('t75');
5386 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5387 wakaba 1.104 last;
5388     }
5389     }
5390    
5391     ## Stop parsing.
5392     last B;
5393 wakaba 1.52 } else {
5394     die "$0: $token->{type}: Unknown token type";
5395     }
5396    
5397     $insert = $insert_to_current;
5398     #
5399 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
5400 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5401 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
5402     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5403     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5404 wakaba 1.52
5405 wakaba 1.95 unless (length $token->{data}) {
5406     !!!cp ('t194');
5407     !!!next-token;
5408 wakaba 1.126 next B;
5409 wakaba 1.95 } else {
5410     !!!cp ('t195');
5411     }
5412     }
5413 wakaba 1.52
5414 wakaba 1.153 !!!parse-error (type => 'in table:#text', token => $token);
5415 wakaba 1.52
5416     ## As if in body, but insert into foster parent element
5417     ## ISSUE: Spec says that "whenever a node would be inserted
5418     ## into the current node" while characters might not
5419     ## result in a new Text node.
5420     $reconstruct_active_formatting_elements->($insert_to_foster);
5421    
5422 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
5423 wakaba 1.52 # MUST
5424     my $foster_parent_element;
5425     my $next_sibling;
5426     my $prev_sibling;
5427     OE: for (reverse 0..$#{$self->{open_elements}}) {
5428 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
5429 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5430     if (defined $parent and $parent->node_type == 1) {
5431 wakaba 1.79 !!!cp ('t196');
5432 wakaba 1.52 $foster_parent_element = $parent;
5433     $next_sibling = $self->{open_elements}->[$_]->[0];
5434     $prev_sibling = $next_sibling->previous_sibling;
5435     } else {
5436 wakaba 1.79 !!!cp ('t197');
5437 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5438     $prev_sibling = $foster_parent_element->last_child;
5439     }
5440     last OE;
5441     }
5442     } # OE
5443     $foster_parent_element = $self->{open_elements}->[0]->[0] and
5444     $prev_sibling = $foster_parent_element->last_child
5445     unless defined $foster_parent_element;
5446     if (defined $prev_sibling and
5447     $prev_sibling->node_type == 3) {
5448 wakaba 1.79 !!!cp ('t198');
5449 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
5450     } else {
5451 wakaba 1.79 !!!cp ('t199');
5452 wakaba 1.52 $foster_parent_element->insert_before
5453     ($self->{document}->create_text_node ($token->{data}),
5454     $next_sibling);
5455     }
5456 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
5457     } else {
5458     !!!cp ('t200');
5459     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5460     }
5461 wakaba 1.52
5462 wakaba 1.95 !!!next-token;
5463 wakaba 1.126 next B;
5464 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5465 wakaba 1.153 if ({
5466     tr => ($self->{insertion_mode} != IN_ROW_IM),
5467     th => 1, td => 1,
5468     }->{$token->{tag_name}}) {
5469     if ($self->{insertion_mode} == IN_TABLE_IM) {
5470     ## Clear back to table context
5471     while (not ($self->{open_elements}->[-1]->[1]
5472     & TABLE_SCOPING_EL)) {
5473     !!!cp ('t201');
5474     pop @{$self->{open_elements}};
5475     }
5476    
5477     !!!insert-element ('tbody',, $token);
5478     $self->{insertion_mode} = IN_TABLE_BODY_IM;
5479     ## reprocess in the "in table body" insertion mode...
5480     }
5481    
5482     if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5483     unless ($token->{tag_name} eq 'tr') {
5484     !!!cp ('t202');
5485     !!!parse-error (type => 'missing start tag:tr', token => $token);
5486     }
5487 wakaba 1.43
5488 wakaba 1.153 ## Clear back to table body context
5489     while (not ($self->{open_elements}->[-1]->[1]
5490     & TABLE_ROWS_SCOPING_EL)) {
5491     !!!cp ('t203');
5492     ## ISSUE: Can this case be reached?
5493     pop @{$self->{open_elements}};
5494     }
5495 wakaba 1.43
5496 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5497 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5498 wakaba 1.79 !!!cp ('t204');
5499 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5500 wakaba 1.125 !!!nack ('t204');
5501 wakaba 1.52 !!!next-token;
5502 wakaba 1.126 next B;
5503 wakaba 1.52 } else {
5504 wakaba 1.79 !!!cp ('t205');
5505 wakaba 1.116 !!!insert-element ('tr',, $token);
5506 wakaba 1.52 ## reprocess in the "in row" insertion mode
5507     }
5508 wakaba 1.79 } else {
5509     !!!cp ('t206');
5510 wakaba 1.52 }
5511    
5512     ## Clear back to table row context
5513 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5514     & TABLE_ROW_SCOPING_EL)) {
5515 wakaba 1.79 !!!cp ('t207');
5516 wakaba 1.52 pop @{$self->{open_elements}};
5517 wakaba 1.43 }
5518 wakaba 1.52
5519 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5520 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
5521 wakaba 1.52
5522     push @$active_formatting_elements, ['#marker', ''];
5523    
5524 wakaba 1.125 !!!nack ('t207.1');
5525 wakaba 1.52 !!!next-token;
5526 wakaba 1.126 next B;
5527 wakaba 1.52 } elsif ({
5528     caption => 1, col => 1, colgroup => 1,
5529     tbody => 1, tfoot => 1, thead => 1,
5530 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5531 wakaba 1.52 }->{$token->{tag_name}}) {
5532 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5533 wakaba 1.52 ## As if </tr>
5534 wakaba 1.43 ## have an element in table scope
5535     my $i;
5536     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5537     my $node = $self->{open_elements}->[$_];
5538 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5539 wakaba 1.79 !!!cp ('t208');
5540 wakaba 1.43 $i = $_;
5541     last INSCOPE;
5542 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5543 wakaba 1.79 !!!cp ('t209');
5544 wakaba 1.43 last INSCOPE;
5545     }
5546     } # INSCOPE
5547 wakaba 1.79 unless (defined $i) {
5548 wakaba 1.125 !!!cp ('t210');
5549 wakaba 1.83 ## TODO: This type is wrong (the token being processed is a start tag, not an end tag).
5550 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5551     text => $token->{tag_name}, token => $token);
5552 wakaba 1.52 ## Ignore the token (no |tr| element was found in table scope)
5553 wakaba 1.125 !!!nack ('t210.1');
5554 wakaba 1.52 !!!next-token;
5555 wakaba 1.126 next B;
5556 wakaba 1.43 }
5557    
5558 wakaba 1.52 ## Clear back to table row context
5559 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5560     & TABLE_ROW_SCOPING_EL)) {
5561 wakaba 1.79 !!!cp ('t211');
5562 wakaba 1.83 ## ISSUE: Can this case be reached?
5563 wakaba 1.52 pop @{$self->{open_elements}};
5564 wakaba 1.1 }
5565 wakaba 1.43
5566 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5567 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5568 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5569 wakaba 1.79 !!!cp ('t212');
5570 wakaba 1.52 ## reprocess
5571 wakaba 1.125 !!!ack-later;
5572 wakaba 1.126 next B;
5573 wakaba 1.52 } else {
5574 wakaba 1.79 !!!cp ('t213');
5575 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5576     }
5577 wakaba 1.1 }
5578 wakaba 1.52
5579 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5580 wakaba 1.52 ## have an element in table scope
5581 wakaba 1.43 my $i;
5582     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5583     my $node = $self->{open_elements}->[$_];
5584 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5585 wakaba 1.79 !!!cp ('t214');
5586 wakaba 1.43 $i = $_;
5587     last INSCOPE;
5588 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5589 wakaba 1.79 !!!cp ('t215');
5590 wakaba 1.43 last INSCOPE;
5591     }
5592     } # INSCOPE
5593 wakaba 1.52 unless (defined $i) {
5594 wakaba 1.79 !!!cp ('t216');
5595 wakaba 1.153 ## TODO: This error type is wrong.
5596     !!!parse-error (type => 'unmatched end tag',
5597     text => $token->{tag_name}, token => $token);
5598 wakaba 1.52 ## Ignore the token
5599 wakaba 1.125 !!!nack ('t216.1');
5600 wakaba 1.52 !!!next-token;
5601 wakaba 1.126 next B;
5602 wakaba 1.43 }
5603 wakaba 1.52
5604     ## Clear back to table body context
5605 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5606     & TABLE_ROWS_SCOPING_EL)) {
5607 wakaba 1.79 !!!cp ('t217');
5608 wakaba 1.83 ## ISSUE: Can this state be reached?
5609 wakaba 1.52 pop @{$self->{open_elements}};
5610 wakaba 1.43 }
5611    
5612 wakaba 1.52 ## As if <{current node}>
5613     ## have an element in table scope
5614     ## true by definition
5615 wakaba 1.43
5616 wakaba 1.52 ## Clear back to table body context
5617     ## nop by definition
5618 wakaba 1.43
5619 wakaba 1.52 pop @{$self->{open_elements}};
5620 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5621 wakaba 1.52 ## reprocess in "in table" insertion mode...
5622 wakaba 1.79 } else {
5623     !!!cp ('t218');
5624 wakaba 1.52 }
5625    
5626     if ($token->{tag_name} eq 'col') {
5627     ## Clear back to table context
5628 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5629     & TABLE_SCOPING_EL)) {
5630 wakaba 1.79 !!!cp ('t219');
5631 wakaba 1.83 ## ISSUE: Can this state be reached?
5632 wakaba 1.52 pop @{$self->{open_elements}};
5633     }
5634 wakaba 1.43
5635 wakaba 1.116 !!!insert-element ('colgroup',, $token);
5636 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5637 wakaba 1.52 ## reprocess
5638 wakaba 1.125 !!!ack-later;
5639 wakaba 1.126 next B;
5640 wakaba 1.52 } elsif ({
5641     caption => 1,
5642     colgroup => 1,
5643     tbody => 1, tfoot => 1, thead => 1,
5644     }->{$token->{tag_name}}) {
5645     ## Clear back to table context
5646 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5647     & TABLE_SCOPING_EL)) {
5648 wakaba 1.79 !!!cp ('t220');
5649 wakaba 1.83 ## ISSUE: Can this state be reached?
5650 wakaba 1.52 pop @{$self->{open_elements}};
5651 wakaba 1.1 }
5652 wakaba 1.52
5653     push @$active_formatting_elements, ['#marker', '']
5654     if $token->{tag_name} eq 'caption';
5655    
5656 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5657 wakaba 1.52 $self->{insertion_mode} = {
5658 wakaba 1.54 caption => IN_CAPTION_IM,
5659     colgroup => IN_COLUMN_GROUP_IM,
5660     tbody => IN_TABLE_BODY_IM,
5661     tfoot => IN_TABLE_BODY_IM,
5662     thead => IN_TABLE_BODY_IM,
5663 wakaba 1.52 }->{$token->{tag_name}};
5664 wakaba 1.1 !!!next-token;
5665 wakaba 1.125 !!!nack ('t220.1');
5666 wakaba 1.126 next B;
5667 wakaba 1.52 } else {
5668     die "$0: in table: <>: $token->{tag_name}";
5669 wakaba 1.1 }
5670 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5671 wakaba 1.122 !!!parse-error (type => 'not closed',
5672 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5673 wakaba 1.122 ->manakai_local_name,
5674     token => $token);
5675 wakaba 1.1
5676 wakaba 1.52 ## As if </table>
5677 wakaba 1.1 ## have a table element in table scope
5678     my $i;
5679 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5680     my $node = $self->{open_elements}->[$_];
5681 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5682 wakaba 1.79 !!!cp ('t221');
5683 wakaba 1.1 $i = $_;
5684     last INSCOPE;
5685 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5686 wakaba 1.79 !!!cp ('t222');
5687 wakaba 1.1 last INSCOPE;
5688     }
5689     } # INSCOPE
5690     unless (defined $i) {
5691 wakaba 1.79 !!!cp ('t223');
5692 wakaba 1.83 ## TODO: The following is wrong, maybe.
5693 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
5694     token => $token);
5695 wakaba 1.52 ## Ignore tokens </table><table>
5696 wakaba 1.125 !!!nack ('t223.1');
5697 wakaba 1.1 !!!next-token;
5698 wakaba 1.126 next B;
5699 wakaba 1.1 }
5700    
5701 wakaba 1.151 ## TODO: Followings are removed from the latest spec.
5702 wakaba 1.1 ## generate implied end tags
5703 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5704 wakaba 1.79 !!!cp ('t224');
5705 wakaba 1.86 pop @{$self->{open_elements}};
5706 wakaba 1.1 }
5707    
5708 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5709 wakaba 1.79 !!!cp ('t225');
5710 wakaba 1.122 ## NOTE: |<table><tr><table>|
5711     !!!parse-error (type => 'not closed',
5712 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5713 wakaba 1.122 ->manakai_local_name,
5714     token => $token);
5715 wakaba 1.79 } else {
5716     !!!cp ('t226');
5717 wakaba 1.1 }
5718    
5719 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5720 wakaba 1.95 pop @{$open_tables};
5721 wakaba 1.1
5722 wakaba 1.52 $self->_reset_insertion_mode;
5723 wakaba 1.1
5724 wakaba 1.125 ## reprocess
5725     !!!ack-later;
5726 wakaba 1.126 next B;
5727 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
5728     if (not $open_tables->[-1]->[1]) { # tainted
5729     !!!cp ('t227.8');
5730     ## NOTE: This is a "as if in head" code clone.
5731     $parse_rcdata->(CDATA_CONTENT_MODEL);
5732 wakaba 1.126 next B;
5733 wakaba 1.100 } else {
5734     !!!cp ('t227.7');
5735     #
5736     }
5737     } elsif ($token->{tag_name} eq 'script') {
5738     if (not $open_tables->[-1]->[1]) { # tainted
5739     !!!cp ('t227.6');
5740     ## NOTE: This is a "as if in head" code clone.
5741     $script_start_tag->();
5742 wakaba 1.126 next B;
5743 wakaba 1.100 } else {
5744     !!!cp ('t227.5');
5745     #
5746     }
5747 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5748     if (not $open_tables->[-1]->[1]) { # tainted
5749     if ($token->{attributes}->{type}) { ## TODO: case
5750     my $type = lc $token->{attributes}->{type}->{value};
5751     if ($type eq 'hidden') {
5752     !!!cp ('t227.3');
5753 wakaba 1.153 !!!parse-error (type => 'in table',
5754     text => $token->{tag_name}, token => $token);
5755 wakaba 1.98
5756 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5757 wakaba 1.98
5758     ## TODO: form element pointer
5759    
5760     pop @{$self->{open_elements}};
5761    
5762     !!!next-token;
5763 wakaba 1.125 !!!ack ('t227.2.1');
5764 wakaba 1.126 next B;
5765 wakaba 1.98 } else {
5766     !!!cp ('t227.2');
5767     #
5768     }
5769     } else {
5770     !!!cp ('t227.1');
5771     #
5772     }
5773     } else {
5774     !!!cp ('t227.4');
5775     #
5776     }
5777 wakaba 1.58 } else {
5778 wakaba 1.79 !!!cp ('t227');
5779 wakaba 1.58 #
5780     }
5781 wakaba 1.98
5782 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
5783     token => $token);
5784 wakaba 1.98
5785     $insert = $insert_to_foster;
5786     #
5787 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5788 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5789 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5790 wakaba 1.52 ## have an element in table scope
5791     my $i;
5792     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5793     my $node = $self->{open_elements}->[$_];
5794 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5795 wakaba 1.79 !!!cp ('t228');
5796 wakaba 1.52 $i = $_;
5797     last INSCOPE;
5798 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5799 wakaba 1.79 !!!cp ('t229');
5800 wakaba 1.52 last INSCOPE;
5801     }
5802     } # INSCOPE
5803     unless (defined $i) {
5804 wakaba 1.79 !!!cp ('t230');
5805 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5806     text => $token->{tag_name}, token => $token);
5807 wakaba 1.52 ## Ignore the token
5808 wakaba 1.125 !!!nack ('t230.1');
5809 wakaba 1.42 !!!next-token;
5810 wakaba 1.126 next B;
5811 wakaba 1.79 } else {
5812     !!!cp ('t232');
5813 wakaba 1.42 }
5814    
5815 wakaba 1.52 ## Clear back to table row context
5816 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5817     & TABLE_ROW_SCOPING_EL)) {
5818 wakaba 1.79 !!!cp ('t231');
5819 wakaba 1.83 ## ISSUE: Can this state be reached?
5820 wakaba 1.52 pop @{$self->{open_elements}};
5821     }
5822 wakaba 1.42
5823 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5824 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5825 wakaba 1.52 !!!next-token;
5826 wakaba 1.125 !!!nack ('t231.1');
5827 wakaba 1.126 next B;
5828 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5829 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5830 wakaba 1.52 ## As if </tr>
5831     ## have an element in table scope
5832     my $i;
5833     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5834     my $node = $self->{open_elements}->[$_];
5835 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5836 wakaba 1.79 !!!cp ('t233');
5837 wakaba 1.52 $i = $_;
5838     last INSCOPE;
5839 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5840 wakaba 1.79 !!!cp ('t234');
5841 wakaba 1.52 last INSCOPE;
5842 wakaba 1.42 }
5843 wakaba 1.52 } # INSCOPE
5844     unless (defined $i) {
5845 wakaba 1.79 !!!cp ('t235');
5846 wakaba 1.83 ## TODO: The following is wrong.
5847 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5848     text => $token->{type}, token => $token);
5849 wakaba 1.52 ## Ignore the token
5850 wakaba 1.125 !!!nack ('t236.1');
5851 wakaba 1.52 !!!next-token;
5852 wakaba 1.126 next B;
5853 wakaba 1.42 }
5854 wakaba 1.52
5855     ## Clear back to table row context
5856 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5857     & TABLE_ROW_SCOPING_EL)) {
5858 wakaba 1.79 !!!cp ('t236');
5859 wakaba 1.83 ## ISSUE: Can this state be reached?
5860 wakaba 1.46 pop @{$self->{open_elements}};
5861 wakaba 1.1 }
5862 wakaba 1.46
5863 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5864 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5865 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5866 wakaba 1.1 }
5867    
5868 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5869 wakaba 1.52 ## have an element in table scope
5870     my $i;
5871     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5872     my $node = $self->{open_elements}->[$_];
5873 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5874 wakaba 1.79 !!!cp ('t237');
5875 wakaba 1.52 $i = $_;
5876     last INSCOPE;
5877 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5878 wakaba 1.79 !!!cp ('t238');
5879 wakaba 1.52 last INSCOPE;
5880     }
5881     } # INSCOPE
5882     unless (defined $i) {
5883 wakaba 1.79 !!!cp ('t239');
5884 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5885     text => $token->{tag_name}, token => $token);
5886 wakaba 1.52 ## Ignore the token
5887 wakaba 1.125 !!!nack ('t239.1');
5888 wakaba 1.52 !!!next-token;
5889 wakaba 1.126 next B;
5890 wakaba 1.47 }
5891    
5892     ## Clear back to table body context
5893 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5894     & TABLE_ROWS_SCOPING_EL)) {
5895 wakaba 1.79 !!!cp ('t240');
5896 wakaba 1.47 pop @{$self->{open_elements}};
5897     }
5898    
5899 wakaba 1.52 ## As if <{current node}>
5900     ## have an element in table scope
5901     ## true by definition
5902    
5903     ## Clear back to table body context
5904     ## nop by definition
5905    
5906     pop @{$self->{open_elements}};
5907 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5908 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5909     }
5910    
5911 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5912     ## When you edit the code fragment below, please ensure that
5913     ## the code for <table> in the "in table" insertion mode
5914     ## is synced with it.
5915    
5916 wakaba 1.52 ## have a table element in table scope
5917     my $i;
5918     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5919     my $node = $self->{open_elements}->[$_];
5920 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5921 wakaba 1.79 !!!cp ('t241');
5922 wakaba 1.52 $i = $_;
5923     last INSCOPE;
5924 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5925 wakaba 1.79 !!!cp ('t242');
5926 wakaba 1.52 last INSCOPE;
5927 wakaba 1.47 }
5928 wakaba 1.52 } # INSCOPE
5929     unless (defined $i) {
5930 wakaba 1.79 !!!cp ('t243');
5931 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5932     text => $token->{tag_name}, token => $token);
5933 wakaba 1.52 ## Ignore the token
5934 wakaba 1.125 !!!nack ('t243.1');
5935 wakaba 1.52 !!!next-token;
5936 wakaba 1.126 next B;
5937 wakaba 1.3 }
5938 wakaba 1.52
5939     splice @{$self->{open_elements}}, $i;
5940 wakaba 1.95 pop @{$open_tables};
5941 wakaba 1.1
5942 wakaba 1.52 $self->_reset_insertion_mode;
5943 wakaba 1.47
5944     !!!next-token;
5945 wakaba 1.126 next B;
5946 wakaba 1.47 } elsif ({
5947 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5948 wakaba 1.52 }->{$token->{tag_name}} and
5949 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5950 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5951 wakaba 1.52 ## have an element in table scope
5952     my $i;
5953     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5954     my $node = $self->{open_elements}->[$_];
5955 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5956 wakaba 1.79 !!!cp ('t247');
5957 wakaba 1.52 $i = $_;
5958     last INSCOPE;
5959 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5960 wakaba 1.79 !!!cp ('t248');
5961 wakaba 1.52 last INSCOPE;
5962     }
5963     } # INSCOPE
5964     unless (defined $i) {
5965 wakaba 1.79 !!!cp ('t249');
5966 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5967     text => $token->{tag_name}, token => $token);
5968 wakaba 1.52 ## Ignore the token
5969 wakaba 1.125 !!!nack ('t249.1');
5970 wakaba 1.52 !!!next-token;
5971 wakaba 1.126 next B;
5972 wakaba 1.52 }
5973    
5974 wakaba 1.48 ## As if </tr>
5975     ## have an element in table scope
5976     my $i;
5977     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5978     my $node = $self->{open_elements}->[$_];
5979 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5980 wakaba 1.79 !!!cp ('t250');
5981 wakaba 1.48 $i = $_;
5982     last INSCOPE;
5983 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5984 wakaba 1.79 !!!cp ('t251');
5985 wakaba 1.48 last INSCOPE;
5986     }
5987     } # INSCOPE
5988 wakaba 1.52 unless (defined $i) {
5989 wakaba 1.79 !!!cp ('t252');
5990 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5991     text => 'tr', token => $token);
5992 wakaba 1.52 ## Ignore the token
5993 wakaba 1.125 !!!nack ('t252.1');
5994 wakaba 1.52 !!!next-token;
5995 wakaba 1.126 next B;
5996 wakaba 1.52 }
5997 wakaba 1.48
5998     ## Clear back to table row context
5999 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
6000     & TABLE_ROW_SCOPING_EL)) {
6001 wakaba 1.79 !!!cp ('t253');
6002 wakaba 1.83 ## ISSUE: Can this case be reached?
6003 wakaba 1.48 pop @{$self->{open_elements}};
6004     }
6005    
6006     pop @{$self->{open_elements}}; # tr
6007 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
6008 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
6009     }
6010    
6011     ## have an element in table scope
6012     my $i;
6013     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6014     my $node = $self->{open_elements}->[$_];
6015 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6016 wakaba 1.79 !!!cp ('t254');
6017 wakaba 1.52 $i = $_;
6018     last INSCOPE;
6019 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6020 wakaba 1.79 !!!cp ('t255');
6021 wakaba 1.52 last INSCOPE;
6022     }
6023     } # INSCOPE
6024     unless (defined $i) {
6025 wakaba 1.79 !!!cp ('t256');
6026 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6027     text => $token->{tag_name}, token => $token);
6028 wakaba 1.52 ## Ignore the token
6029 wakaba 1.125 !!!nack ('t256.1');
6030 wakaba 1.52 !!!next-token;
6031 wakaba 1.126 next B;
6032 wakaba 1.52 }
6033    
6034     ## Clear back to table body context
6035 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
6036     & TABLE_ROWS_SCOPING_EL)) {
6037 wakaba 1.79 !!!cp ('t257');
6038 wakaba 1.83 ## ISSUE: Can this case be reached?
6039 wakaba 1.52 pop @{$self->{open_elements}};
6040     }
6041    
6042     pop @{$self->{open_elements}};
6043 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
6044 wakaba 1.125 !!!nack ('t257.1');
6045 wakaba 1.52 !!!next-token;
6046 wakaba 1.126 next B;
6047 wakaba 1.52 } elsif ({
6048     body => 1, caption => 1, col => 1, colgroup => 1,
6049     html => 1, td => 1, th => 1,
6050 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
6051     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
6052 wakaba 1.52 }->{$token->{tag_name}}) {
6053 wakaba 1.125 !!!cp ('t258');
6054 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6055     text => $token->{tag_name}, token => $token);
6056 wakaba 1.125 ## Ignore the token
6057     !!!nack ('t258.1');
6058     !!!next-token;
6059 wakaba 1.126 next B;
6060 wakaba 1.58 } else {
6061 wakaba 1.79 !!!cp ('t259');
6062 wakaba 1.153 !!!parse-error (type => 'in table:/',
6063     text => $token->{tag_name}, token => $token);
6064 wakaba 1.52
6065 wakaba 1.58 $insert = $insert_to_foster;
6066     #
6067     }
6068 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6069 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6070 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6071 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6072 wakaba 1.104 !!!cp ('t259.1');
6073 wakaba 1.105 #
6074 wakaba 1.104 } else {
6075     !!!cp ('t259.2');
6076 wakaba 1.105 #
6077 wakaba 1.104 }
6078    
6079     ## Stop parsing
6080     last B;
6081 wakaba 1.58 } else {
6082     die "$0: $token->{type}: Unknown token type";
6083     }
6084 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6085 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6086 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6087     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6088     unless (length $token->{data}) {
6089 wakaba 1.79 !!!cp ('t260');
6090 wakaba 1.52 !!!next-token;
6091 wakaba 1.126 next B;
6092 wakaba 1.52 }
6093     }
6094    
6095 wakaba 1.79 !!!cp ('t261');
6096 wakaba 1.52 #
6097 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6098 wakaba 1.52 if ($token->{tag_name} eq 'col') {
6099 wakaba 1.79 !!!cp ('t262');
6100 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6101 wakaba 1.52 pop @{$self->{open_elements}};
6102 wakaba 1.125 !!!ack ('t262.1');
6103 wakaba 1.52 !!!next-token;
6104 wakaba 1.126 next B;
6105 wakaba 1.52 } else {
6106 wakaba 1.79 !!!cp ('t263');
6107 wakaba 1.52 #
6108     }
6109 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6110 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
6111 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
6112 wakaba 1.79 !!!cp ('t264');
6113 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6114     text => 'colgroup', token => $token);
6115 wakaba 1.52 ## Ignore the token
6116     !!!next-token;
6117 wakaba 1.126 next B;
6118 wakaba 1.52 } else {
6119 wakaba 1.79 !!!cp ('t265');
6120 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
6121 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
6122 wakaba 1.52 !!!next-token;
6123 wakaba 1.126 next B;
6124 wakaba 1.52 }
6125     } elsif ($token->{tag_name} eq 'col') {
6126 wakaba 1.79 !!!cp ('t266');
6127 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6128     text => 'col', token => $token);
6129 wakaba 1.52 ## Ignore the token
6130     !!!next-token;
6131 wakaba 1.126 next B;
6132 wakaba 1.52 } else {
6133 wakaba 1.79 !!!cp ('t267');
6134 wakaba 1.52 #
6135     }
6136 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6137 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6138 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6139     !!!cp ('t270.2');
6140     ## Stop parsing.
6141     last B;
6142     } else {
6143     ## NOTE: As if </colgroup>.
6144     !!!cp ('t270.1');
6145     pop @{$self->{open_elements}}; # colgroup
6146     $self->{insertion_mode} = IN_TABLE_IM;
6147     ## Reprocess.
6148 wakaba 1.126 next B;
6149 wakaba 1.104 }
6150     } else {
6151     die "$0: $token->{type}: Unknown token type";
6152     }
6153 wakaba 1.52
6154     ## As if </colgroup>
6155 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
6156 wakaba 1.79 !!!cp ('t269');
6157 wakaba 1.104 ## TODO: Wrong error type?
6158 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6159     text => 'colgroup', token => $token);
6160 wakaba 1.52 ## Ignore the token
6161 wakaba 1.125 !!!nack ('t269.1');
6162 wakaba 1.52 !!!next-token;
6163 wakaba 1.126 next B;
6164 wakaba 1.52 } else {
6165 wakaba 1.79 !!!cp ('t270');
6166 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
6167 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
6168 wakaba 1.125 !!!ack-later;
6169 wakaba 1.52 ## reprocess
6170 wakaba 1.126 next B;
6171 wakaba 1.52 }
6172 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
6173 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
6174 wakaba 1.79 !!!cp ('t271');
6175 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6176     !!!next-token;
6177 wakaba 1.126 next B;
6178 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
6179 wakaba 1.123 if ($token->{tag_name} eq 'option') {
6180     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6181     !!!cp ('t272');
6182     ## As if </option>
6183     pop @{$self->{open_elements}};
6184     } else {
6185     !!!cp ('t273');
6186     }
6187 wakaba 1.52
6188 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6189 wakaba 1.125 !!!nack ('t273.1');
6190 wakaba 1.123 !!!next-token;
6191 wakaba 1.126 next B;
6192 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
6193     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6194     !!!cp ('t274');
6195     ## As if </option>
6196     pop @{$self->{open_elements}};
6197     } else {
6198     !!!cp ('t275');
6199     }
6200 wakaba 1.52
6201 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6202     !!!cp ('t276');
6203     ## As if </optgroup>
6204     pop @{$self->{open_elements}};
6205     } else {
6206     !!!cp ('t277');
6207     }
6208 wakaba 1.52
6209 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6210 wakaba 1.125 !!!nack ('t277.1');
6211 wakaba 1.123 !!!next-token;
6212 wakaba 1.126 next B;
6213 wakaba 1.146 } elsif ({
6214     select => 1, input => 1, textarea => 1,
6215     }->{$token->{tag_name}} or
6216 wakaba 1.101 ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6217     {
6218     caption => 1, table => 1,
6219     tbody => 1, tfoot => 1, thead => 1,
6220     tr => 1, td => 1, th => 1,
6221     }->{$token->{tag_name}})) {
6222     ## TODO: The type below is not good - <select> is replaced by </select>
6223 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'select',
6224     token => $token);
6225 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
6226     ## as if there were </select> (otherwise).
6227 wakaba 1.123 ## have an element in table scope
6228     my $i;
6229     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6230     my $node = $self->{open_elements}->[$_];
6231     if ($node->[1] & SELECT_EL) {
6232     !!!cp ('t278');
6233     $i = $_;
6234     last INSCOPE;
6235     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6236     !!!cp ('t279');
6237     last INSCOPE;
6238     }
6239     } # INSCOPE
6240     unless (defined $i) {
6241     !!!cp ('t280');
6242 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6243     text => 'select', token => $token);
6244 wakaba 1.123 ## Ignore the token
6245 wakaba 1.125 !!!nack ('t280.1');
6246 wakaba 1.123 !!!next-token;
6247 wakaba 1.126 next B;
6248 wakaba 1.123 }
6249 wakaba 1.52
6250 wakaba 1.123 !!!cp ('t281');
6251     splice @{$self->{open_elements}}, $i;
6252 wakaba 1.52
6253 wakaba 1.123 $self->_reset_insertion_mode;
6254 wakaba 1.47
6255 wakaba 1.101 if ($token->{tag_name} eq 'select') {
6256 wakaba 1.125 !!!nack ('t281.2');
6257 wakaba 1.101 !!!next-token;
6258 wakaba 1.126 next B;
6259 wakaba 1.101 } else {
6260     !!!cp ('t281.1');
6261 wakaba 1.125 !!!ack-later;
6262 wakaba 1.101 ## Reprocess the token.
6263 wakaba 1.126 next B;
6264 wakaba 1.101 }
6265 wakaba 1.58 } else {
6266 wakaba 1.79 !!!cp ('t282');
6267 wakaba 1.153 !!!parse-error (type => 'in select',
6268     text => $token->{tag_name}, token => $token);
6269 wakaba 1.58 ## Ignore the token
6270 wakaba 1.125 !!!nack ('t282.1');
6271 wakaba 1.58 !!!next-token;
6272 wakaba 1.126 next B;
6273 wakaba 1.58 }
6274     } elsif ($token->{type} == END_TAG_TOKEN) {
6275 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
6276     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
6277     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
6278     !!!cp ('t283');
6279     ## As if </option>
6280     splice @{$self->{open_elements}}, -2;
6281     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6282     !!!cp ('t284');
6283     pop @{$self->{open_elements}};
6284     } else {
6285     !!!cp ('t285');
6286 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6287     text => $token->{tag_name}, token => $token);
6288 wakaba 1.123 ## Ignore the token
6289     }
6290 wakaba 1.125 !!!nack ('t285.1');
6291 wakaba 1.123 !!!next-token;
6292 wakaba 1.126 next B;
6293 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
6294     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6295     !!!cp ('t286');
6296     pop @{$self->{open_elements}};
6297     } else {
6298     !!!cp ('t287');
6299 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6300     text => $token->{tag_name}, token => $token);
6301 wakaba 1.123 ## Ignore the token
6302     }
6303 wakaba 1.125 !!!nack ('t287.1');
6304 wakaba 1.123 !!!next-token;
6305 wakaba 1.126 next B;
6306 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
6307     ## have an element in table scope
6308     my $i;
6309     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6310     my $node = $self->{open_elements}->[$_];
6311     if ($node->[1] & SELECT_EL) {
6312     !!!cp ('t288');
6313     $i = $_;
6314     last INSCOPE;
6315     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6316     !!!cp ('t289');
6317     last INSCOPE;
6318     }
6319     } # INSCOPE
6320     unless (defined $i) {
6321     !!!cp ('t290');
6322 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6323     text => $token->{tag_name}, token => $token);
6324 wakaba 1.123 ## Ignore the token
6325 wakaba 1.125 !!!nack ('t290.1');
6326 wakaba 1.123 !!!next-token;
6327 wakaba 1.126 next B;
6328 wakaba 1.123 }
6329 wakaba 1.52
6330 wakaba 1.123 !!!cp ('t291');
6331     splice @{$self->{open_elements}}, $i;
6332 wakaba 1.52
6333 wakaba 1.123 $self->_reset_insertion_mode;
6334 wakaba 1.52
6335 wakaba 1.125 !!!nack ('t291.1');
6336 wakaba 1.123 !!!next-token;
6337 wakaba 1.126 next B;
6338 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6339     {
6340     caption => 1, table => 1, tbody => 1,
6341     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6342     }->{$token->{tag_name}}) {
6343 wakaba 1.83 ## TODO: The following is wrong?
6344 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6345     text => $token->{tag_name}, token => $token);
6346 wakaba 1.52
6347 wakaba 1.123 ## have an element in table scope
6348     my $i;
6349     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6350     my $node = $self->{open_elements}->[$_];
6351     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6352     !!!cp ('t292');
6353     $i = $_;
6354     last INSCOPE;
6355     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6356     !!!cp ('t293');
6357     last INSCOPE;
6358     }
6359     } # INSCOPE
6360     unless (defined $i) {
6361     !!!cp ('t294');
6362     ## Ignore the token
6363 wakaba 1.125 !!!nack ('t294.1');
6364 wakaba 1.123 !!!next-token;
6365 wakaba 1.126 next B;
6366 wakaba 1.123 }
6367 wakaba 1.52
6368 wakaba 1.123 ## As if </select>
6369     ## have an element in table scope
6370     undef $i;
6371     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6372     my $node = $self->{open_elements}->[$_];
6373     if ($node->[1] & SELECT_EL) {
6374     !!!cp ('t295');
6375     $i = $_;
6376     last INSCOPE;
6377     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6378 wakaba 1.83 ## ISSUE: Can this state be reached?
6379 wakaba 1.123 !!!cp ('t296');
6380     last INSCOPE;
6381     }
6382     } # INSCOPE
6383     unless (defined $i) {
6384     !!!cp ('t297');
6385 wakaba 1.83 ## TODO: The following error type is correct?
6386 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6387     text => 'select', token => $token);
6388 wakaba 1.123 ## Ignore the </select> token
6389 wakaba 1.125 !!!nack ('t297.1');
6390 wakaba 1.123 !!!next-token; ## TODO: ok?
6391 wakaba 1.126 next B;
6392 wakaba 1.123 }
6393 wakaba 1.52
6394 wakaba 1.123 !!!cp ('t298');
6395     splice @{$self->{open_elements}}, $i;
6396 wakaba 1.52
6397 wakaba 1.123 $self->_reset_insertion_mode;
6398 wakaba 1.52
6399 wakaba 1.125 !!!ack-later;
6400 wakaba 1.123 ## reprocess
6401 wakaba 1.126 next B;
6402 wakaba 1.58 } else {
6403 wakaba 1.79 !!!cp ('t299');
6404 wakaba 1.153 !!!parse-error (type => 'in select:/',
6405     text => $token->{tag_name}, token => $token);
6406 wakaba 1.52 ## Ignore the token
6407 wakaba 1.125 !!!nack ('t299.3');
6408 wakaba 1.52 !!!next-token;
6409 wakaba 1.126 next B;
6410 wakaba 1.58 }
6411 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6412 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6413 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6414     !!!cp ('t299.1');
6415 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6416 wakaba 1.104 } else {
6417     !!!cp ('t299.2');
6418     }
6419    
6420     ## Stop parsing.
6421     last B;
6422 wakaba 1.58 } else {
6423     die "$0: $token->{type}: Unknown token type";
6424     }
6425 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6426 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
  ## Leading white space has been stripped from the token.  Keep it
  ## in a lexical so that the text appended below does not depend on
  ## |$1| still holding it after the call in between.
  my $data = $1;
  ## As if in body
  $reconstruct_active_formatting_elements->($insert_to_current);

  $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);

  unless (length $token->{data}) {
    ## Nothing but white space was in the token; fetch the next one.
    !!!cp ('t300');
    !!!next-token;
    next B;
  }
}
6440    
6441 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6442 wakaba 1.79 !!!cp ('t301');
6443 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6444 wakaba 1.52
6445 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6446 wakaba 1.79 } else {
6447     !!!cp ('t302');
6448 wakaba 1.52 }
6449    
6450     ## "after body" insertion mode
6451 wakaba 1.153 !!!parse-error (type => 'after body:#text', token => $token);
6452 wakaba 1.52
6453 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6454 wakaba 1.52 ## reprocess
6455 wakaba 1.126 next B;
6456 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6457 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6458 wakaba 1.79 !!!cp ('t303');
6459 wakaba 1.153 !!!parse-error (type => 'after html',
6460     text => $token->{tag_name}, token => $token);
6461 wakaba 1.52
6462 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6463 wakaba 1.79 } else {
6464     !!!cp ('t304');
6465 wakaba 1.52 }
6466    
6467     ## "after body" insertion mode
6468 wakaba 1.153 !!!parse-error (type => 'after body',
6469     text => $token->{tag_name}, token => $token);
6470 wakaba 1.52
6471 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6472 wakaba 1.125 !!!ack-later;
6473 wakaba 1.52 ## reprocess
6474 wakaba 1.126 next B;
6475 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6476 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6477 wakaba 1.79 !!!cp ('t305');
6478 wakaba 1.153 !!!parse-error (type => 'after html:/',
6479     text => $token->{tag_name}, token => $token);
6480 wakaba 1.52
6481 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6482 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6483 wakaba 1.79 } else {
6484     !!!cp ('t306');
6485 wakaba 1.52 }
6486    
6487     ## "after body" insertion mode
6488     if ($token->{tag_name} eq 'html') {
6489     if (defined $self->{inner_html_node}) {
6490 wakaba 1.79 !!!cp ('t307');
6491 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6492     text => 'html', token => $token);
6493 wakaba 1.52 ## Ignore the token
6494     !!!next-token;
6495 wakaba 1.126 next B;
6496 wakaba 1.52 } else {
6497 wakaba 1.79 !!!cp ('t308');
6498 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
6499 wakaba 1.52 !!!next-token;
6500 wakaba 1.126 next B;
6501 wakaba 1.52 }
6502     } else {
6503 wakaba 1.79 !!!cp ('t309');
6504 wakaba 1.153 !!!parse-error (type => 'after body:/',
6505     text => $token->{tag_name}, token => $token);
6506 wakaba 1.52
6507 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6508 wakaba 1.52 ## reprocess
6509 wakaba 1.126 next B;
6510 wakaba 1.52 }
6511 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6512     !!!cp ('t309.2');
6513     ## Stop parsing
6514     last B;
6515 wakaba 1.52 } else {
6516     die "$0: $token->{type}: Unknown token type";
6517     }
6518 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
6519 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6520 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6521     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6522    
6523     unless (length $token->{data}) {
6524 wakaba 1.79 !!!cp ('t310');
6525 wakaba 1.52 !!!next-token;
6526 wakaba 1.126 next B;
6527 wakaba 1.52 }
6528     }
6529    
6530     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6531 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6532 wakaba 1.79 !!!cp ('t311');
6533 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
6534 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6535 wakaba 1.79 !!!cp ('t312');
6536 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
6537 wakaba 1.158 } else { # "after after frameset"
6538 wakaba 1.79 !!!cp ('t313');
6539 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6540 wakaba 1.52 }
6541    
6542     ## Ignore the token.
6543     if (length $token->{data}) {
6544 wakaba 1.79 !!!cp ('t314');
6545 wakaba 1.52 ## reprocess the rest of characters
6546     } else {
6547 wakaba 1.79 !!!cp ('t315');
6548 wakaba 1.52 !!!next-token;
6549     }
6550 wakaba 1.126 next B;
6551 wakaba 1.52 }
6552    
6553     die qq[$0: Character "$token->{data}"];
6554 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6555 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6556 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6557 wakaba 1.79 !!!cp ('t318');
6558 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6559 wakaba 1.125 !!!nack ('t318.1');
6560 wakaba 1.52 !!!next-token;
6561 wakaba 1.126 next B;
6562 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
6563 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6564 wakaba 1.79 !!!cp ('t319');
6565 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6566 wakaba 1.52 pop @{$self->{open_elements}};
6567 wakaba 1.125 !!!ack ('t319.1');
6568 wakaba 1.52 !!!next-token;
6569 wakaba 1.126 next B;
6570 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
6571 wakaba 1.79 !!!cp ('t320');
6572 wakaba 1.148 ## NOTE: As if in head.
6573 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6574 wakaba 1.126 next B;
6575 wakaba 1.158
6576     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
6577     ## has no parse error.
6578 wakaba 1.52 } else {
6579 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6580 wakaba 1.79 !!!cp ('t321');
6581 wakaba 1.153 !!!parse-error (type => 'in frameset',
6582     text => $token->{tag_name}, token => $token);
6583 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6584 wakaba 1.79 !!!cp ('t322');
6585 wakaba 1.153 !!!parse-error (type => 'after frameset',
6586     text => $token->{tag_name}, token => $token);
6587 wakaba 1.158 } else { # "after after frameset"
6588     !!!cp ('t322.2');
6589     !!!parse-error (type => 'after after frameset',
6590     text => $token->{tag_name}, token => $token);
6591 wakaba 1.52 }
6592     ## Ignore the token
6593 wakaba 1.125 !!!nack ('t322.1');
6594 wakaba 1.52 !!!next-token;
6595 wakaba 1.126 next B;
6596 wakaba 1.52 }
6597 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6598 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6599 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6600 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6601 wakaba 1.52 @{$self->{open_elements}} == 1) {
6602 wakaba 1.79 !!!cp ('t325');
6603 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6604     text => $token->{tag_name}, token => $token);
6605 wakaba 1.52 ## Ignore the token
6606     !!!next-token;
6607     } else {
6608 wakaba 1.79 !!!cp ('t326');
6609 wakaba 1.52 pop @{$self->{open_elements}};
6610     !!!next-token;
6611     }
6612 wakaba 1.47
6613 wakaba 1.52 if (not defined $self->{inner_html_node} and
6614 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6615 wakaba 1.79 !!!cp ('t327');
6616 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6617 wakaba 1.79 } else {
6618     !!!cp ('t328');
6619 wakaba 1.52 }
6620 wakaba 1.126 next B;
6621 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
6622 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6623 wakaba 1.79 !!!cp ('t329');
6624 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6625 wakaba 1.52 !!!next-token;
6626 wakaba 1.126 next B;
6627 wakaba 1.52 } else { ## Any other end tag in a frameset-related insertion mode: report a parse error and ignore the token.
6628 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6629 wakaba 1.79 !!!cp ('t330');
6630 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
6631     text => $token->{tag_name}, token => $token);
6632 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6633     !!!cp ('t330.1');
6634     !!!parse-error (type => 'after frameset:/',
6635     text => $token->{tag_name}, token => $token);
6636     } else { # "after after frameset"
6637 wakaba 1.79 !!!cp ('t331');
6638 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
6639 wakaba 1.153 text => $token->{tag_name}, token => $token);
6640 wakaba 1.52 }
6641     ## Ignore the token
6642     !!!next-token;
6643 wakaba 1.126 next B;
6644 wakaba 1.52 }
6645 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6646 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6647 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6648     !!!cp ('t331.1');
6649 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6650 wakaba 1.104 } else {
6651     !!!cp ('t331.2');
6652     }
6653    
6654     ## Stop parsing
6655     last B;
6656 wakaba 1.52 } else {
6657     die "$0: $token->{type}: Unknown token type";
6658     }
6659 wakaba 1.47
6660 wakaba 1.52 ## ISSUE: An issue in spec here
6661     } else {
6662     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6663     }
6664 wakaba 1.47
6665 wakaba 1.52 ## "in body" insertion mode
6666 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
6667 wakaba 1.52 if ($token->{tag_name} eq 'script') {
6668 wakaba 1.79 !!!cp ('t332');
6669 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6670 wakaba 1.100 $script_start_tag->();
6671 wakaba 1.126 next B;
6672 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
6673 wakaba 1.79 !!!cp ('t333');
6674 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6675 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6676 wakaba 1.126 next B;
6677 wakaba 1.52 } elsif ({
6678     base => 1, link => 1,
6679     }->{$token->{tag_name}}) {
6680 wakaba 1.79 !!!cp ('t334');
6681 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6682 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6683 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6684 wakaba 1.125 !!!ack ('t334.1');
6685 wakaba 1.52 !!!next-token;
6686 wakaba 1.126 next B;
6687 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
6688     ## NOTE: This is an "as if in head" code clone, only "-t" differs
6689 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6690 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6691 wakaba 1.46
6692 wakaba 1.52 unless ($self->{confident}) {
6693 wakaba 1.134 if ($token->{attributes}->{charset}) {
6694 wakaba 1.79 !!!cp ('t335');
6695 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6696     ## in the {change_encoding} callback.
6697 wakaba 1.63 $self->{change_encoding}
6698 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
6699 wakaba 1.66
6700     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6701     ->set_user_data (manakai_has_reference =>
6702     $token->{attributes}->{charset}
6703     ->{has_reference});
6704 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
6705     if ($token->{attributes}->{content}->{value}
6706 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6707 wakaba 1.70 [\x09-\x0D\x20]*=
6708 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6709 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
6710 wakaba 1.79 !!!cp ('t336');
6711 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6712     ## in the {change_encoding} callback.
6713 wakaba 1.63 $self->{change_encoding}
6714 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6715 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6716     ->set_user_data (manakai_has_reference =>
6717     $token->{attributes}->{content}
6718     ->{has_reference});
6719 wakaba 1.63 }
6720 wakaba 1.52 }
6721 wakaba 1.66 } else {
6722     if ($token->{attributes}->{charset}) {
6723 wakaba 1.79 !!!cp ('t337');
6724 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6725     ->set_user_data (manakai_has_reference =>
6726     $token->{attributes}->{charset}
6727     ->{has_reference});
6728     }
6729 wakaba 1.68 if ($token->{attributes}->{content}) {
6730 wakaba 1.79 !!!cp ('t338');
6731 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6732     ->set_user_data (manakai_has_reference =>
6733     $token->{attributes}->{content}
6734     ->{has_reference});
6735     }
6736 wakaba 1.52 }
6737 wakaba 1.1
6738 wakaba 1.125 !!!ack ('t338.1');
6739 wakaba 1.52 !!!next-token;
6740 wakaba 1.126 next B;
6741 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
6742 wakaba 1.79 !!!cp ('t341');
6743 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6744 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6745 wakaba 1.126 next B;
6746 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
6747 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
6748 wakaba 1.46
6749 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
6750 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6751 wakaba 1.79 !!!cp ('t342');
6752 wakaba 1.52 ## Ignore the token
6753     } else {
6754     my $body_el = $self->{open_elements}->[1]->[0];
6755     for my $attr_name (keys %{$token->{attributes}}) {
6756     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6757 wakaba 1.79 !!!cp ('t343');
6758 wakaba 1.52 $body_el->set_attribute_ns
6759     (undef, [undef, $attr_name],
6760     $token->{attributes}->{$attr_name}->{value});
6761     }
6762     }
6763     }
6764 wakaba 1.125 !!!nack ('t343.1');
6765 wakaba 1.52 !!!next-token;
6766 wakaba 1.126 next B;
6767 wakaba 1.52 } elsif ({
6768     address => 1, blockquote => 1, center => 1, dir => 1,
6769 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6770     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6771 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6772     pre => 1, listing => 1,
6773 wakaba 1.109 form => 1,
6774     table => 1,
6775     hr => 1,
6776 wakaba 1.52 }->{$token->{tag_name}}) {
6777 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6778     !!!cp ('t350');
6779 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6780 wakaba 1.109 ## Ignore the token
6781 wakaba 1.125 !!!nack ('t350.1');
6782 wakaba 1.109 !!!next-token;
6783 wakaba 1.126 next B;
6784 wakaba 1.109 }
6785    
6786 wakaba 1.52 ## has a p element in scope
6787     INSCOPE: for (reverse @{$self->{open_elements}}) {
6788 wakaba 1.123 if ($_->[1] & P_EL) {
6789 wakaba 1.79 !!!cp ('t344');
6790 wakaba 1.125 !!!back-token; # <form>
6791 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6792     line => $token->{line}, column => $token->{column}};
6793 wakaba 1.126 next B;
6794 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6795 wakaba 1.79 !!!cp ('t345');
6796 wakaba 1.52 last INSCOPE;
6797     }
6798     } # INSCOPE
6799    
6800 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6801 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6802 wakaba 1.125 !!!nack ('t346.1');
6803 wakaba 1.52 !!!next-token;
6804 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6805 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6806     unless (length $token->{data}) {
6807 wakaba 1.79 !!!cp ('t346');
6808 wakaba 1.1 !!!next-token;
6809 wakaba 1.79 } else {
6810     !!!cp ('t349');
6811 wakaba 1.52 }
6812 wakaba 1.79 } else {
6813     !!!cp ('t348');
6814 wakaba 1.52 }
6815 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6816     !!!cp ('t347.1');
6817     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6818    
6819 wakaba 1.125 !!!nack ('t347.2');
6820 wakaba 1.109 !!!next-token;
6821     } elsif ($token->{tag_name} eq 'table') {
6822     !!!cp ('t382');
6823     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6824    
6825     $self->{insertion_mode} = IN_TABLE_IM;
6826    
6827 wakaba 1.125 !!!nack ('t382.1');
6828 wakaba 1.109 !!!next-token;
6829     } elsif ($token->{tag_name} eq 'hr') {
6830     !!!cp ('t386');
6831     pop @{$self->{open_elements}};
6832    
6833 wakaba 1.125 !!!nack ('t386.1');
6834 wakaba 1.109 !!!next-token;
6835 wakaba 1.52 } else {
6836 wakaba 1.125 !!!nack ('t347.1');
6837 wakaba 1.52 !!!next-token;
6838     }
6839 wakaba 1.126 next B;
6840 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6841 wakaba 1.52 ## has a p element in scope
6842     INSCOPE: for (reverse @{$self->{open_elements}}) {
6843 wakaba 1.123 if ($_->[1] & P_EL) {
6844 wakaba 1.79 !!!cp ('t353');
6845 wakaba 1.125 !!!back-token; # <x>
6846 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6847     line => $token->{line}, column => $token->{column}};
6848 wakaba 1.126 next B;
6849 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6850 wakaba 1.79 !!!cp ('t354');
6851 wakaba 1.52 last INSCOPE;
6852     }
6853     } # INSCOPE
6854    
6855     ## Step 1
6856     my $i = -1;
6857     my $node = $self->{open_elements}->[$i];
6858 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6859     dt => {dt => 1, dd => 1},
6860     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6861 wakaba 1.52 LI: {
6862     ## Step 2
6863 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6864 wakaba 1.52 if ($i != -1) {
6865 wakaba 1.79 !!!cp ('t355');
6866 wakaba 1.122 !!!parse-error (type => 'not closed',
6867 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
6868 wakaba 1.122 ->manakai_local_name,
6869     token => $token);
6870 wakaba 1.79 } else {
6871     !!!cp ('t356');
6872 wakaba 1.52 }
6873     splice @{$self->{open_elements}}, $i;
6874     last LI;
6875 wakaba 1.79 } else {
6876     !!!cp ('t357');
6877 wakaba 1.52 }
6878    
6879     ## Step 3
6880 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6881 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6882 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6883     $node->[1] & SCOPING_EL) and
6884     not ($node->[1] & ADDRESS_EL) and
6885     not ($node->[1] & DIV_EL)) {
6886 wakaba 1.79 !!!cp ('t358');
6887 wakaba 1.52 last LI;
6888     }
6889    
6890 wakaba 1.79 !!!cp ('t359');
6891 wakaba 1.52 ## Step 4
6892     $i--;
6893     $node = $self->{open_elements}->[$i];
6894     redo LI;
6895     } # LI
6896    
6897 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6898 wakaba 1.125 !!!nack ('t359.1');
6899 wakaba 1.52 !!!next-token;
6900 wakaba 1.126 next B;
6901 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6902     ## has a p element in scope
6903     INSCOPE: for (reverse @{$self->{open_elements}}) {
6904 wakaba 1.123 if ($_->[1] & P_EL) {
6905 wakaba 1.79 !!!cp ('t367');
6906 wakaba 1.125 !!!back-token; # <plaintext>
6907 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6908     line => $token->{line}, column => $token->{column}};
6909 wakaba 1.126 next B;
6910 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6911 wakaba 1.79 !!!cp ('t368');
6912 wakaba 1.52 last INSCOPE;
6913 wakaba 1.46 }
6914 wakaba 1.52 } # INSCOPE
6915    
6916 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6917 wakaba 1.52
6918     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6919    
6920 wakaba 1.125 !!!nack ('t368.1');
6921 wakaba 1.52 !!!next-token;
6922 wakaba 1.126 next B;
6923 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6924     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6925     my $node = $active_formatting_elements->[$i];
6926 wakaba 1.123 if ($node->[1] & A_EL) {
6927 wakaba 1.79 !!!cp ('t371');
6928 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6929 wakaba 1.52
6930 wakaba 1.125 !!!back-token; # <a>
6931 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6932     line => $token->{line}, column => $token->{column}};
6933 wakaba 1.113 $formatting_end_tag->($token);
6934 wakaba 1.52
6935     AFE2: for (reverse 0..$#$active_formatting_elements) {
6936     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
6937 wakaba 1.79 !!!cp ('t372');
6938 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
6939     last AFE2;
6940 wakaba 1.1 }
6941 wakaba 1.52 } # AFE2
6942     OE: for (reverse 0..$#{$self->{open_elements}}) {
6943     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
6944 wakaba 1.79 !!!cp ('t373');
6945 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
6946     last OE;
6947 wakaba 1.1 }
6948 wakaba 1.52 } # OE
6949     last AFE;
6950     } elsif ($node->[0] eq '#marker') {
6951 wakaba 1.79 !!!cp ('t374');
6952 wakaba 1.52 last AFE;
6953     }
6954     } # AFE
6955    
6956     $reconstruct_active_formatting_elements->($insert_to_current);
6957 wakaba 1.1
6958 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6959 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6960 wakaba 1.1
6961 wakaba 1.125 !!!nack ('t374.1');
6962 wakaba 1.52 !!!next-token;
6963 wakaba 1.126 next B;
6964 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6965     $reconstruct_active_formatting_elements->($insert_to_current);
6966 wakaba 1.1
6967 wakaba 1.52 ## has a |nobr| element in scope
6968     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6969     my $node = $self->{open_elements}->[$_];
6970 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6971 wakaba 1.79 !!!cp ('t376');
6972 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6973 wakaba 1.125 !!!back-token; # <nobr>
6974 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6975     line => $token->{line}, column => $token->{column}};
6976 wakaba 1.126 next B;
6977 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6978 wakaba 1.79 !!!cp ('t377');
6979 wakaba 1.52 last INSCOPE;
6980     }
6981     } # INSCOPE
6982    
6983 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6984 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6985    
6986 wakaba 1.125 !!!nack ('t377.1');
6987 wakaba 1.52 !!!next-token;
6988 wakaba 1.126 next B;
6989 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6990     ## has a button element in scope
6991     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6992     my $node = $self->{open_elements}->[$_];
6993 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6994 wakaba 1.79 !!!cp ('t378');
6995 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6996 wakaba 1.125 !!!back-token; # <button>
6997 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6998     line => $token->{line}, column => $token->{column}};
6999 wakaba 1.126 next B;
7000 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7001 wakaba 1.79 !!!cp ('t379');
7002 wakaba 1.52 last INSCOPE;
7003     }
7004     } # INSCOPE
7005    
7006     $reconstruct_active_formatting_elements->($insert_to_current);
7007    
7008 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7009 wakaba 1.85
7010     ## TODO: associate with $self->{form_element} if defined
7011    
7012 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
7013 wakaba 1.1
7014 wakaba 1.125 !!!nack ('t379.1');
7015 wakaba 1.52 !!!next-token;
7016 wakaba 1.126 next B;
7017 wakaba 1.103 } elsif ({
7018 wakaba 1.109 xmp => 1,
7019     iframe => 1,
7020     noembed => 1,
7021 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
7022 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
7023 wakaba 1.103 }->{$token->{tag_name}}) {
7024 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
7025     !!!cp ('t381');
7026     $reconstruct_active_formatting_elements->($insert_to_current);
7027     } else {
7028     !!!cp ('t399');
7029     }
7030     ## NOTE: There is an "as if in body" code clone.
7031 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
7032 wakaba 1.126 next B;
7033 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
7034 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
7035 wakaba 1.52
7036     if (defined $self->{form_element}) {
7037 wakaba 1.79 !!!cp ('t389');
7038 wakaba 1.52 ## Ignore the token
7039 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
7040 wakaba 1.52 !!!next-token;
7041 wakaba 1.126 next B;
7042 wakaba 1.52 } else {
7043 wakaba 1.147 !!!ack ('t391.1');
7044    
7045 wakaba 1.52 my $at = $token->{attributes};
7046     my $form_attrs;
7047     $form_attrs->{action} = $at->{action} if $at->{action};
7048     my $prompt_attr = $at->{prompt};
7049     $at->{name} = {name => 'name', value => 'isindex'};
7050     delete $at->{action};
7051     delete $at->{prompt};
7052     my @tokens = (
7053 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
7054 wakaba 1.114 attributes => $form_attrs,
7055     line => $token->{line}, column => $token->{column}},
7056     {type => START_TAG_TOKEN, tag_name => 'hr',
7057     line => $token->{line}, column => $token->{column}},
7058     {type => START_TAG_TOKEN, tag_name => 'p',
7059     line => $token->{line}, column => $token->{column}},
7060     {type => START_TAG_TOKEN, tag_name => 'label',
7061     line => $token->{line}, column => $token->{column}},
7062 wakaba 1.52 );
7063     if ($prompt_attr) {
7064 wakaba 1.79 !!!cp ('t390');
7065 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
7066 wakaba 1.118 #line => $token->{line}, column => $token->{column},
7067     };
7068 wakaba 1.1 } else {
7069 wakaba 1.79 !!!cp ('t391');
7070 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
7071 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
7072 wakaba 1.118 #line => $token->{line}, column => $token->{column},
7073     }; # SHOULD
7074 wakaba 1.52 ## TODO: make this configurable
7075 wakaba 1.1 }
7076 wakaba 1.52 push @tokens,
7077 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
7078     line => $token->{line}, column => $token->{column}},
7079 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
7080 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
7081     line => $token->{line}, column => $token->{column}},
7082     {type => END_TAG_TOKEN, tag_name => 'p',
7083     line => $token->{line}, column => $token->{column}},
7084     {type => START_TAG_TOKEN, tag_name => 'hr',
7085     line => $token->{line}, column => $token->{column}},
7086     {type => END_TAG_TOKEN, tag_name => 'form',
7087     line => $token->{line}, column => $token->{column}};
7088 wakaba 1.52 !!!back-token (@tokens);
7089 wakaba 1.125 !!!next-token;
7090 wakaba 1.126 next B;
7091 wakaba 1.52 }
7092     } elsif ($token->{tag_name} eq 'textarea') {
7093     my $tag_name = $token->{tag_name};
7094     my $el;
7095 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
7096 wakaba 1.52
7097     ## TODO: $self->{form_element} if defined
7098     $self->{content_model} = RCDATA_CONTENT_MODEL;
7099     delete $self->{escape}; # MUST
7100    
7101     $insert->($el);
7102    
7103     my $text = '';
7104 wakaba 1.125 !!!nack ('t392.1');
7105 wakaba 1.52 !!!next-token;
7106 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
7107 wakaba 1.52 $token->{data} =~ s/^\x0A//;
7108 wakaba 1.51 unless (length $token->{data}) {
7109 wakaba 1.79 !!!cp ('t392');
7110 wakaba 1.51 !!!next-token;
7111 wakaba 1.79 } else {
7112     !!!cp ('t393');
7113 wakaba 1.51 }
7114 wakaba 1.79 } else {
7115     !!!cp ('t394');
7116 wakaba 1.51 }
7117 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
7118 wakaba 1.79 !!!cp ('t395');
7119 wakaba 1.52 $text .= $token->{data};
7120     !!!next-token;
7121     }
7122     if (length $text) {
7123 wakaba 1.79 !!!cp ('t396');
7124 wakaba 1.52 $el->manakai_append_text ($text);
7125     }
7126    
7127     $self->{content_model} = PCDATA_CONTENT_MODEL;
7128 wakaba 1.51
7129 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
7130 wakaba 1.52 $token->{tag_name} eq $tag_name) {
7131 wakaba 1.79 !!!cp ('t397');
7132 wakaba 1.52 ## Ignore the token
7133     } else {
7134 wakaba 1.79 !!!cp ('t398');
7135 wakaba 1.153 !!!parse-error (type => 'in RCDATA:#eof', token => $token);
7136 wakaba 1.51 }
7137 wakaba 1.52 !!!next-token;
7138 wakaba 1.126 next B;
7139 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
7140     $token->{tag_name} eq 'rp') {
7141     ## has a |ruby| element in scope
7142     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7143     my $node = $self->{open_elements}->[$_];
7144     if ($node->[1] & RUBY_EL) {
7145     !!!cp ('t398.1');
7146     ## generate implied end tags
7147     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7148     !!!cp ('t398.2');
7149     pop @{$self->{open_elements}};
7150     }
7151     unless ($self->{open_elements}->[-1]->[1] & RUBY_EL) {
7152     !!!cp ('t398.3');
7153     !!!parse-error (type => 'not closed',
7154 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7155 wakaba 1.151 ->manakai_local_name,
7156     token => $token);
7157     pop @{$self->{open_elements}}
7158     while not $self->{open_elements}->[-1]->[1] & RUBY_EL;
7159     }
7160     last INSCOPE;
7161     } elsif ($node->[1] & SCOPING_EL) {
7162     !!!cp ('t398.4');
7163     last INSCOPE;
7164     }
7165     } # INSCOPE
7166    
7167     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7168    
7169     !!!nack ('t398.5');
7170     !!!next-token;
7171     redo B;
7172 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
7173     $token->{tag_name} eq 'svg') {
7174     $reconstruct_active_formatting_elements->($insert_to_current);
7175 wakaba 1.131
7176 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
7177    
7178 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
7179    
7180     ## "adjust foreign attributes" - done in insert-element-f
7181 wakaba 1.126
7182 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
7183 wakaba 1.126
7184     if ($self->{self_closing}) { ## |<math/>| or |<svg/>|: the element just inserted is popped again at once.
7185     pop @{$self->{open_elements}};
7186     !!!ack ('t398.1'); ## NOTE(review): ID |t398.1| is also used by a |!!!cp| in the |rt|/|rp| start tag branch; confirm the ID is meant to be shared.
7187     } else {
7188     !!!cp ('t398.2'); ## NOTE(review): ID |t398.2| is also used by a |!!!cp| in the |rt|/|rp| start tag branch; confirm the ID is meant to be shared.
7189     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
7190     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
7191     ## mode, "in body" (not "in foreign content") secondary insertion
7192     ## mode, maybe.
7193     }
7194    
7195     !!!next-token;
7196     next B;
7197 wakaba 1.52 } elsif ({
7198     caption => 1, col => 1, colgroup => 1, frame => 1,
7199     frameset => 1, head => 1, option => 1, optgroup => 1,
7200     tbody => 1, td => 1, tfoot => 1, th => 1,
7201     thead => 1, tr => 1,
7202     }->{$token->{tag_name}}) {
7203 wakaba 1.79 !!!cp ('t401');
7204 wakaba 1.153 !!!parse-error (type => 'in body',
7205     text => $token->{tag_name}, token => $token);
7206 wakaba 1.52 ## Ignore the token
7207 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
7208 wakaba 1.52 !!!next-token;
7209 wakaba 1.126 next B;
7210 wakaba 1.52
7211     ## ISSUE: An issue on HTML5 new elements in the spec.
7212     } else {
7213 wakaba 1.110 if ($token->{tag_name} eq 'image') {
7214     !!!cp ('t384');
7215 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
7216 wakaba 1.110 $token->{tag_name} = 'img';
7217     } else {
7218     !!!cp ('t385');
7219     }
7220    
7221     ## NOTE: There is an "as if <br>" code clone.
7222 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
7223    
7224 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7225 wakaba 1.109
7226 wakaba 1.110 if ({
7227     applet => 1, marquee => 1, object => 1,
7228     }->{$token->{tag_name}}) {
7229     !!!cp ('t380');
7230     push @$active_formatting_elements, ['#marker', ''];
7231 wakaba 1.125 !!!nack ('t380.1');
7232 wakaba 1.110 } elsif ({
7233     b => 1, big => 1, em => 1, font => 1, i => 1,
7234     s => 1, small => 1, strike => 1,
7235     strong => 1, tt => 1, u => 1,
7236     }->{$token->{tag_name}}) { ## Formatting elements: the element just inserted is also recorded in the list of active formatting elements.
7237     !!!cp ('t375');
7238     push @$active_formatting_elements, $self->{open_elements}->[-1];
7239 wakaba 1.125 !!!nack ('t375.1');
7240 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
7241     !!!cp ('t388');
7242     ## TODO: associate with $self->{form_element} if defined
7243     pop @{$self->{open_elements}};
7244 wakaba 1.125 !!!ack ('t388.2');
7245 wakaba 1.110 } elsif ({
7246     area => 1, basefont => 1, bgsound => 1, br => 1,
7247     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
7248     #image => 1,
7249     }->{$token->{tag_name}}) {
7250     !!!cp ('t388.1');
7251     pop @{$self->{open_elements}};
7252 wakaba 1.125 !!!ack ('t388.3');
7253 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
7254 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
7255    
7256     if ($self->{insertion_mode} & TABLE_IMS or
7257     $self->{insertion_mode} & BODY_TABLE_IMS or
7258     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
7259     !!!cp ('t400.1');
7260     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
7261     } else {
7262     !!!cp ('t400.2');
7263     $self->{insertion_mode} = IN_SELECT_IM;
7264     }
7265 wakaba 1.125 !!!nack ('t400.3');
7266 wakaba 1.110 } else {
7267 wakaba 1.125 !!!nack ('t402');
7268 wakaba 1.109 }
7269 wakaba 1.51
7270 wakaba 1.52 !!!next-token;
7271 wakaba 1.126 next B;
7272 wakaba 1.52 }
7273 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
7274 wakaba 1.52 if ($token->{tag_name} eq 'body') {
7275 wakaba 1.107 ## has a |body| element in scope
7276     my $i;
7277 wakaba 1.111 INSCOPE: {
7278     for (reverse @{$self->{open_elements}}) {
7279 wakaba 1.123 if ($_->[1] & BODY_EL) {
7280 wakaba 1.111 !!!cp ('t405');
7281     $i = $_;
7282     last INSCOPE;
7283 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
7284 wakaba 1.111 !!!cp ('t405.1');
7285     last;
7286     }
7287 wakaba 1.52 }
7288 wakaba 1.111
7289     !!!parse-error (type => 'unmatched end tag',
7290 wakaba 1.153 text => $token->{tag_name}, token => $token);
7291 wakaba 1.107 ## NOTE: |</body>| with no |body| element in scope: ignore the token.
7292 wakaba 1.52 !!!next-token;
7293 wakaba 1.126 next B;
7294 wakaba 1.111 } # INSCOPE
7295 wakaba 1.107
7296     for (@{$self->{open_elements}}) {
7297 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
7298 wakaba 1.107 !!!cp ('t403');
7299 wakaba 1.122 !!!parse-error (type => 'not closed',
7300 wakaba 1.153 text => $_->[0]->manakai_local_name,
7301 wakaba 1.122 token => $token);
7302 wakaba 1.107 last;
7303     } else {
7304     !!!cp ('t404');
7305     }
7306     }
7307    
7308     $self->{insertion_mode} = AFTER_BODY_IM;
7309     !!!next-token;
7310 wakaba 1.126 next B;
7311 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
7312 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
7313     ## up-to-date, though it has same effect as speced.
7314 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
7315     $self->{open_elements}->[1]->[1] & BODY_EL) {
7316 wakaba 1.52 ## ISSUE: There is an issue in the spec.
7317 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) { ## The current node is not |body|, so it is the element left open; report it rather than |body| itself.
7318 wakaba 1.79 !!!cp ('t406');
7319 wakaba 1.122 !!!parse-error (type => 'not closed',
7320 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7321 wakaba 1.122 ->manakai_local_name,
7322     token => $token);
7323 wakaba 1.79 } else {
7324     !!!cp ('t407');
7325 wakaba 1.1 }
7326 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
7327 wakaba 1.52 ## reprocess
7328 wakaba 1.126 next B;
7329 wakaba 1.51 } else {
7330 wakaba 1.79 !!!cp ('t408');
7331 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7332     text => $token->{tag_name}, token => $token);
7333 wakaba 1.52 ## Ignore the token
7334     !!!next-token;
7335 wakaba 1.126 next B;
7336 wakaba 1.51 }
7337 wakaba 1.52 } elsif ({
7338     address => 1, blockquote => 1, center => 1, dir => 1,
7339     div => 1, dl => 1, fieldset => 1, listing => 1,
7340     menu => 1, ol => 1, pre => 1, ul => 1,
7341     dd => 1, dt => 1, li => 1,
7342 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7343 wakaba 1.52 }->{$token->{tag_name}}) {
7344     ## has an element in scope
7345     my $i;
7346     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7347     my $node = $self->{open_elements}->[$_];
7348 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7349 wakaba 1.79 !!!cp ('t410');
7350 wakaba 1.52 $i = $_;
7351 wakaba 1.87 last INSCOPE;
7352 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7353 wakaba 1.79 !!!cp ('t411');
7354 wakaba 1.52 last INSCOPE;
7355 wakaba 1.51 }
7356 wakaba 1.52 } # INSCOPE
7357 wakaba 1.89
7358     unless (defined $i) { # has an element in scope
7359     !!!cp ('t413');
7360 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7361     text => $token->{tag_name}, token => $token);
7362 wakaba 1.157 ## NOTE: Ignore the token.
7363 wakaba 1.89 } else {
7364     ## Step 1. generate implied end tags
7365     while ({
7366 wakaba 1.151 ## END_TAG_OPTIONAL_EL
7367 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
7368     dt => ($token->{tag_name} ne 'dt'),
7369     li => ($token->{tag_name} ne 'li'),
7370     p => 1,
7371 wakaba 1.151 rt => 1,
7372     rp => 1,
7373 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
7374 wakaba 1.89 !!!cp ('t409');
7375     pop @{$self->{open_elements}};
7376     }
7377    
7378     ## Step 2.
7379 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7380     ne $token->{tag_name}) {
7381 wakaba 1.79 !!!cp ('t412');
7382 wakaba 1.122 !!!parse-error (type => 'not closed',
7383 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7384 wakaba 1.122 ->manakai_local_name,
7385     token => $token);
7386 wakaba 1.51 } else {
7387 wakaba 1.89 !!!cp ('t414');
7388 wakaba 1.51 }
7389 wakaba 1.89
7390     ## Step 3.
7391 wakaba 1.52 splice @{$self->{open_elements}}, $i;
7392 wakaba 1.89
7393     ## Step 4.
7394     $clear_up_to_marker->()
7395     if {
7396 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7397 wakaba 1.89 }->{$token->{tag_name}};
7398 wakaba 1.51 }
7399 wakaba 1.52 !!!next-token;
7400 wakaba 1.126 next B;
7401 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
7402 wakaba 1.92 undef $self->{form_element};
7403    
7404 wakaba 1.52 ## has an element in scope
7405 wakaba 1.92 my $i;
7406 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7407     my $node = $self->{open_elements}->[$_];
7408 wakaba 1.123 if ($node->[1] & FORM_EL) {
7409 wakaba 1.79 !!!cp ('t418');
7410 wakaba 1.92 $i = $_;
7411 wakaba 1.52 last INSCOPE;
7412 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7413 wakaba 1.79 !!!cp ('t419');
7414 wakaba 1.52 last INSCOPE;
7415     }
7416     } # INSCOPE
7417 wakaba 1.92
7418     unless (defined $i) { # has an element in scope
7419 wakaba 1.79 !!!cp ('t421');
7420 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7421     text => $token->{tag_name}, token => $token);
7422 wakaba 1.157 ## NOTE: Ignore the token.
7423 wakaba 1.92 } else {
7424     ## Step 1. generate implied end tags
7425 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7426 wakaba 1.92 !!!cp ('t417');
7427     pop @{$self->{open_elements}};
7428     }
7429    
7430     ## Step 2.
7431 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7432     ne $token->{tag_name}) {
7433 wakaba 1.92 !!!cp ('t417.1');
7434 wakaba 1.122 !!!parse-error (type => 'not closed',
7435 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7436 wakaba 1.122 ->manakai_local_name,
7437     token => $token);
7438 wakaba 1.92 } else {
7439     !!!cp ('t420');
7440     }
7441    
7442     ## Step 3.
7443     splice @{$self->{open_elements}}, $i;
7444 wakaba 1.52 }
7445    
7446     !!!next-token;
7447 wakaba 1.126 next B;
7448 wakaba 1.52 } elsif ({
7449     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7450     }->{$token->{tag_name}}) {
7451     ## has an element in scope
7452     my $i;
7453     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7454     my $node = $self->{open_elements}->[$_];
7455 wakaba 1.123 if ($node->[1] & HEADING_EL) {
7456 wakaba 1.79 !!!cp ('t423');
7457 wakaba 1.52 $i = $_;
7458     last INSCOPE;
7459 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7460 wakaba 1.79 !!!cp ('t424');
7461 wakaba 1.52 last INSCOPE;
7462 wakaba 1.51 }
7463 wakaba 1.52 } # INSCOPE
7464 wakaba 1.93
7465     unless (defined $i) { # has an element in scope
7466     !!!cp ('t425.1');
7467 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7468     text => $token->{tag_name}, token => $token);
7469 wakaba 1.157 ## NOTE: Ignore the token.
7470 wakaba 1.79 } else {
7471 wakaba 1.93 ## Step 1. generate implied end tags
7472 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7473 wakaba 1.93 !!!cp ('t422');
7474     pop @{$self->{open_elements}};
7475     }
7476    
7477     ## Step 2.
7478 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7479     ne $token->{tag_name}) {
7480 wakaba 1.93 !!!cp ('t425');
7481 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7482     text => $token->{tag_name}, token => $token);
7483 wakaba 1.93 } else {
7484     !!!cp ('t426');
7485     }
7486    
7487     ## Step 3.
7488     splice @{$self->{open_elements}}, $i;
7489 wakaba 1.36 }
7490 wakaba 1.52
7491     !!!next-token;
7492 wakaba 1.126 next B;
7493 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
7494     ## has an element in scope
7495     my $i;
7496     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7497     my $node = $self->{open_elements}->[$_];
7498 wakaba 1.123 if ($node->[1] & P_EL) {
7499 wakaba 1.87 !!!cp ('t410.1');
7500     $i = $_;
7501 wakaba 1.88 last INSCOPE;
7502 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7503 wakaba 1.87 !!!cp ('t411.1');
7504     last INSCOPE;
7505     }
7506     } # INSCOPE
7507 wakaba 1.91
7508     if (defined $i) {
7509 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7510     ne $token->{tag_name}) {
7511 wakaba 1.87 !!!cp ('t412.1');
7512 wakaba 1.122 !!!parse-error (type => 'not closed',
7513 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7514 wakaba 1.122 ->manakai_local_name,
7515     token => $token);
7516 wakaba 1.87 } else {
7517 wakaba 1.91 !!!cp ('t414.1');
7518 wakaba 1.87 }
7519 wakaba 1.91
7520 wakaba 1.87 splice @{$self->{open_elements}}, $i;
7521     } else {
7522 wakaba 1.91 !!!cp ('t413.1');
7523 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7524     text => $token->{tag_name}, token => $token);
7525 wakaba 1.91
7526 wakaba 1.87 !!!cp ('t415.1');
7527     ## As if <p>, then reprocess the current token
7528     my $el;
7529 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
7530 wakaba 1.87 $insert->($el);
7531 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
7532 wakaba 1.87 }
7533 wakaba 1.91
7534 wakaba 1.87 !!!next-token;
7535 wakaba 1.126 next B;
7536 wakaba 1.52 } elsif ({
7537     a => 1,
7538     b => 1, big => 1, em => 1, font => 1, i => 1,
7539     nobr => 1, s => 1, small => 1, strile => 1,
7540     strong => 1, tt => 1, u => 1,
7541     }->{$token->{tag_name}}) {
7542 wakaba 1.79 !!!cp ('t427');
7543 wakaba 1.113 $formatting_end_tag->($token);
7544 wakaba 1.126 next B;
7545 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
7546 wakaba 1.79 !!!cp ('t428');
7547 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7548     text => 'br', token => $token);
7549 wakaba 1.52
7550     ## As if <br>
7551     $reconstruct_active_formatting_elements->($insert_to_current);
7552    
7553     my $el;
7554 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
7555 wakaba 1.52 $insert->($el);
7556    
7557     ## Ignore the token.
7558     !!!next-token;
7559 wakaba 1.126 next B;
7560 wakaba 1.52 } elsif ({
7561     caption => 1, col => 1, colgroup => 1, frame => 1,
7562     frameset => 1, head => 1, option => 1, optgroup => 1,
7563     tbody => 1, td => 1, tfoot => 1, th => 1,
7564     thead => 1, tr => 1,
7565     area => 1, basefont => 1, bgsound => 1,
7566     embed => 1, hr => 1, iframe => 1, image => 1,
7567     img => 1, input => 1, isindex => 1, noembed => 1,
7568     noframes => 1, param => 1, select => 1, spacer => 1,
7569     table => 1, textarea => 1, wbr => 1,
7570     noscript => 0, ## TODO: if scripting is enabled
7571     }->{$token->{tag_name}}) {
7572 wakaba 1.79 !!!cp ('t429');
7573 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7574     text => $token->{tag_name}, token => $token);
7575 wakaba 1.52 ## Ignore the token
7576     !!!next-token;
7577 wakaba 1.126 next B;
7578 wakaba 1.52
7579     ## ISSUE: Issue on HTML5 new elements in spec
7580    
7581     } else {
7582     ## Step 1
7583     my $node_i = -1;
7584     my $node = $self->{open_elements}->[$node_i];
7585 wakaba 1.51
7586 wakaba 1.52 ## Step 2
7587     S2: {
7588 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7589 wakaba 1.52 ## Step 1
7590     ## generate implied end tags
7591 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7592 wakaba 1.79 !!!cp ('t430');
7593 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
7594     ## ISSUE: <ruby><rt></rt> will also take this code path,
7595     ## which seems wrong.
7596 wakaba 1.86 pop @{$self->{open_elements}};
7597 wakaba 1.151 $node_i++;
7598 wakaba 1.52 }
7599    
7600     ## Step 2
7601 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7602     ne $token->{tag_name}) {
7603 wakaba 1.79 !!!cp ('t431');
7604 wakaba 1.58 ## NOTE: <x><y></x>
7605 wakaba 1.122 !!!parse-error (type => 'not closed',
7606 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7607 wakaba 1.122 ->manakai_local_name,
7608     token => $token);
7609 wakaba 1.79 } else {
7610     !!!cp ('t432');
7611 wakaba 1.52 }
7612    
7613     ## Step 3
7614 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
7615 wakaba 1.51
7616 wakaba 1.1 !!!next-token;
7617 wakaba 1.52 last S2;
7618 wakaba 1.1 } else {
7619 wakaba 1.52 ## Step 3
7620 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
7621 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
7622 wakaba 1.123 ($node->[1] & SPECIAL_EL or
7623     $node->[1] & SCOPING_EL)) {
7624 wakaba 1.79 !!!cp ('t433');
7625 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7626     text => $token->{tag_name}, token => $token);
7627 wakaba 1.52 ## Ignore the token
7628     !!!next-token;
7629     last S2;
7630     }
7631 wakaba 1.79
7632     !!!cp ('t434');
7633 wakaba 1.1 }
7634 wakaba 1.52
7635     ## Step 4
7636     $node_i--;
7637     $node = $self->{open_elements}->[$node_i];
7638    
7639     ## Step 5;
7640     redo S2;
7641     } # S2
7642 wakaba 1.126 next B;
7643 wakaba 1.1 }
7644     }
7645 wakaba 1.126 next B;
7646     } continue { # B
7647     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7648     ## NOTE: The code below is executed in cases where it does not have
7649     ## to be, but it is harmless even in those cases.
7650     ## has an element in scope
7651     INSCOPE: {
7652     for (reverse 0..$#{$self->{open_elements}}) {
7653     my $node = $self->{open_elements}->[$_];
7654     if ($node->[1] & FOREIGN_EL) {
7655     last INSCOPE;
7656     } elsif ($node->[1] & SCOPING_EL) {
7657     last;
7658     }
7659     }
7660    
7661     ## NOTE: No foreign element in scope.
7662     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7663     } # INSCOPE
7664     }
7665 wakaba 1.1 } # B
7666    
7667     ## Stop parsing # MUST
7668    
7669     ## TODO: script stuffs
7670 wakaba 1.3 } # _tree_construct_main
7671    
7672 wakaba 1.162 sub set_inner_html ($$$;$) {
7673 wakaba 1.3 my $class = shift;
7674     my $node = shift;
7675     my $s = \$_[0];
7676     my $onerror = $_[1];
7677 wakaba 1.162 my $get_wrapper = $_[2] || sub ($) { return $_[0] };
7678 wakaba 1.3
7679 wakaba 1.63 ## ISSUE: Should {confident} be true?
7680    
7681 wakaba 1.3 my $nt = $node->node_type;
7682     if ($nt == 9) {
7683     # MUST
7684    
7685     ## Step 1 # MUST
7686     ## TODO: If the document has an active parser, ...
7687     ## ISSUE: There is an issue in the spec.
7688    
7689     ## Step 2 # MUST
7690     my @cn = @{$node->child_nodes};
7691     for (@cn) {
7692     $node->remove_child ($_);
7693     }
7694    
7695     ## Step 3, 4, 5 # MUST
7696 wakaba 1.162 $class->parse_char_string ($$s => $node, $onerror, $get_wrapper);
7697 wakaba 1.3 } elsif ($nt == 1) {
7698     ## TODO: If non-html element
7699    
7700     ## NOTE: Most of this code is copied from |parse_string|
7701    
7702 wakaba 1.162 ## TODO: Support for $get_wrapper
7703    
7704 wakaba 1.3 ## Step 1 # MUST
7705 wakaba 1.14 my $this_doc = $node->owner_document;
7706     my $doc = $this_doc->implementation->create_document;
7707 wakaba 1.18 $doc->manakai_is_html (1);
7708 wakaba 1.3 my $p = $class->new;
7709     $p->{document} = $doc;
7710    
7711 wakaba 1.84 ## Step 8 # MUST
7712 wakaba 1.3 my $i = 0;
7713 wakaba 1.121 $p->{line_prev} = $p->{line} = 1;
7714     $p->{column_prev} = $p->{column} = 0;
7715 wakaba 1.76 $p->{set_next_char} = sub {
7716 wakaba 1.3 my $self = shift;
7717 wakaba 1.14
7718 wakaba 1.76 pop @{$self->{prev_char}};
7719     unshift @{$self->{prev_char}}, $self->{next_char};
7720 wakaba 1.14
7721 wakaba 1.76 $self->{next_char} = -1 and return if $i >= length $$s;
7722     $self->{next_char} = ord substr $$s, $i++, 1;
7723 wakaba 1.121
7724     ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
7725     $p->{column}++;
7726 wakaba 1.4
7727 wakaba 1.76 if ($self->{next_char} == 0x000A) { # LF
7728 wakaba 1.121 $p->{line}++;
7729     $p->{column} = 0;
7730 wakaba 1.79 !!!cp ('i1');
7731 wakaba 1.76 } elsif ($self->{next_char} == 0x000D) { # CR
7732 wakaba 1.15 $i++ if substr ($$s, $i, 1) eq "\x0A";
7733 wakaba 1.76 $self->{next_char} = 0x000A; # LF # MUST
7734 wakaba 1.121 $p->{line}++;
7735     $p->{column} = 0;
7736 wakaba 1.79 !!!cp ('i2');
7737 wakaba 1.76 } elsif ($self->{next_char} > 0x10FFFF) {
7738     $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
7739 wakaba 1.79 !!!cp ('i3');
7740 wakaba 1.76 } elsif ($self->{next_char} == 0x0000) { # NULL
7741 wakaba 1.79 !!!cp ('i4');
7742 wakaba 1.14 !!!parse-error (type => 'NULL');
7743 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
7744 wakaba 1.132 } elsif ($self->{next_char} <= 0x0008 or
7745     (0x000E <= $self->{next_char} and
7746     $self->{next_char} <= 0x001F) or
7747     (0x007F <= $self->{next_char} and
7748     $self->{next_char} <= 0x009F) or
7749     (0xD800 <= $self->{next_char} and
7750     $self->{next_char} <= 0xDFFF) or
7751     (0xFDD0 <= $self->{next_char} and
7752     $self->{next_char} <= 0xFDDF) or
7753     {
7754     0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
7755     0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
7756     0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
7757     0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
7758     0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
7759     0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
7760     0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
7761     0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
7762     0x10FFFE => 1, 0x10FFFF => 1,
7763     }->{$self->{next_char}}) {
7764     !!!cp ('i4.1');
7765 wakaba 1.153 if ($self->{next_char} < 0x10000) {
7766     !!!parse-error (type => 'control char',
7767     text => (sprintf 'U+%04X', $self->{next_char}));
7768     } else {
7769     !!!parse-error (type => 'control char',
7770     text => (sprintf 'U-%08X', $self->{next_char}));
7771     }
7772 wakaba 1.3 }
7773     };
7774 wakaba 1.76 $p->{prev_char} = [-1, -1, -1];
7775     $p->{next_char} = -1;
7776 wakaba 1.3
7777     my $ponerror = $onerror || sub {
7778     my (%opt) = @_;
7779 wakaba 1.121 my $line = $opt{line};
7780     my $column = $opt{column};
7781     if (defined $opt{token} and defined $opt{token}->{line}) {
7782     $line = $opt{token}->{line};
7783     $column = $opt{token}->{column};
7784     }
7785     warn "Parse error ($opt{type}) at line $line column $column\n";
7786 wakaba 1.3 };
7787     $p->{parse_error} = sub {
7788 wakaba 1.121 $ponerror->(line => $p->{line}, column => $p->{column}, @_);
7789 wakaba 1.3 };
7790    
7791     $p->_initialize_tokenizer;
7792     $p->_initialize_tree_constructor;
7793    
7794     ## Step 2
7795 wakaba 1.71 my $node_ln = $node->manakai_local_name;
7796 wakaba 1.40 $p->{content_model} = {
7797     title => RCDATA_CONTENT_MODEL,
7798     textarea => RCDATA_CONTENT_MODEL,
7799     style => CDATA_CONTENT_MODEL,
7800     script => CDATA_CONTENT_MODEL,
7801     xmp => CDATA_CONTENT_MODEL,
7802     iframe => CDATA_CONTENT_MODEL,
7803     noembed => CDATA_CONTENT_MODEL,
7804     noframes => CDATA_CONTENT_MODEL,
7805     noscript => CDATA_CONTENT_MODEL,
7806     plaintext => PLAINTEXT_CONTENT_MODEL,
7807     }->{$node_ln};
7808     $p->{content_model} = PCDATA_CONTENT_MODEL
7809     unless defined $p->{content_model};
7810     ## ISSUE: What is "the name of the element"? local name?
7811 wakaba 1.3
7812 wakaba 1.123 $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
7813     ## TODO: Foreign element OK?
7814 wakaba 1.3
7815 wakaba 1.84 ## Step 3
7816 wakaba 1.3 my $root = $doc->create_element_ns
7817     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
7818    
7819 wakaba 1.84 ## Step 4 # MUST
7820 wakaba 1.3 $doc->append_child ($root);
7821    
7822 wakaba 1.84 ## Step 5 # MUST
7823 wakaba 1.123 push @{$p->{open_elements}}, [$root, $el_category->{html}];
7824 wakaba 1.3
7825     undef $p->{head_element};
7826    
7827 wakaba 1.84 ## Step 6 # MUST
7828 wakaba 1.3 $p->_reset_insertion_mode;
7829    
7830 wakaba 1.84 ## Step 7 # MUST
7831 wakaba 1.3 my $anode = $node;
7832     AN: while (defined $anode) {
7833     if ($anode->node_type == 1) {
7834     my $nsuri = $anode->namespace_uri;
7835     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
7836 wakaba 1.71 if ($anode->manakai_local_name eq 'form') {
7837 wakaba 1.79 !!!cp ('i5');
7838 wakaba 1.3 $p->{form_element} = $anode;
7839     last AN;
7840     }
7841     }
7842     }
7843     $anode = $anode->parent_node;
7844     } # AN
7845    
7846 wakaba 1.84 ## Step 9 # MUST
7847 wakaba 1.3 {
7848     my $self = $p;
7849     !!!next-token;
7850     }
7851     $p->_tree_construction_main;
7852    
7853 wakaba 1.84 ## Step 10 # MUST
7854 wakaba 1.3 my @cn = @{$node->child_nodes};
7855     for (@cn) {
7856     $node->remove_child ($_);
7857     }
7858     ## ISSUE: mutation events? read-only?
7859    
7860 wakaba 1.84 ## Step 11 # MUST
7861 wakaba 1.3 @cn = @{$root->child_nodes};
7862     for (@cn) {
7863 wakaba 1.14 $this_doc->adopt_node ($_);
7864 wakaba 1.3 $node->append_child ($_);
7865     }
7866 wakaba 1.14 ## ISSUE: mutation events?
7867 wakaba 1.3
7868     $p->_terminate_tree_constructor;
7869 wakaba 1.121
7870     delete $p->{parse_error}; # delete loop
7871 wakaba 1.3 } else {
7872     die "$0: |set_inner_html| is not defined for node of type $nt";
7873     }
7874     } # set_inner_html
7875    
7876     } # tree construction stage
7877 wakaba 1.1
## A class that defines nothing here except its inheritance from
## |Error|.
## NOTE(review): Presumably thrown as an exception to make the parser
## start over; confirm against the code that throws and catches it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';
7880    
7881 wakaba 1.1 1;
7882 wakaba 1.168 # $Date: 2008/09/13 09:02:28 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24