/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log


Revision 1.159 - (hide annotations) (download) (as text)
Fri Sep 5 17:57:47 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.158: +13 -5 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	5 Sep 2008 17:40:06 -0000
	* content-model-1.dat: Test data for |XSLT-compat|
	added (HTML5 revision 2141).  A redundant test
	entry is removed.

	* HTML-tree.t: Support for should-level errors.

2008-09-06  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	5 Sep 2008 17:28:08 -0000
2008-09-06  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src: Support for |XSLT-compat| (HTML5 revision 2141).

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.159 our $VERSION=do{my @r=(q$Revision: 1.158 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.139 require IO::Handle;
12    
## Namespace URIs.  In this part of the file, $MML_NS and $SVG_NS key
## |$el_category_f|, and $XLINK_NS, $XML_NS and $XMLNS_NS are used in
## |$foreign_attr_xname|.
13 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
14     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
15     my $SVG_NS = q<http://www.w3.org/2000/svg>;
16     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
17     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
18     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
19    
## Element category flags.  Every constant below is a single, distinct bit,
## so categories can be combined with |||; the combined sets that follow
## (TABLE_ROWS_EL, END_TAG_OPTIONAL_EL, SCOPING_EL, ...) are built that way.
## The flags are assigned to element names in |$el_category| and
## |$el_category_f|.
20 wakaba 1.123 sub A_EL () { 0b1 }
21     sub ADDRESS_EL () { 0b10 }
22     sub BODY_EL () { 0b100 }
23     sub BUTTON_EL () { 0b1000 }
24     sub CAPTION_EL () { 0b10000 }
25     sub DD_EL () { 0b100000 }
26     sub DIV_EL () { 0b1000000 }
27     sub DT_EL () { 0b10000000 }
28     sub FORM_EL () { 0b100000000 }
29     sub FORMATTING_EL () { 0b1000000000 }
30     sub FRAMESET_EL () { 0b10000000000 }
31     sub HEADING_EL () { 0b100000000000 }
32     sub HTML_EL () { 0b1000000000000 }
33     sub LI_EL () { 0b10000000000000 }
34     sub NOBR_EL () { 0b100000000000000 }
35     sub OPTION_EL () { 0b1000000000000000 }
36     sub OPTGROUP_EL () { 0b10000000000000000 }
37     sub P_EL () { 0b100000000000000000 }
38     sub SELECT_EL () { 0b1000000000000000000 }
39     sub TABLE_EL () { 0b10000000000000000000 }
40     sub TABLE_CELL_EL () { 0b100000000000000000000 }
41     sub TABLE_ROW_EL () { 0b1000000000000000000000 }
42     sub TABLE_ROW_GROUP_EL () { 0b10000000000000000000000 }
43     sub MISC_SCOPING_EL () { 0b100000000000000000000000 }
44     sub MISC_SPECIAL_EL () { 0b1000000000000000000000000 }
## Flags for non-HTML (foreign) elements; see |$el_category_f|.
45 wakaba 1.126 sub FOREIGN_EL () { 0b10000000000000000000000000 }
46     sub FOREIGN_FLOW_CONTENT_EL () { 0b100000000000000000000000000 }
47     sub MML_AXML_EL () { 0b1000000000000000000000000000 }
## |ruby| is RUBY_EL; |rp| and |rt| are RUBY_COMPONENT_EL (see |$el_category|).
48 wakaba 1.151 sub RUBY_EL () { 0b10000000000000000000000000000 }
49     sub RUBY_COMPONENT_EL () { 0b100000000000000000000000000000 }
50 wakaba 1.123
## Set of category bits covering |table|, |tr| and the row groups
## (|tbody|, |tfoot|, |thead|), per the |$el_category| table.
51     sub TABLE_ROWS_EL () {
52     TABLE_EL |
53     TABLE_ROW_EL |
54     TABLE_ROW_GROUP_EL
55     }
56    
## Set of category bits for |dd|, |dt|, |li|, |p| and the ruby components
## (|rp|, |rt|).
57 wakaba 1.151 ## NOTE: Used in the "generate implied end tags" algorithm.
58     ## NOTE: There is code where a modified version of END_TAG_OPTIONAL_EL
59     ## is used in the "generate implied end tags" implementation (search for
60     ## the function mae).
61 wakaba 1.123 sub END_TAG_OPTIONAL_EL () {
62     DD_EL |
63     DT_EL |
64     LI_EL |
65 wakaba 1.151 P_EL |
66     RUBY_COMPONENT_EL
67 wakaba 1.123 }
68    
## Wider set than END_TAG_OPTIONAL_EL: adds |body|, |html|, table cells,
## rows and row groups.  Unlike END_TAG_OPTIONAL_EL it does not contain
## RUBY_COMPONENT_EL.
69 wakaba 1.151 ## NOTE: Used in </body> and EOF algorithms.
70 wakaba 1.123 sub ALL_END_TAG_OPTIONAL_EL () {
71 wakaba 1.151 DD_EL |
72     DT_EL |
73     LI_EL |
74     P_EL |
75    
76 wakaba 1.123 BODY_EL |
77     HTML_EL |
78     TABLE_CELL_EL |
79     TABLE_ROW_EL |
80     TABLE_ROW_GROUP_EL
81     }
82    
## Set of category bits for |button|, |caption|, |html|, |table|, table
## cells and the MISC_SCOPING_EL elements (|applet|, |marquee|, |object|).
## NOTE(review): presumably the "scoping" elements of the HTML5 parsing
## algorithm; the code that tests against this set is outside this excerpt.
83     sub SCOPING_EL () {
84     BUTTON_EL |
85     CAPTION_EL |
86     HTML_EL |
87     TABLE_EL |
88     TABLE_CELL_EL |
89     MISC_SCOPING_EL
90     }
91    
## Set of category bits for |html| and |table|.
## NOTE(review): presumably the boundary set for "in table scope" checks;
## confirm against the callers, which are not visible here.
92     sub TABLE_SCOPING_EL () {
93     HTML_EL |
94     TABLE_EL
95     }
96    
## Set of category bits for |html| and the table row groups
## (|tbody|, |tfoot|, |thead|).
97     sub TABLE_ROWS_SCOPING_EL () {
98     HTML_EL |
99     TABLE_ROW_GROUP_EL
100     }
101    
## Set of category bits for |html| and |tr|.
102     sub TABLE_ROW_SCOPING_EL () {
103     HTML_EL |
104     TABLE_ROW_EL
105     }
106    
## Set of category bits for the elements treated as "special": the
## individually flagged elements listed below plus everything tagged
## MISC_SPECIAL_EL in |$el_category|.
107     sub SPECIAL_EL () {
108     ADDRESS_EL |
109     BODY_EL |
110     DIV_EL |
111 wakaba 1.151
112     DD_EL |
113     DT_EL |
114     LI_EL |
115     P_EL |
116    
117 wakaba 1.123 FORM_EL |
118     FRAMESET_EL |
119     HEADING_EL |
120     OPTION_EL |
121     OPTGROUP_EL |
122     SELECT_EL |
123     TABLE_ROW_EL |
124     TABLE_ROW_GROUP_EL |
125     MISC_SPECIAL_EL
126     }
127    
## Maps HTML element local names to their category flags (the *_EL
## constants above).  Two entries combine flags: |a| and |nobr| are both a
## specific category and FORMATTING_EL.  Element names that are not listed
## have no entry in this table.
128     my $el_category = {
129     a => A_EL | FORMATTING_EL,
130     address => ADDRESS_EL,
131     applet => MISC_SCOPING_EL,
132     area => MISC_SPECIAL_EL,
133     b => FORMATTING_EL,
134     base => MISC_SPECIAL_EL,
135     basefont => MISC_SPECIAL_EL,
136     bgsound => MISC_SPECIAL_EL,
137     big => FORMATTING_EL,
138     blockquote => MISC_SPECIAL_EL,
139     body => BODY_EL,
140     br => MISC_SPECIAL_EL,
141     button => BUTTON_EL,
142     caption => CAPTION_EL,
143     center => MISC_SPECIAL_EL,
144     col => MISC_SPECIAL_EL,
145     colgroup => MISC_SPECIAL_EL,
146     dd => DD_EL,
147     dir => MISC_SPECIAL_EL,
148     div => DIV_EL,
149     dl => MISC_SPECIAL_EL,
150     dt => DT_EL,
151     em => FORMATTING_EL,
152     embed => MISC_SPECIAL_EL,
153     fieldset => MISC_SPECIAL_EL,
154     font => FORMATTING_EL,
155     form => FORM_EL,
156     frame => MISC_SPECIAL_EL,
157     frameset => FRAMESET_EL,
158     h1 => HEADING_EL,
159     h2 => HEADING_EL,
160     h3 => HEADING_EL,
161     h4 => HEADING_EL,
162     h5 => HEADING_EL,
163     h6 => HEADING_EL,
164     head => MISC_SPECIAL_EL,
165     hr => MISC_SPECIAL_EL,
166     html => HTML_EL,
167     i => FORMATTING_EL,
168     iframe => MISC_SPECIAL_EL,
169     img => MISC_SPECIAL_EL,
170     input => MISC_SPECIAL_EL,
171     isindex => MISC_SPECIAL_EL,
172     li => LI_EL,
173     link => MISC_SPECIAL_EL,
174     listing => MISC_SPECIAL_EL,
175     marquee => MISC_SCOPING_EL,
176     menu => MISC_SPECIAL_EL,
177     meta => MISC_SPECIAL_EL,
178     nobr => NOBR_EL | FORMATTING_EL,
179     noembed => MISC_SPECIAL_EL,
180     noframes => MISC_SPECIAL_EL,
181     noscript => MISC_SPECIAL_EL,
182     object => MISC_SCOPING_EL,
183     ol => MISC_SPECIAL_EL,
184     optgroup => OPTGROUP_EL,
185     option => OPTION_EL,
186     p => P_EL,
187     param => MISC_SPECIAL_EL,
188     plaintext => MISC_SPECIAL_EL,
189     pre => MISC_SPECIAL_EL,
190 wakaba 1.151 rp => RUBY_COMPONENT_EL,
191     rt => RUBY_COMPONENT_EL,
192     ruby => RUBY_EL,
193 wakaba 1.123 s => FORMATTING_EL,
194     script => MISC_SPECIAL_EL,
195     select => SELECT_EL,
196     small => FORMATTING_EL,
197     spacer => MISC_SPECIAL_EL,
198     strike => FORMATTING_EL,
199     strong => FORMATTING_EL,
200     style => MISC_SPECIAL_EL,
201     table => TABLE_EL,
202     tbody => TABLE_ROW_GROUP_EL,
203     td => TABLE_CELL_EL,
204     textarea => MISC_SPECIAL_EL,
205     tfoot => TABLE_ROW_GROUP_EL,
206     th => TABLE_CELL_EL,
207     thead => TABLE_ROW_GROUP_EL,
208     title => MISC_SPECIAL_EL,
209     tr => TABLE_ROW_EL,
210     tt => FORMATTING_EL,
211     u => FORMATTING_EL,
212     ul => MISC_SPECIAL_EL,
213     wbr => MISC_SPECIAL_EL,
214     };
215    
## Category flags for foreign elements, keyed first by namespace URI
## (MathML or SVG) and then by element local name.  Only elements that need
## a flag other than plain FOREIGN_EL are listed.
216 wakaba 1.126 my $el_category_f = {
217     $MML_NS => {
218     'annotation-xml' => MML_AXML_EL,
219     mi => FOREIGN_FLOW_CONTENT_EL,
220     mo => FOREIGN_FLOW_CONTENT_EL,
221     mn => FOREIGN_FLOW_CONTENT_EL,
222     ms => FOREIGN_FLOW_CONTENT_EL,
223     mtext => FOREIGN_FLOW_CONTENT_EL,
224     },
225     $SVG_NS => {
226 wakaba 1.131 foreignObject => FOREIGN_FLOW_CONTENT_EL,
227 wakaba 1.126 desc => FOREIGN_FLOW_CONTENT_EL,
228     title => FOREIGN_FLOW_CONTENT_EL,
229     },
230     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
231     };
232    
## Maps an all-lowercase SVG attribute name to its mixed-case spelling.
## Every key is the lowercased form of its value.
## NOTE(review): presumably used to restore the case of attribute names on
## SVG elements after tokenization; the lookup itself is outside this excerpt.
233 wakaba 1.131 my $svg_attr_name = {
234 wakaba 1.146 attributename => 'attributeName',
235 wakaba 1.131 attributetype => 'attributeType',
236     basefrequency => 'baseFrequency',
237     baseprofile => 'baseProfile',
238     calcmode => 'calcMode',
239     clippathunits => 'clipPathUnits',
240     contentscripttype => 'contentScriptType',
241     contentstyletype => 'contentStyleType',
242     diffuseconstant => 'diffuseConstant',
243     edgemode => 'edgeMode',
244     externalresourcesrequired => 'externalResourcesRequired',
245     filterres => 'filterRes',
246     filterunits => 'filterUnits',
247     glyphref => 'glyphRef',
248     gradienttransform => 'gradientTransform',
249     gradientunits => 'gradientUnits',
250     kernelmatrix => 'kernelMatrix',
251     kernelunitlength => 'kernelUnitLength',
252     keypoints => 'keyPoints',
253     keysplines => 'keySplines',
254     keytimes => 'keyTimes',
255     lengthadjust => 'lengthAdjust',
256     limitingconeangle => 'limitingConeAngle',
257     markerheight => 'markerHeight',
258     markerunits => 'markerUnits',
259     markerwidth => 'markerWidth',
260     maskcontentunits => 'maskContentUnits',
261     maskunits => 'maskUnits',
262     numoctaves => 'numOctaves',
263     pathlength => 'pathLength',
264     patterncontentunits => 'patternContentUnits',
265     patterntransform => 'patternTransform',
266     patternunits => 'patternUnits',
267     pointsatx => 'pointsAtX',
268     pointsaty => 'pointsAtY',
269     pointsatz => 'pointsAtZ',
270     preservealpha => 'preserveAlpha',
271     preserveaspectratio => 'preserveAspectRatio',
272     primitiveunits => 'primitiveUnits',
273     refx => 'refX',
274     refy => 'refY',
275     repeatcount => 'repeatCount',
276     repeatdur => 'repeatDur',
277     requiredextensions => 'requiredExtensions',
278 wakaba 1.146 requiredfeatures => 'requiredFeatures',
279 wakaba 1.131 specularconstant => 'specularConstant',
280     specularexponent => 'specularExponent',
281     spreadmethod => 'spreadMethod',
282     startoffset => 'startOffset',
283     stddeviation => 'stdDeviation',
284     stitchtiles => 'stitchTiles',
285     surfacescale => 'surfaceScale',
286     systemlanguage => 'systemLanguage',
287     tablevalues => 'tableValues',
288     targetx => 'targetX',
289     targety => 'targetY',
290     textlength => 'textLength',
291     viewbox => 'viewBox',
292     viewtarget => 'viewTarget',
293     xchannelselector => 'xChannelSelector',
294     ychannelselector => 'yChannelSelector',
295     zoomandpan => 'zoomAndPan',
296     };
297    
## Maps a qualified attribute name, as it appears in the source, to
## [namespace URI, [prefix, local name]].  The bare |xmlns| attribute has
## an undefined prefix.
298     my $foreign_attr_xname = {
299     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
300     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
301     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
302     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
303     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
304     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
305     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
306     'xml:base' => [$XML_NS, ['xml', 'base']],
307     'xml:lang' => [$XML_NS, ['xml', 'lang']],
308     'xml:space' => [$XML_NS, ['xml', 'space']],
309     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
310     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
311     };
312    
313     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
314    
## Maps each code point in the range 0x80-0x9F to a replacement code point.
## 0x81, 0x8D, 0x8F, 0x90 and 0x9D map to U+FFFD (REPLACEMENT CHARACTER).
## NOTE(review): going by the name, presumably applied to numeric character
## references in the C1 range; the code that uses it is outside this excerpt.
315 wakaba 1.4 my $c1_entity_char = {
316 wakaba 1.10 0x80 => 0x20AC,
317     0x81 => 0xFFFD,
318     0x82 => 0x201A,
319     0x83 => 0x0192,
320     0x84 => 0x201E,
321     0x85 => 0x2026,
322     0x86 => 0x2020,
323     0x87 => 0x2021,
324     0x88 => 0x02C6,
325     0x89 => 0x2030,
326     0x8A => 0x0160,
327     0x8B => 0x2039,
328     0x8C => 0x0152,
329     0x8D => 0xFFFD,
330     0x8E => 0x017D,
331     0x8F => 0xFFFD,
332     0x90 => 0xFFFD,
333     0x91 => 0x2018,
334     0x92 => 0x2019,
335     0x93 => 0x201C,
336     0x94 => 0x201D,
337     0x95 => 0x2022,
338     0x96 => 0x2013,
339     0x97 => 0x2014,
340     0x98 => 0x02DC,
341     0x99 => 0x2122,
342     0x9A => 0x0161,
343     0x9B => 0x203A,
344     0x9C => 0x0153,
345     0x9D => 0xFFFD,
346     0x9E => 0x017E,
347     0x9F => 0x0178,
348 wakaba 1.4 }; # $c1_entity_char
349 wakaba 1.1
## parse_byte_string ($self, $charset_name, $bytes_or_ref, ...)
##
## Convenience wrapper around |parse_byte_stream|.  The byte string (or a
## reference to one) is opened as an in-memory read handle; that handle,
## followed by the remaining arguments, is handed to |parse_byte_stream|,
## whose return value is returned.
350 wakaba 1.63 sub parse_byte_string ($$$$;$) {
351 wakaba 1.138 my $self = shift;
352     my $charset_name = shift;
## A reference is used as is; a plain string is wrapped in a reference.
## NOTE(review): the result of |open| is not checked.
353     open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
354     return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
355     } # parse_byte_string
356    
## parse_byte_stream ($self_or_class, $charset_name, $byte_stream, $doc,
##                    $onerror, ...)
##
## Selects a character encoding for |$byte_stream|, wraps the stream in a
## decode handle and parses it with |parse_char_stream|.  When invoked on a
## class name, a new parser object is created first.
##
## |$charset_name| may be undef; the encoding is then sniffed: BOM, then
## UniversalCharDet on at most the first 1024 bytes, then the windows-1252
## default.  The arguments after the byte stream are forwarded unchanged to
## |parse_char_stream|.  |$onerror|, if true, is also installed as the
## error handler used while sniffing.
##
## Returns what |parse_char_stream| returns.  If the |change_encoding|
## callback throws Whatpm::HTML::RestartParser, the parse is run a second
## time with the decode handle that the callback selected.
357     sub parse_byte_stream ($$$$;$) {
358 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
359 wakaba 1.133 my $charset_name = shift;
360 wakaba 1.138 my $byte_stream = $_[0];
361 wakaba 1.133
362 wakaba 1.134 my $onerror = $_[2] || sub {
363     my (%opt) = @_;
364     warn "Parse error ($opt{type})\n";
365     };
366     $self->{parse_error} = $onerror; # updated later by parse_char_string
367    
368 wakaba 1.133 ## HTML5 encoding sniffing algorithm
369     require Message::Charset::Info;
370     my $charset;
371 wakaba 1.136 my $buffer;
372     my ($char_stream, $e_status);
373 wakaba 1.133
374     SNIFFING: {
375    
376     ## Step 1
377     if (defined $charset_name) {
378     $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
379    
380     ## ISSUE: Unsupported encoding is not ignored according to the spec.
381 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
382     ($byte_stream, allow_error_reporting => 1,
383 wakaba 1.133 allow_fallback => 1);
384 wakaba 1.136 if ($char_stream) {
385 wakaba 1.133 $self->{confident} = 1;
386     last SNIFFING;
387 wakaba 1.136 } else {
388     ## TODO: unsupported error
389 wakaba 1.133 }
390     }
391    
392     ## Step 2
## Read at most 1024 bytes from the stream to sniff on.
393 wakaba 1.136 my $byte_buffer = '';
394     for (1..1024) {
395     my $char = $byte_stream->getc;
396     last unless defined $char;
397     $byte_buffer .= $char;
398     } ## TODO: timeout
399 wakaba 1.133
400     ## Step 3
## A UTF-16BE, UTF-16LE or UTF-8 byte order mark decides the encoding.
401 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
402 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-16be');
403 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
404     ($byte_stream, allow_error_reporting => 1,
405     allow_fallback => 1, byte_buffer => \$byte_buffer);
406 wakaba 1.133 $self->{confident} = 1;
407     last SNIFFING;
408 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
409 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-16le');
410 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
411     ($byte_stream, allow_error_reporting => 1,
412     allow_fallback => 1, byte_buffer => \$byte_buffer);
413 wakaba 1.133 $self->{confident} = 1;
414     last SNIFFING;
415 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
416 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
417 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
418     ($byte_stream, allow_error_reporting => 1,
419     allow_fallback => 1, byte_buffer => \$byte_buffer);
420 wakaba 1.133 $self->{confident} = 1;
421     last SNIFFING;
422     }
423    
424     ## Step 4
425     ## TODO: <meta charset>
426    
427     ## Step 5
428     ## TODO: from history
429    
430     ## Step 6
## Guess the encoding from the sniffed bytes; the result is not confident.
431 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
432 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
433 wakaba 1.136 ($byte_buffer);
434 wakaba 1.133 if (defined $charset_name) {
435     $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
436    
437     ## ISSUE: Unsupported encoding is not ignored according to the spec.
438 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
439     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
440     ($byte_stream);
441     ($char_stream, $e_status) = $charset->get_decode_handle
442     ($buffer, allow_error_reporting => 1,
443     allow_fallback => 1, byte_buffer => \$byte_buffer);
444     if ($char_stream) {
445     $buffer->{buffer} = $byte_buffer;
446 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
447     text => $charset_name,
448     level => $self->{level}->{info},
449     layer => 'encode',
450 wakaba 1.134 line => 1, column => 1);
451 wakaba 1.133 $self->{confident} = 0;
452     last SNIFFING;
453     }
454     }
455    
456     ## Step 7: default
457     ## TODO: Make this configurable.
458     $charset = Message::Charset::Info->get_by_iana_name ('windows-1252');
459     ## NOTE: We choose |windows-1252| here, since |utf-8| should be
460     ## detectable in the step 6.
461 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
462     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
463     ($byte_stream);
464     ($char_stream, $e_status)
465     = $charset->get_decode_handle ($buffer,
466     allow_error_reporting => 1,
467     allow_fallback => 1,
468     byte_buffer => \$byte_buffer);
469     $buffer->{buffer} = $byte_buffer;
470 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
471     text => 'windows-1252',
472     level => $self->{level}->{info},
473     line => 1, column => 1,
474     layer => 'encode');
475 wakaba 1.63 $self->{confident} = 0;
476 wakaba 1.133 } # SNIFFING
477    
## Record the chosen encoding and report limitations of the decoder
## implementation (fallback decoder, or decoder without error reporting).
478 wakaba 1.134 $self->{input_encoding} = $charset->get_iana_name;
479 wakaba 1.133 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
480 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
481     text => $self->{input_encoding},
482     level => $self->{level}->{uncertain},
483     line => 1, column => 1,
484     layer => 'encode');
485 wakaba 1.133 } elsif (not ($e_status &
486     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
487 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
488     text => $self->{input_encoding},
489     level => $self->{level}->{uncertain},
490     line => 1, column => 1,
491     layer => 'encode');
492 wakaba 1.63 }
493    
## Callback ($self, $charset_name, $token) for switching to another
## encoding.  It overwrites the enclosing |$charset_name|, |$charset|,
## |$char_stream| and |$e_status|, which the restart handler below reads.
494     $self->{change_encoding} = sub {
495     my $self = shift;
496 wakaba 1.134 $charset_name = shift;
497 wakaba 1.114 my $token = shift;
498 wakaba 1.63
## NOTE(review): |$buffer| is only assigned in steps 6 and 7 above.  If
## sniffing ended in step 1 or 3 it is still undef here and
## |\ $buffer->{buffer}| appears to autovivify an empty hash -- confirm
## that this is intended.
499 wakaba 1.134 $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
500 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
501     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
502     byte_buffer => \ $buffer->{buffer});
503 wakaba 1.134
504 wakaba 1.136 if ($char_stream) { # if supported
505 wakaba 1.134 ## "Change the encoding" algorithm:
506 wakaba 1.63
507 wakaba 1.134 ## Step 1
## A UTF-16 family encoding is replaced by utf-8.
508 wakaba 1.149 if ($charset->{category} &
509     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
510 wakaba 1.134 $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
511 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
512     ($byte_stream,
513     byte_buffer => \ $buffer->{buffer});
514 wakaba 1.134 }
515     $charset_name = $charset->get_iana_name;
516    
517     ## Step 2
## Same encoding as the one in use: become confident and keep parsing.
518     if (defined $self->{input_encoding} and
519     $self->{input_encoding} eq $charset_name) {
520 wakaba 1.153 !!!parse-error (type => 'charset label:matching',
521     text => $charset_name,
522     level => $self->{level}->{info});
523 wakaba 1.134 $self->{confident} = 1;
524     return;
525     }
526 wakaba 1.63
527 wakaba 1.153 !!!parse-error (type => 'charset label detected',
528     text => $self->{input_encoding},
529     value => $charset_name,
530     level => $self->{level}->{warn},
531     token => $token);
532 wakaba 1.134
533     ## Step 3
534     # if (can) {
535     ## change the encoding on the fly.
536     #$self->{confident} = 1;
537     #return;
538     # }
539    
540     ## Step 4
541     throw Whatpm::HTML::RestartParser ();
542 wakaba 1.63 }
543     }; # $self->{change_encoding}
544    
## Decode error handler: reports the error at the current parser position
## and, when the offending octets are supplied, replaces them with U+FFFD.
545 wakaba 1.136 my $char_onerror = sub {
546     my (undef, $type, %opt) = @_;
547 wakaba 1.153 !!!parse-error (layer => 'encode',
548     %opt, type => $type,
549 wakaba 1.137 line => $self->{line}, column => $self->{column} + 1);
550 wakaba 1.136 if ($opt{octets}) {
551     ${$opt{octets}} = "\x{FFFD}"; # replacement character
552     }
553     };
554     $char_stream->onerror ($char_onerror);
555    
## Drop the byte stream; the rest (document, error handler, ...) is
## forwarded to |parse_char_stream|.
556 wakaba 1.63 my @args = @_; shift @args; # $s
557     my $return;
558     try {
559 wakaba 1.136 $return = $self->parse_char_stream ($char_stream, @args);
560 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
561 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
562    
563     $self->{input_encoding} = $charset->get_iana_name;
564     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
565 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
566     text => $self->{input_encoding},
567     level => $self->{level}->{uncertain},
568     line => 1, column => 1,
569     layer => 'encode');
570 wakaba 1.134 } elsif (not ($e_status &
571     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
572 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
573     text => $self->{input_encoding},
574     level => $self->{level}->{uncertain},
575     line => 1, column => 1,
576     layer => 'encode');
577 wakaba 1.134 }
578 wakaba 1.63 $self->{confident} = 1;
579 wakaba 1.136 $char_stream->onerror ($char_onerror);
580     $return = $self->parse_char_stream ($char_stream, @args);
581 wakaba 1.63 };
582     return $return;
583 wakaba 1.138 } # parse_byte_stream
584 wakaba 1.63
585 wakaba 1.71 ## NOTE: The HTML5 spec says that the encoding layer MUST NOT strip the BOM
586     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
587     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
588     ## because the core part of our HTML parser expects a string of characters,
589     ## not a string of bytes or code units or anything which might contain a BOM.
590     ## Therefore, any parser interface that accepts a string of bytes,
591     ## such as |parse_byte_string| in this module, must ensure that it does
592     ## strip the BOM and never strips any ZWNBSP.
593    
## parse_char_string ($self, $string_or_ref, ...)
##
## Convenience wrapper around |parse_char_stream|.  The character string
## (or a reference to one) is opened as an in-memory read handle; that
## handle, followed by the remaining arguments, is handed to
## |parse_char_stream|, whose return value is returned.
## |parse_string| is an alias for this method.
594 wakaba 1.135 sub parse_char_string ($$$;$) {
595     my $self = shift;
596 wakaba 1.139 require utf8;
597     my $s = ref $_[0] ? $_[0] : \($_[0]);
## The |:utf8| layer is added only when the string carries the UTF-8 flag.
598     open my $input, '<' . (utf8::is_utf8 ($$s) ? ':utf8' : ''), $s;
599 wakaba 1.135 return $self->parse_char_stream ($input, @_[1..$#_]);
600     } # parse_char_string
601     *parse_string = \&parse_char_string;
602 wakaba 1.63
## parse_char_stream ($self_or_class, $input, $doc, $onerror)
##
## Parses the characters read from |$input| (through its |getc| method)
## into the document |$doc|, whose existing child nodes are removed first.
## When invoked on a class name, a new parser object is created.
## |$onerror| is optional; the default handler |warn|s the error type with
## its line and column.  Returns |$doc|.
603 wakaba 1.135 sub parse_char_stream ($$$;$) {
604 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
605 wakaba 1.135 my $input = $_[0];
606 wakaba 1.1 $self->{document} = $_[1];
607 wakaba 1.63 @{$self->{document}->child_nodes} = ();
608 wakaba 1.1
609 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
610    
611 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
612 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
613     if defined $self->{input_encoding};
614 wakaba 1.63
## NOTE(review): |$i| is not referenced in the visible part of this sub.
615 wakaba 1.1 my $i = 0;
616 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
617     $self->{column_prev} = $self->{column} = 0;
## Input reader: stores the next character's code point in {next_char}
## (-1 at end of input), shifts the previous value into the three-entry
## {prev_char} history, and keeps {line} / {column} up to date.
618 wakaba 1.76 $self->{set_next_char} = sub {
619 wakaba 1.1 my $self = shift;
620 wakaba 1.13
621 wakaba 1.76 pop @{$self->{prev_char}};
622     unshift @{$self->{prev_char}}, $self->{next_char};
623 wakaba 1.13
## {next_next_char} holds a character that was read ahead after a CR.
624 wakaba 1.139 my $char;
625     if (defined $self->{next_next_char}) {
626     $char = $self->{next_next_char};
627     delete $self->{next_next_char};
628     } else {
629     $char = $input->getc;
630     }
631 wakaba 1.135 $self->{next_char} = -1 and return unless defined $char;
632     $self->{next_char} = ord $char;
633 wakaba 1.112
634     ($self->{line_prev}, $self->{column_prev})
635     = ($self->{line}, $self->{column});
636     $self->{column}++;
637 wakaba 1.1
638 wakaba 1.76 if ($self->{next_char} == 0x000A) { # LF
639 wakaba 1.132 !!!cp ('j1');
640 wakaba 1.112 $self->{line}++;
641     $self->{column} = 0;
642 wakaba 1.76 } elsif ($self->{next_char} == 0x000D) { # CR
643 wakaba 1.132 !!!cp ('j2');
## CR and CR LF both become a single LF: an LF directly after the CR is
## dropped, any other character is kept for the next call.
644 wakaba 1.135 my $next = $input->getc;
645 wakaba 1.139 if (defined $next and $next ne "\x0A") {
646     $self->{next_next_char} = $next;
647 wakaba 1.135 }
648 wakaba 1.76 $self->{next_char} = 0x000A; # LF # MUST
649 wakaba 1.112 $self->{line}++;
650     $self->{column} = 0;
651 wakaba 1.76 } elsif ($self->{next_char} > 0x10FFFF) {
652 wakaba 1.132 !!!cp ('j3');
653 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
654     } elsif ($self->{next_char} == 0x0000) { # NULL
655 wakaba 1.132 !!!cp ('j4');
656 wakaba 1.8 !!!parse-error (type => 'NULL');
657 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
## Remaining control characters, surrogates and noncharacters are reported
## as 'control char' errors but the character itself is left unchanged.
658 wakaba 1.132 } elsif ($self->{next_char} <= 0x0008 or
659     (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
660     (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
661     (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
662     (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
663     {
664     0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
665     0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
666     0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
667     0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
668     0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
669     0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
670     0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
671     0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
672     0x10FFFE => 1, 0x10FFFF => 1,
673     }->{$self->{next_char}}) {
674     !!!cp ('j5');
675 wakaba 1.153 if ($self->{next_char} < 0x10000) {
676     !!!parse-error (type => 'control char',
677     text => (sprintf 'U+%04X', $self->{next_char}));
678     } else {
679     !!!parse-error (type => 'control char',
680     text => (sprintf 'U-%08X', $self->{next_char}));
681     }
682 wakaba 1.1 }
683     };
684 wakaba 1.76 $self->{prev_char} = [-1, -1, -1];
685     $self->{next_char} = -1;
686 wakaba 1.1
## Default error handler: prefers the token's position over the |line| /
## |column| options when a token is supplied.
687 wakaba 1.3 my $onerror = $_[2] || sub {
688     my (%opt) = @_;
689 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
690     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
691     warn "Parse error ($opt{type}) at line $line column $column\n";
692 wakaba 1.3 };
## The current position is passed first, so |line| / |column| values given
## by the caller come later in the argument list.
693     $self->{parse_error} = sub {
694 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
695 wakaba 1.1 };
696    
697     $self->_initialize_tokenizer;
698     $self->_initialize_tree_constructor;
699     $self->_construct_tree;
700     $self->_terminate_tree_constructor;
701    
## The {parse_error} closure captures |$self|; deleting it breaks that cycle.
702 wakaba 1.112 delete $self->{parse_error}; # remove loop
703    
704 wakaba 1.1 return $self->{document};
705 wakaba 1.135 } # parse_char_stream
706 wakaba 1.1
## new ($class)
##
## Creates a parser object with the default error level codes and with
## placeholder callbacks: {set_next_char} always reports end of input (-1),
## while {parse_error}, {change_encoding} and {application_cache_selection}
## do nothing.  Returns the new object.
707     sub new ($) {
708     my $class = shift;
709 wakaba 1.134 my $self = bless {
710 wakaba 1.153 level => {must => 'm',
711 wakaba 1.159 should => 's',
712 wakaba 1.153 warn => 'w',
713     info => 'i',
714     uncertain => 'u'},
715 wakaba 1.134 }, $class;
## NOTE(review): this closure captures |$self|, so the object refers to
## itself through {set_next_char} until the slot is overwritten (as
## |parse_char_stream| does) -- confirm whether this cycle matters.
716 wakaba 1.76 $self->{set_next_char} = sub {
717     $self->{next_char} = -1;
718 wakaba 1.1 };
719     $self->{parse_error} = sub {
720     #
721     };
722 wakaba 1.63 $self->{change_encoding} = sub {
723     # if ($_[0] is a supported encoding) {
724     # run "change the encoding" algorithm;
725     # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
726     # }
727     };
728 wakaba 1.61 $self->{application_cache_selection} = sub {
729     #
730     };
731 wakaba 1.1 return $self;
732     } # new
733    
## Content model flags.  The three CM_* bits say which kinds of markup are
## recognized in character data; the four *_CONTENT_MODEL constants are the
## combinations of those bits that the tokenizer uses.
734 wakaba 1.40 sub CM_ENTITY () { 0b001 } # & markup in data
735     sub CM_LIMITED_MARKUP () { 0b010 } # < markup in data (limited)
736     sub CM_FULL_MARKUP () { 0b100 } # < markup in data (any)
737    
738     sub PLAINTEXT_CONTENT_MODEL () { 0 }
739     sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
740     sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP }
741     sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP }
742    
## Tokenizer states, stored in |$self->{state}|.  The values are plain
## sequence numbers (0-35), not bit flags.
743 wakaba 1.57 sub DATA_STATE () { 0 }
744     sub ENTITY_DATA_STATE () { 1 }
745     sub TAG_OPEN_STATE () { 2 }
746     sub CLOSE_TAG_OPEN_STATE () { 3 }
747     sub TAG_NAME_STATE () { 4 }
748     sub BEFORE_ATTRIBUTE_NAME_STATE () { 5 }
749     sub ATTRIBUTE_NAME_STATE () { 6 }
750     sub AFTER_ATTRIBUTE_NAME_STATE () { 7 }
751     sub BEFORE_ATTRIBUTE_VALUE_STATE () { 8 }
752     sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
753     sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
754     sub ATTRIBUTE_VALUE_UNQUOTED_STATE () { 11 }
755     sub ENTITY_IN_ATTRIBUTE_VALUE_STATE () { 12 }
756     sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
757     sub COMMENT_START_STATE () { 14 }
758     sub COMMENT_START_DASH_STATE () { 15 }
759     sub COMMENT_STATE () { 16 }
760     sub COMMENT_END_STATE () { 17 }
761     sub COMMENT_END_DASH_STATE () { 18 }
762     sub BOGUS_COMMENT_STATE () { 19 }
763     sub DOCTYPE_STATE () { 20 }
764     sub BEFORE_DOCTYPE_NAME_STATE () { 21 }
765     sub DOCTYPE_NAME_STATE () { 22 }
766     sub AFTER_DOCTYPE_NAME_STATE () { 23 }
767     sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 24 }
768     sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
769     sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
770     sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 27 }
771     sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 28 }
772     sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
773     sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
774     sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
775     sub BOGUS_DOCTYPE_STATE () { 32 }
776 wakaba 1.72 sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
777 wakaba 1.125 sub SELF_CLOSING_START_TAG_STATE () { 34 }
778 wakaba 1.127 sub CDATA_BLOCK_STATE () { 35 }
779 wakaba 1.57
## Token types, stored in a token's |->{type}| field.
780 wakaba 1.55 sub DOCTYPE_TOKEN () { 1 }
781     sub COMMENT_TOKEN () { 2 }
782     sub START_TAG_TOKEN () { 3 }
783     sub END_TAG_TOKEN () { 4 }
784     sub END_OF_FILE_TOKEN () { 5 }
785     sub CHARACTER_TOKEN () { 6 }
786    
## Insertion modes.  The *_IMS constants are single-bit group flags.  The
## *_IM constants are the concrete insertion modes; most are built from one
## or two group flags plus, where several modes share a group, a
## discriminator in the two lowest bits.  IN_COLUMN_GROUP_IM carries no
## group flag at all.
787 wakaba 1.54 sub AFTER_HTML_IMS () { 0b100 }
788     sub HEAD_IMS () { 0b1000 }
789     sub BODY_IMS () { 0b10000 }
790 wakaba 1.56 sub BODY_TABLE_IMS () { 0b100000 }
791 wakaba 1.54 sub TABLE_IMS () { 0b1000000 }
792 wakaba 1.56 sub ROW_IMS () { 0b10000000 }
793 wakaba 1.54 sub BODY_AFTER_IMS () { 0b100000000 }
794     sub FRAME_IMS () { 0b1000000000 }
795 wakaba 1.101 sub SELECT_IMS () { 0b10000000000 }
796 wakaba 1.126 sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 }
797     ## NOTE: "in foreign content" insertion mode is special; it is combined
798     ## with the secondary insertion mode. In this parser, they are stored
799     ## together in the bit-or'ed form.
800 wakaba 1.54
801 wakaba 1.84 ## NOTE: "initial" and "before html" insertion modes have no constants.
802    
803     ## NOTE: "after after body" insertion mode.
804 wakaba 1.54 sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
805 wakaba 1.84
806     ## NOTE: "after after frameset" insertion mode.
807 wakaba 1.54 sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
808 wakaba 1.84
809 wakaba 1.54 sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
810     sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
811     sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
812     sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 }
813     sub IN_BODY_IM () { BODY_IMS }
814 wakaba 1.56 sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 0b01 }
815     sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }
816     sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 0b01 }
817     sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
818 wakaba 1.54 sub IN_TABLE_IM () { TABLE_IMS }
819     sub AFTER_BODY_IM () { BODY_AFTER_IMS }
820     sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 }
821     sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }
822 wakaba 1.101 sub IN_SELECT_IM () { SELECT_IMS | 0b01 }
823     sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }
824 wakaba 1.54 sub IN_COLUMN_GROUP_IM () { 0b10 }
825    
826 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec.
827    
## Resets the tokenizer part of the parser object |$self| to its initial
## condition: data state, PCDATA content model, no token or attribute under
## construction, no remembered start tag name, no pending self-closing
## flag, and empty |char| and |token| arrays.  It then invokes the
## |!!!next-input-character| macro.
##
## Takes the parser object as its only argument.  The return value is not
## meaningful (it is whatever the last assignment yields).
828     sub _initialize_tokenizer ($) {
829     my $self = shift;
830 wakaba 1.57 $self->{state} = DATA_STATE; # initial tokenizer state MUST be the data state
831 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # initial content model MUST be PCDATA
832 wakaba 1.1 undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE being built
833     undef $self->{current_attribute};
834     undef $self->{last_emitted_start_tag_name};
835     undef $self->{last_attribute_value_state};
836 wakaba 1.125 delete $self->{self_closing};
837 wakaba 1.1 $self->{char} = []; # presumably the buffer of pushed-back input characters -- TODO confirm
838 wakaba 1.76 # $self->{next_char} is presumably set by the macro on the next line -- TODO confirm
839 wakaba 1.1 !!!next-input-character;
840     $self->{token} = []; # queue of tokens that |_get_next_token| returns before reading more input
841 wakaba 1.18 # $self->{escape} is not initialized here
842 wakaba 1.1 } # _initialize_tokenizer
843    
844     ## A token has:
845 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
846     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
847     ## ->{name} (DOCTYPE_TOKEN)
848     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
849     ## ->{public_identifier} (DOCTYPE_TOKEN)
850     ## ->{system_identifier} (DOCTYPE_TOKEN)
851 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
852 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
853 wakaba 1.66 ## ->{name}
854     ## ->{value}
855     ## ->{has_reference} == 1 or 0
856 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
857 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
858     ## |->{self_closing}| is used to save the value of |$self->{self_closing}|
859     ## while the token is pushed back to the stack.
860    
861 wakaba 1.1 ## An emitted token MUST immediately be handled by the tree construction stage.
862    
863     ## Before each step, UA MAY check to see if either one of the scripts in
864     ## "list of scripts that will execute as soon as possible" or the first
865     ## script in the "list of scripts that will execute asynchronously",
866     ## has completed loading. If one has, then it MUST be executed
867     ## and removed from the list.
868    
869 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
870     ## documents and not to user agents and conformance checkers,
871     ## contains some requirements that are not detected by the
872     ## parsing algorithm:
873     ## - Some requirements on character encoding declarations. ## TODO
874     ## - "Elements MUST NOT contain content that their content model disallows."
875     ## ... Some are parse error, some are not (will be reported by c.c.).
876     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
877     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
878     ## control characters other than space characters. ## TODO: (what is a control character? C0, C1 and DEL? Unicode control characters?)
879    
880     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
881     ## be detected by the HTML5 parsing algorithm:
882     ## - Text,
883    
884 wakaba 1.1 sub _get_next_token ($) {
885     my $self = shift;
886 wakaba 1.125
887     if ($self->{self_closing}) {
888     !!!parse-error (type => 'nestc', token => $self->{current_token});
889     ## NOTE: The |self_closing| flag is only set by start tag token.
890     ## In addition, when a start tag token is emitted, it is always set to
891     ## |current_token|.
892     delete $self->{self_closing};
893     }
894    
895 wakaba 1.1 if (@{$self->{token}}) {
896 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
897 wakaba 1.1 return shift @{$self->{token}};
898     }
899    
900     A: {
901 wakaba 1.57 if ($self->{state} == DATA_STATE) {
902 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
903 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
904     not $self->{escape}) {
905 wakaba 1.77 !!!cp (1);
906 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
907 wakaba 1.1 !!!next-input-character;
908     redo A;
909     } else {
910 wakaba 1.77 !!!cp (2);
911 wakaba 1.1 #
912     }
913 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
914 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
915 wakaba 1.13 unless ($self->{escape}) {
916 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
917     $self->{prev_char}->[1] == 0x0021 and # !
918     $self->{prev_char}->[2] == 0x003C) { # <
919 wakaba 1.77 !!!cp (3);
920 wakaba 1.13 $self->{escape} = 1;
921 wakaba 1.77 } else {
922     !!!cp (4);
923 wakaba 1.13 }
924 wakaba 1.77 } else {
925     !!!cp (5);
926 wakaba 1.13 }
927     }
928    
929     #
930 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
931 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
932     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
933 wakaba 1.13 not $self->{escape})) {
934 wakaba 1.77 !!!cp (6);
935 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
936 wakaba 1.1 !!!next-input-character;
937     redo A;
938     } else {
939 wakaba 1.77 !!!cp (7);
940 wakaba 1.1 #
941     }
942 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
943 wakaba 1.13 if ($self->{escape} and
944 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
945 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
946     $self->{prev_char}->[1] == 0x002D) { # -
947 wakaba 1.77 !!!cp (8);
948 wakaba 1.13 delete $self->{escape};
949 wakaba 1.77 } else {
950     !!!cp (9);
951 wakaba 1.13 }
952 wakaba 1.77 } else {
953     !!!cp (10);
954 wakaba 1.13 }
955    
956     #
957 wakaba 1.76 } elsif ($self->{next_char} == -1) {
958 wakaba 1.77 !!!cp (11);
959 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
960     line => $self->{line}, column => $self->{column}});
961 wakaba 1.1 last A; ## TODO: ok?
962 wakaba 1.77 } else {
963     !!!cp (12);
964 wakaba 1.1 }
965     # Anything else
966 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
967 wakaba 1.112 data => chr $self->{next_char},
968 wakaba 1.120 line => $self->{line}, column => $self->{column},
969 wakaba 1.118 };
970 wakaba 1.1 ## Stay in the data state
971     !!!next-input-character;
972    
973     !!!emit ($token);
974    
975     redo A;
976 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
977 wakaba 1.1 ## (cannot happen in CDATA state)
978 wakaba 1.112
979 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
980 wakaba 1.1
981 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
982 wakaba 1.1
983 wakaba 1.57 $self->{state} = DATA_STATE;
984 wakaba 1.1 # next-input-character is already done
985    
986     unless (defined $token) {
987 wakaba 1.77 !!!cp (13);
988 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
989 wakaba 1.120 line => $l, column => $c,
990 wakaba 1.118 });
991 wakaba 1.1 } else {
992 wakaba 1.77 !!!cp (14);
993 wakaba 1.1 !!!emit ($token);
994     }
995    
996     redo A;
997 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
998 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
999 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
1000 wakaba 1.77 !!!cp (15);
1001 wakaba 1.1 !!!next-input-character;
1002 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1003 wakaba 1.1 redo A;
1004     } else {
1005 wakaba 1.77 !!!cp (16);
1006 wakaba 1.1 ## reconsume
1007 wakaba 1.57 $self->{state} = DATA_STATE;
1008 wakaba 1.1
1009 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1010 wakaba 1.120 line => $self->{line_prev},
1011     column => $self->{column_prev},
1012 wakaba 1.118 });
1013 wakaba 1.1
1014     redo A;
1015     }
1016 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
1017 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
1018 wakaba 1.77 !!!cp (17);
1019 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
1020 wakaba 1.1 !!!next-input-character;
1021     redo A;
1022 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1023 wakaba 1.77 !!!cp (18);
1024 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1025 wakaba 1.1 !!!next-input-character;
1026     redo A;
1027 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1028     $self->{next_char} <= 0x005A) { # A..Z
1029 wakaba 1.77 !!!cp (19);
1030 wakaba 1.1 $self->{current_token}
1031 wakaba 1.55 = {type => START_TAG_TOKEN,
1032 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
1033     line => $self->{line_prev},
1034     column => $self->{column_prev}};
1035 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1036 wakaba 1.1 !!!next-input-character;
1037     redo A;
1038 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1039     $self->{next_char} <= 0x007A) { # a..z
1040 wakaba 1.77 !!!cp (20);
1041 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
1042 wakaba 1.112 tag_name => chr ($self->{next_char}),
1043     line => $self->{line_prev},
1044     column => $self->{column_prev}};
1045 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1046 wakaba 1.1 !!!next-input-character;
1047     redo A;
1048 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1049 wakaba 1.77 !!!cp (21);
1050 wakaba 1.115 !!!parse-error (type => 'empty start tag',
1051     line => $self->{line_prev},
1052     column => $self->{column_prev});
1053 wakaba 1.57 $self->{state} = DATA_STATE;
1054 wakaba 1.1 !!!next-input-character;
1055    
1056 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
1057 wakaba 1.120 line => $self->{line_prev},
1058     column => $self->{column_prev},
1059 wakaba 1.118 });
1060 wakaba 1.1
1061     redo A;
1062 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
1063 wakaba 1.77 !!!cp (22);
1064 wakaba 1.115 !!!parse-error (type => 'pio',
1065     line => $self->{line_prev},
1066     column => $self->{column_prev});
1067 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1068 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1069 wakaba 1.120 line => $self->{line_prev},
1070     column => $self->{column_prev},
1071 wakaba 1.118 };
1072 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1073 wakaba 1.1 redo A;
1074     } else {
1075 wakaba 1.77 !!!cp (23);
1076 wakaba 1.136 !!!parse-error (type => 'bare stago',
1077     line => $self->{line_prev},
1078     column => $self->{column_prev});
1079 wakaba 1.57 $self->{state} = DATA_STATE;
1080 wakaba 1.1 ## reconsume
1081    
1082 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1083 wakaba 1.120 line => $self->{line_prev},
1084     column => $self->{column_prev},
1085 wakaba 1.118 });
1086 wakaba 1.1
1087     redo A;
1088     }
1089     } else {
1090 wakaba 1.40 die "$0: $self->{content_model} in tag open";
1091 wakaba 1.1 }
1092 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1093 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1094 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1095 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
1096 wakaba 1.112
1097 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
1098 wakaba 1.23 my @next_char;
1099     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
1100 wakaba 1.76 push @next_char, $self->{next_char};
1101 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
1102     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
1103 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
1104 wakaba 1.77 !!!cp (24);
1105 wakaba 1.23 !!!next-input-character;
1106     next TAGNAME;
1107     } else {
1108 wakaba 1.77 !!!cp (25);
1109 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
1110 wakaba 1.23 !!!back-next-input-character (@next_char);
1111 wakaba 1.57 $self->{state} = DATA_STATE;
1112 wakaba 1.23
1113 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1114 wakaba 1.120 line => $l, column => $c,
1115 wakaba 1.118 });
1116 wakaba 1.23
1117     redo A;
1118     }
1119     }
1120 wakaba 1.76 push @next_char, $self->{next_char};
1121 wakaba 1.23
1122 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
1123     $self->{next_char} == 0x000A or # LF
1124     $self->{next_char} == 0x000B or # VT
1125     $self->{next_char} == 0x000C or # FF
1126     $self->{next_char} == 0x0020 or # SP
1127     $self->{next_char} == 0x003E or # >
1128     $self->{next_char} == 0x002F or # /
1129     $self->{next_char} == -1) {
1130 wakaba 1.77 !!!cp (26);
1131 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
1132 wakaba 1.1 !!!back-next-input-character (@next_char);
1133 wakaba 1.57 $self->{state} = DATA_STATE;
1134 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1135 wakaba 1.120 line => $l, column => $c,
1136 wakaba 1.118 });
1137 wakaba 1.1 redo A;
1138 wakaba 1.23 } else {
1139 wakaba 1.77 !!!cp (27);
1140 wakaba 1.76 $self->{next_char} = shift @next_char;
1141 wakaba 1.23 !!!back-next-input-character (@next_char);
1142     # and consume...
1143 wakaba 1.1 }
1144 wakaba 1.23 } else {
1145     ## No start tag token has ever been emitted
1146 wakaba 1.77 !!!cp (28);
1147 wakaba 1.23 # next-input-character is already done
1148 wakaba 1.57 $self->{state} = DATA_STATE;
1149 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1150 wakaba 1.120 line => $l, column => $c,
1151 wakaba 1.118 });
1152 wakaba 1.1 redo A;
1153     }
1154     }
1155    
1156 wakaba 1.76 if (0x0041 <= $self->{next_char} and
1157     $self->{next_char} <= 0x005A) { # A..Z
1158 wakaba 1.77 !!!cp (29);
1159 wakaba 1.112 $self->{current_token}
1160     = {type => END_TAG_TOKEN,
1161     tag_name => chr ($self->{next_char} + 0x0020),
1162     line => $l, column => $c};
1163 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1164 wakaba 1.1 !!!next-input-character;
1165     redo A;
1166 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1167     $self->{next_char} <= 0x007A) { # a..z
1168 wakaba 1.77 !!!cp (30);
1169 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
1170 wakaba 1.112 tag_name => chr ($self->{next_char}),
1171     line => $l, column => $c};
1172 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1173 wakaba 1.1 !!!next-input-character;
1174     redo A;
1175 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1176 wakaba 1.77 !!!cp (31);
1177 wakaba 1.115 !!!parse-error (type => 'empty end tag',
1178     line => $self->{line_prev}, ## "<" in "</>"
1179     column => $self->{column_prev} - 1);
1180 wakaba 1.57 $self->{state} = DATA_STATE;
1181 wakaba 1.1 !!!next-input-character;
1182     redo A;
1183 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1184 wakaba 1.77 !!!cp (32);
1185 wakaba 1.3 !!!parse-error (type => 'bare etago');
1186 wakaba 1.57 $self->{state} = DATA_STATE;
1187 wakaba 1.1 # reconsume
1188    
1189 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1190 wakaba 1.120 line => $l, column => $c,
1191 wakaba 1.118 });
1192 wakaba 1.1
1193     redo A;
1194     } else {
1195 wakaba 1.77 !!!cp (33);
1196 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
1197 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1198 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1199 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
1200     column => $self->{column_prev} - 1,
1201 wakaba 1.118 };
1202 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1203 wakaba 1.1 redo A;
1204     }
1205 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
1206 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1207     $self->{next_char} == 0x000A or # LF
1208     $self->{next_char} == 0x000B or # VT
1209     $self->{next_char} == 0x000C or # FF
1210     $self->{next_char} == 0x0020) { # SP
1211 wakaba 1.77 !!!cp (34);
1212 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1213 wakaba 1.1 !!!next-input-character;
1214     redo A;
1215 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1216 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1217 wakaba 1.77 !!!cp (35);
1218 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1219 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1220 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1221 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1222     # ## NOTE: This should never be reached.
1223     # !!! cp (36);
1224     # !!! parse-error (type => 'end tag attribute');
1225     #} else {
1226 wakaba 1.77 !!!cp (37);
1227 wakaba 1.78 #}
1228 wakaba 1.1 } else {
1229     die "$0: $self->{current_token}->{type}: Unknown token type";
1230     }
1231 wakaba 1.57 $self->{state} = DATA_STATE;
1232 wakaba 1.1 !!!next-input-character;
1233    
1234     !!!emit ($self->{current_token}); # start tag or end tag
1235    
1236     redo A;
1237 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1238     $self->{next_char} <= 0x005A) { # A..Z
1239 wakaba 1.77 !!!cp (38);
1240 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1241 wakaba 1.1 # start tag or end tag
1242     ## Stay in this state
1243     !!!next-input-character;
1244     redo A;
1245 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1246 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1247 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1248 wakaba 1.77 !!!cp (39);
1249 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1250 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1251 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1252 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1253     # ## NOTE: This state should never be reached.
1254     # !!! cp (40);
1255     # !!! parse-error (type => 'end tag attribute');
1256     #} else {
1257 wakaba 1.77 !!!cp (41);
1258 wakaba 1.78 #}
1259 wakaba 1.1 } else {
1260     die "$0: $self->{current_token}->{type}: Unknown token type";
1261     }
1262 wakaba 1.57 $self->{state} = DATA_STATE;
1263 wakaba 1.1 # reconsume
1264    
1265     !!!emit ($self->{current_token}); # start tag or end tag
1266    
1267     redo A;
1268 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1269 wakaba 1.125 !!!cp (42);
1270     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1271 wakaba 1.1 !!!next-input-character;
1272     redo A;
1273     } else {
1274 wakaba 1.77 !!!cp (44);
1275 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1276 wakaba 1.1 # start tag or end tag
1277     ## Stay in the state
1278     !!!next-input-character;
1279     redo A;
1280     }
1281 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1282 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1283     $self->{next_char} == 0x000A or # LF
1284     $self->{next_char} == 0x000B or # VT
1285     $self->{next_char} == 0x000C or # FF
1286     $self->{next_char} == 0x0020) { # SP
1287 wakaba 1.77 !!!cp (45);
1288 wakaba 1.1 ## Stay in the state
1289     !!!next-input-character;
1290     redo A;
1291 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1292 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1293 wakaba 1.77 !!!cp (46);
1294 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1295 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1296 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1297 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1298 wakaba 1.77 !!!cp (47);
1299 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1300 wakaba 1.77 } else {
1301     !!!cp (48);
1302 wakaba 1.1 }
1303     } else {
1304     die "$0: $self->{current_token}->{type}: Unknown token type";
1305     }
1306 wakaba 1.57 $self->{state} = DATA_STATE;
1307 wakaba 1.1 !!!next-input-character;
1308    
1309     !!!emit ($self->{current_token}); # start tag or end tag
1310    
1311     redo A;
1312 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1313     $self->{next_char} <= 0x005A) { # A..Z
1314 wakaba 1.77 !!!cp (49);
1315 wakaba 1.119 $self->{current_attribute}
1316     = {name => chr ($self->{next_char} + 0x0020),
1317     value => '',
1318     line => $self->{line}, column => $self->{column}};
1319 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1320 wakaba 1.1 !!!next-input-character;
1321     redo A;
1322 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1323 wakaba 1.125 !!!cp (50);
1324     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1325 wakaba 1.1 !!!next-input-character;
1326     redo A;
1327 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1328 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1329 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1330 wakaba 1.77 !!!cp (52);
1331 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1332 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1333 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1334 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1335 wakaba 1.77 !!!cp (53);
1336 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1337 wakaba 1.77 } else {
1338     !!!cp (54);
1339 wakaba 1.1 }
1340     } else {
1341     die "$0: $self->{current_token}->{type}: Unknown token type";
1342     }
1343 wakaba 1.57 $self->{state} = DATA_STATE;
1344 wakaba 1.1 # reconsume
1345    
1346     !!!emit ($self->{current_token}); # start tag or end tag
1347    
1348     redo A;
1349     } else {
1350 wakaba 1.72 if ({
1351     0x0022 => 1, # "
1352     0x0027 => 1, # '
1353     0x003D => 1, # =
1354 wakaba 1.76 }->{$self->{next_char}}) {
1355 wakaba 1.77 !!!cp (55);
1356 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1357 wakaba 1.77 } else {
1358     !!!cp (56);
1359 wakaba 1.72 }
1360 wakaba 1.119 $self->{current_attribute}
1361     = {name => chr ($self->{next_char}),
1362     value => '',
1363     line => $self->{line}, column => $self->{column}};
1364 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1365 wakaba 1.1 !!!next-input-character;
1366     redo A;
1367     }
1368 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1369 wakaba 1.1 my $before_leave = sub {
1370     if (exists $self->{current_token}->{attributes} # start tag or end tag
1371     ->{$self->{current_attribute}->{name}}) { # MUST
1372 wakaba 1.77 !!!cp (57);
1373 wakaba 1.153 !!!parse-error (type => 'duplicate attribute', text => $self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1374 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1375     } else {
1376 wakaba 1.77 !!!cp (58);
1377 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1378     = $self->{current_attribute};
1379     }
1380     }; # $before_leave
1381    
1382 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1383     $self->{next_char} == 0x000A or # LF
1384     $self->{next_char} == 0x000B or # VT
1385     $self->{next_char} == 0x000C or # FF
1386     $self->{next_char} == 0x0020) { # SP
1387 wakaba 1.77 !!!cp (59);
1388 wakaba 1.1 $before_leave->();
1389 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1390 wakaba 1.1 !!!next-input-character;
1391     redo A;
1392 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1393 wakaba 1.77 !!!cp (60);
1394 wakaba 1.1 $before_leave->();
1395 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1396 wakaba 1.1 !!!next-input-character;
1397     redo A;
1398 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1399 wakaba 1.1 $before_leave->();
1400 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1401 wakaba 1.77 !!!cp (61);
1402 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1403 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1404 wakaba 1.77 !!!cp (62);
1405 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1406 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1407 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1408 wakaba 1.1 }
1409     } else {
1410     die "$0: $self->{current_token}->{type}: Unknown token type";
1411     }
1412 wakaba 1.57 $self->{state} = DATA_STATE;
1413 wakaba 1.1 !!!next-input-character;
1414    
1415     !!!emit ($self->{current_token}); # start tag or end tag
1416    
1417     redo A;
1418 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1419     $self->{next_char} <= 0x005A) { # A..Z
1420 wakaba 1.77 !!!cp (63);
1421 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1422 wakaba 1.1 ## Stay in the state
1423     !!!next-input-character;
1424     redo A;
1425 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1426 wakaba 1.125 !!!cp (64);
1427 wakaba 1.1 $before_leave->();
1428 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1429 wakaba 1.1 !!!next-input-character;
1430     redo A;
1431 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1432 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1433 wakaba 1.1 $before_leave->();
1434 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1435 wakaba 1.77 !!!cp (66);
1436 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1437 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1438 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1439 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1440 wakaba 1.77 !!!cp (67);
1441 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1442 wakaba 1.77 } else {
1443 wakaba 1.78 ## NOTE: This state should never be reached.
1444 wakaba 1.77 !!!cp (68);
1445 wakaba 1.1 }
1446     } else {
1447     die "$0: $self->{current_token}->{type}: Unknown token type";
1448     }
1449 wakaba 1.57 $self->{state} = DATA_STATE;
1450 wakaba 1.1 # reconsume
1451    
1452     !!!emit ($self->{current_token}); # start tag or end tag
1453    
1454     redo A;
1455     } else {
1456 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1457     $self->{next_char} == 0x0027) { # '
1458 wakaba 1.77 !!!cp (69);
1459 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1460 wakaba 1.77 } else {
1461     !!!cp (70);
1462 wakaba 1.72 }
1463 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1464 wakaba 1.1 ## Stay in the state
1465     !!!next-input-character;
1466     redo A;
1467     }
1468 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1469 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1470     $self->{next_char} == 0x000A or # LF
1471     $self->{next_char} == 0x000B or # VT
1472     $self->{next_char} == 0x000C or # FF
1473     $self->{next_char} == 0x0020) { # SP
1474 wakaba 1.77 !!!cp (71);
1475 wakaba 1.1 ## Stay in the state
1476     !!!next-input-character;
1477     redo A;
1478 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1479 wakaba 1.77 !!!cp (72);
1480 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1481 wakaba 1.1 !!!next-input-character;
1482     redo A;
1483 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1484 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1485 wakaba 1.77 !!!cp (73);
1486 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1487 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1488 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1489 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1490 wakaba 1.77 !!!cp (74);
1491 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1492 wakaba 1.77 } else {
1493 wakaba 1.78 ## NOTE: This state should never be reached.
1494 wakaba 1.77 !!!cp (75);
1495 wakaba 1.1 }
1496     } else {
1497     die "$0: $self->{current_token}->{type}: Unknown token type";
1498     }
1499 wakaba 1.57 $self->{state} = DATA_STATE;
1500 wakaba 1.1 !!!next-input-character;
1501    
1502     !!!emit ($self->{current_token}); # start tag or end tag
1503    
1504     redo A;
1505 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1506     $self->{next_char} <= 0x005A) { # A..Z
1507 wakaba 1.77 !!!cp (76);
1508 wakaba 1.119 $self->{current_attribute}
1509     = {name => chr ($self->{next_char} + 0x0020),
1510     value => '',
1511     line => $self->{line}, column => $self->{column}};
1512 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1513 wakaba 1.1 !!!next-input-character;
1514     redo A;
1515 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1516 wakaba 1.125 !!!cp (77);
1517     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1518 wakaba 1.1 !!!next-input-character;
1519     redo A;
1520 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1521 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1522 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1523 wakaba 1.77 !!!cp (79);
1524 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1525 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1526 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1527 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1528 wakaba 1.77 !!!cp (80);
1529 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1530 wakaba 1.77 } else {
1531 wakaba 1.78 ## NOTE: This state should never be reached.
1532 wakaba 1.77 !!!cp (81);
1533 wakaba 1.1 }
1534     } else {
1535     die "$0: $self->{current_token}->{type}: Unknown token type";
1536     }
1537 wakaba 1.57 $self->{state} = DATA_STATE;
1538 wakaba 1.1 # reconsume
1539    
1540     !!!emit ($self->{current_token}); # start tag or end tag
1541    
1542     redo A;
1543     } else {
1544 wakaba 1.156 if ($self->{next_char} == 0x0022 or # "
1545     $self->{next_char} == 0x0027) { # '
1546     !!!cp (78);
1547     !!!parse-error (type => 'bad attribute name');
1548     } else {
1549     !!!cp (82);
1550     }
1551 wakaba 1.119 $self->{current_attribute}
1552     = {name => chr ($self->{next_char}),
1553     value => '',
1554     line => $self->{line}, column => $self->{column}};
1555 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1556 wakaba 1.1 !!!next-input-character;
1557     redo A;
1558     }
1559 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1560 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1561     $self->{next_char} == 0x000A or # LF
1562     $self->{next_char} == 0x000B or # VT
1563     $self->{next_char} == 0x000C or # FF
1564     $self->{next_char} == 0x0020) { # SP
1565 wakaba 1.77 !!!cp (83);
1566 wakaba 1.1 ## Stay in the state
1567     !!!next-input-character;
1568     redo A;
1569 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1570 wakaba 1.77 !!!cp (84);
1571 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1572 wakaba 1.1 !!!next-input-character;
1573     redo A;
1574 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1575 wakaba 1.77 !!!cp (85);
1576 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1577 wakaba 1.1 ## reconsume
1578     redo A;
1579 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1580 wakaba 1.77 !!!cp (86);
1581 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1582 wakaba 1.1 !!!next-input-character;
1583     redo A;
1584 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1585 wakaba 1.156 !!!parse-error (type => 'empty unquoted attribute value');
1586 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1587 wakaba 1.77 !!!cp (87);
1588 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1589 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1590 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1591 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1592 wakaba 1.77 !!!cp (88);
1593 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1594 wakaba 1.77 } else {
1595 wakaba 1.78 ## NOTE: This state should never be reached.
1596 wakaba 1.77 !!!cp (89);
1597 wakaba 1.1 }
1598     } else {
1599     die "$0: $self->{current_token}->{type}: Unknown token type";
1600     }
1601 wakaba 1.57 $self->{state} = DATA_STATE;
1602 wakaba 1.1 !!!next-input-character;
1603    
1604     !!!emit ($self->{current_token}); # start tag or end tag
1605    
1606     redo A;
1607 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1608 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1609 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1610 wakaba 1.77 !!!cp (90);
1611 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1612 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1613 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1614 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1615 wakaba 1.77 !!!cp (91);
1616 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1617 wakaba 1.77 } else {
1618 wakaba 1.78 ## NOTE: This state should never be reached.
1619 wakaba 1.77 !!!cp (92);
1620 wakaba 1.1 }
1621     } else {
1622     die "$0: $self->{current_token}->{type}: Unknown token type";
1623     }
1624 wakaba 1.57 $self->{state} = DATA_STATE;
1625 wakaba 1.1 ## reconsume
1626    
1627     !!!emit ($self->{current_token}); # start tag or end tag
1628    
1629     redo A;
1630     } else {
1631 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1632 wakaba 1.77 !!!cp (93);
1633 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1634 wakaba 1.77 } else {
1635     !!!cp (94);
1636 wakaba 1.72 }
1637 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1638 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1639 wakaba 1.1 !!!next-input-character;
1640     redo A;
1641     }
1642 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1643 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1644 wakaba 1.77 !!!cp (95);
1645 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1646 wakaba 1.1 !!!next-input-character;
1647     redo A;
1648 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1649 wakaba 1.77 !!!cp (96);
1650 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1651     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1652 wakaba 1.1 !!!next-input-character;
1653     redo A;
1654 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1655 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1656 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1657 wakaba 1.77 !!!cp (97);
1658 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1659 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1660 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1661 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1662 wakaba 1.77 !!!cp (98);
1663 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1664 wakaba 1.77 } else {
1665 wakaba 1.78 ## NOTE: This state should never be reached.
1666 wakaba 1.77 !!!cp (99);
1667 wakaba 1.1 }
1668     } else {
1669     die "$0: $self->{current_token}->{type}: Unknown token type";
1670     }
1671 wakaba 1.57 $self->{state} = DATA_STATE;
1672 wakaba 1.1 ## reconsume
1673    
1674     !!!emit ($self->{current_token}); # start tag or end tag
1675    
1676     redo A;
1677     } else {
1678 wakaba 1.77 !!!cp (100);
1679 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1680 wakaba 1.1 ## Stay in the state
1681     !!!next-input-character;
1682     redo A;
1683     }
1684 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1685 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1686 wakaba 1.77 !!!cp (101);
1687 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1688 wakaba 1.1 !!!next-input-character;
1689     redo A;
1690 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1691 wakaba 1.77 !!!cp (102);
1692 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1693     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1694 wakaba 1.1 !!!next-input-character;
1695     redo A;
1696 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1697 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1698 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1699 wakaba 1.77 !!!cp (103);
1700 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1701 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1702 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1703 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1704 wakaba 1.77 !!!cp (104);
1705 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1706 wakaba 1.77 } else {
1707 wakaba 1.78 ## NOTE: This state should never be reached.
1708 wakaba 1.77 !!!cp (105);
1709 wakaba 1.1 }
1710     } else {
1711     die "$0: $self->{current_token}->{type}: Unknown token type";
1712     }
1713 wakaba 1.57 $self->{state} = DATA_STATE;
1714 wakaba 1.1 ## reconsume
1715    
1716     !!!emit ($self->{current_token}); # start tag or end tag
1717    
1718     redo A;
1719     } else {
1720 wakaba 1.77 !!!cp (106);
1721 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1722 wakaba 1.1 ## Stay in the state
1723     !!!next-input-character;
1724     redo A;
1725     }
1726 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1727 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1728     $self->{next_char} == 0x000A or # LF
1729     $self->{next_char} == 0x000B or # VT
1730     $self->{next_char} == 0x000C or # FF
1731     $self->{next_char} == 0x0020) { # SP
1732 wakaba 1.77 !!!cp (107);
1733 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1734 wakaba 1.1 !!!next-input-character;
1735     redo A;
1736 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1737 wakaba 1.77 !!!cp (108);
1738 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1739     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1740 wakaba 1.1 !!!next-input-character;
1741     redo A;
1742 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1743 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1744 wakaba 1.77 !!!cp (109);
1745 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1746 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1747 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1748 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1749 wakaba 1.77 !!!cp (110);
1750 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1751 wakaba 1.77 } else {
1752 wakaba 1.78 ## NOTE: This state should never be reached.
1753 wakaba 1.77 !!!cp (111);
1754 wakaba 1.1 }
1755     } else {
1756     die "$0: $self->{current_token}->{type}: Unknown token type";
1757     }
1758 wakaba 1.57 $self->{state} = DATA_STATE;
1759 wakaba 1.1 !!!next-input-character;
1760    
1761     !!!emit ($self->{current_token}); # start tag or end tag
1762    
1763     redo A;
1764 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1765 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1766 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1767 wakaba 1.77 !!!cp (112);
1768 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1769 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1770 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1771 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1772 wakaba 1.77 !!!cp (113);
1773 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1774 wakaba 1.77 } else {
1775 wakaba 1.78 ## NOTE: This state should never be reached.
1776 wakaba 1.77 !!!cp (114);
1777 wakaba 1.1 }
1778     } else {
1779     die "$0: $self->{current_token}->{type}: Unknown token type";
1780     }
1781 wakaba 1.57 $self->{state} = DATA_STATE;
1782 wakaba 1.1 ## reconsume
1783    
1784     !!!emit ($self->{current_token}); # start tag or end tag
1785    
1786     redo A;
1787     } else {
1788 wakaba 1.72 if ({
1789     0x0022 => 1, # "
1790     0x0027 => 1, # '
1791     0x003D => 1, # =
1792 wakaba 1.76 }->{$self->{next_char}}) {
1793 wakaba 1.77 !!!cp (115);
1794 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1795 wakaba 1.77 } else {
1796     !!!cp (116);
1797 wakaba 1.72 }
1798 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1799 wakaba 1.1 ## Stay in the state
1800     !!!next-input-character;
1801     redo A;
1802     }
1803 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1804 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1805     (1,
1806     $self->{last_attribute_value_state}
1807     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1808     $self->{last_attribute_value_state}
1809     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1810     -1);
1811 wakaba 1.1
1812     unless (defined $token) {
1813 wakaba 1.77 !!!cp (117);
1814 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1815     } else {
1816 wakaba 1.77 !!!cp (118);
1817 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1818 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1819 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1820     }
1821    
1822     $self->{state} = $self->{last_attribute_value_state};
1823     # next-input-character is already done
1824     redo A;
1825 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1826 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1827     $self->{next_char} == 0x000A or # LF
1828     $self->{next_char} == 0x000B or # VT
1829     $self->{next_char} == 0x000C or # FF
1830     $self->{next_char} == 0x0020) { # SP
1831 wakaba 1.77 !!!cp (118);
1832 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1833     !!!next-input-character;
1834     redo A;
1835 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1836 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1837 wakaba 1.77 !!!cp (119);
1838 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1839     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1840     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1841     if ($self->{current_token}->{attributes}) {
1842 wakaba 1.77 !!!cp (120);
1843 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1844 wakaba 1.77 } else {
1845 wakaba 1.78 ## NOTE: This state should never be reached.
1846 wakaba 1.77 !!!cp (121);
1847 wakaba 1.72 }
1848     } else {
1849     die "$0: $self->{current_token}->{type}: Unknown token type";
1850     }
1851     $self->{state} = DATA_STATE;
1852     !!!next-input-character;
1853    
1854     !!!emit ($self->{current_token}); # start tag or end tag
1855    
1856     redo A;
1857 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1858 wakaba 1.125 !!!cp (122);
1859     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1860 wakaba 1.72 !!!next-input-character;
1861 wakaba 1.125 redo A;
1862 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1863     !!!parse-error (type => 'unclosed tag');
1864     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1865     !!!cp (122.3);
1866     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1867     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1868     if ($self->{current_token}->{attributes}) {
1869     !!!cp (122.1);
1870     !!!parse-error (type => 'end tag attribute');
1871     } else {
1872     ## NOTE: This state should never be reached.
1873     !!!cp (122.2);
1874     }
1875     } else {
1876     die "$0: $self->{current_token}->{type}: Unknown token type";
1877     }
1878     $self->{state} = DATA_STATE;
1879     ## Reconsume.
1880     !!!emit ($self->{current_token}); # start tag or end tag
1881     redo A;
1882 wakaba 1.125 } else {
1883     !!!cp ('124.1');
1884     !!!parse-error (type => 'no space between attributes');
1885     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1886     ## reconsume
1887     redo A;
1888     }
1889     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1890     if ($self->{next_char} == 0x003E) { # >
1891     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1892     !!!cp ('124.2');
1893     !!!parse-error (type => 'nestc', token => $self->{current_token});
1894     ## TODO: Different type than slash in start tag
1895     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1896     if ($self->{current_token}->{attributes}) {
1897     !!!cp ('124.4');
1898     !!!parse-error (type => 'end tag attribute');
1899     } else {
1900     !!!cp ('124.5');
1901     }
1902     ## TODO: Test |<title></title/>|
1903 wakaba 1.72 } else {
1904 wakaba 1.125 !!!cp ('124.3');
1905     $self->{self_closing} = 1;
1906 wakaba 1.72 }
1907 wakaba 1.125
1908     $self->{state} = DATA_STATE;
1909     !!!next-input-character;
1910    
1911     !!!emit ($self->{current_token}); # start tag or end tag
1912    
1913 wakaba 1.72 redo A;
1914 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1915     !!!parse-error (type => 'unclosed tag');
1916     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1917     !!!cp (124.7);
1918     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1919     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1920     if ($self->{current_token}->{attributes}) {
1921     !!!cp (124.5);
1922     !!!parse-error (type => 'end tag attribute');
1923     } else {
1924     ## NOTE: This state should never be reached.
1925     !!!cp (124.6);
1926     }
1927     } else {
1928     die "$0: $self->{current_token}->{type}: Unknown token type";
1929     }
1930     $self->{state} = DATA_STATE;
1931     ## Reconsume.
1932     !!!emit ($self->{current_token}); # start tag or end tag
1933     redo A;
1934 wakaba 1.72 } else {
1935 wakaba 1.125 !!!cp ('124.4');
1936     !!!parse-error (type => 'nestc');
1937     ## TODO: This error type is wrong.
1938 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1939 wakaba 1.125 ## Reconsume.
1940 wakaba 1.72 redo A;
1941     }
1942 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1943 wakaba 1.1 ## (only happens if in PCDATA state)
1944    
1945 wakaba 1.112 ## NOTE: Set by the previous state
1946     #my $token = {type => COMMENT_TOKEN, data => ''};
1947 wakaba 1.1
1948     BC: {
1949 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1950 wakaba 1.77 !!!cp (124);
1951 wakaba 1.57 $self->{state} = DATA_STATE;
1952 wakaba 1.1 !!!next-input-character;
1953    
1954 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1955 wakaba 1.1
1956     redo A;
1957 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1958 wakaba 1.77 !!!cp (125);
1959 wakaba 1.57 $self->{state} = DATA_STATE;
1960 wakaba 1.1 ## reconsume
1961    
1962 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1963 wakaba 1.1
1964     redo A;
1965     } else {
1966 wakaba 1.77 !!!cp (126);
1967 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1968 wakaba 1.1 !!!next-input-character;
1969     redo BC;
1970     }
1971     } # BC
1972 wakaba 1.77
1973     die "$0: _get_next_token: unexpected case [BC]";
1974 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1975 wakaba 1.1 ## (only happens if in PCDATA state)
1976    
1977 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1978 wakaba 1.112
1979 wakaba 1.1 my @next_char;
1980 wakaba 1.76 push @next_char, $self->{next_char};
1981 wakaba 1.1
1982 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1983 wakaba 1.1 !!!next-input-character;
1984 wakaba 1.76 push @next_char, $self->{next_char};
1985     if ($self->{next_char} == 0x002D) { # -
1986 wakaba 1.77 !!!cp (127);
1987 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1988 wakaba 1.120 line => $l, column => $c,
1989 wakaba 1.118 };
1990 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1991 wakaba 1.1 !!!next-input-character;
1992     redo A;
1993 wakaba 1.77 } else {
1994     !!!cp (128);
1995 wakaba 1.1 }
1996 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1997     $self->{next_char} == 0x0064) { # d
1998 wakaba 1.1 !!!next-input-character;
1999 wakaba 1.76 push @next_char, $self->{next_char};
2000     if ($self->{next_char} == 0x004F or # O
2001     $self->{next_char} == 0x006F) { # o
2002 wakaba 1.1 !!!next-input-character;
2003 wakaba 1.76 push @next_char, $self->{next_char};
2004     if ($self->{next_char} == 0x0043 or # C
2005     $self->{next_char} == 0x0063) { # c
2006 wakaba 1.1 !!!next-input-character;
2007 wakaba 1.76 push @next_char, $self->{next_char};
2008     if ($self->{next_char} == 0x0054 or # T
2009     $self->{next_char} == 0x0074) { # t
2010 wakaba 1.1 !!!next-input-character;
2011 wakaba 1.76 push @next_char, $self->{next_char};
2012     if ($self->{next_char} == 0x0059 or # Y
2013     $self->{next_char} == 0x0079) { # y
2014 wakaba 1.1 !!!next-input-character;
2015 wakaba 1.76 push @next_char, $self->{next_char};
2016     if ($self->{next_char} == 0x0050 or # P
2017     $self->{next_char} == 0x0070) { # p
2018 wakaba 1.1 !!!next-input-character;
2019 wakaba 1.76 push @next_char, $self->{next_char};
2020     if ($self->{next_char} == 0x0045 or # E
2021     $self->{next_char} == 0x0065) { # e
2022 wakaba 1.77 !!!cp (129);
2023     ## TODO: What a stupid code this is!
2024 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
2025 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
2026     quirks => 1,
2027 wakaba 1.120 line => $l, column => $c,
2028 wakaba 1.118 };
2029 wakaba 1.1 !!!next-input-character;
2030     redo A;
2031 wakaba 1.77 } else {
2032     !!!cp (130);
2033 wakaba 1.1 }
2034 wakaba 1.77 } else {
2035     !!!cp (131);
2036 wakaba 1.1 }
2037 wakaba 1.77 } else {
2038     !!!cp (132);
2039 wakaba 1.1 }
2040 wakaba 1.77 } else {
2041     !!!cp (133);
2042 wakaba 1.1 }
2043 wakaba 1.77 } else {
2044     !!!cp (134);
2045 wakaba 1.1 }
2046 wakaba 1.77 } else {
2047     !!!cp (135);
2048 wakaba 1.1 }
2049 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2050     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
2051     $self->{next_char} == 0x005B) { # [
2052     !!!next-input-character;
2053     push @next_char, $self->{next_char};
2054     if ($self->{next_char} == 0x0043) { # C
2055     !!!next-input-character;
2056     push @next_char, $self->{next_char};
2057     if ($self->{next_char} == 0x0044) { # D
2058     !!!next-input-character;
2059     push @next_char, $self->{next_char};
2060     if ($self->{next_char} == 0x0041) { # A
2061     !!!next-input-character;
2062     push @next_char, $self->{next_char};
2063     if ($self->{next_char} == 0x0054) { # T
2064     !!!next-input-character;
2065     push @next_char, $self->{next_char};
2066     if ($self->{next_char} == 0x0041) { # A
2067     !!!next-input-character;
2068     push @next_char, $self->{next_char};
2069     if ($self->{next_char} == 0x005B) { # [
2070     !!!cp (135.1);
2071     $self->{state} = CDATA_BLOCK_STATE;
2072     !!!next-input-character;
2073     redo A;
2074     } else {
2075     !!!cp (135.2);
2076     }
2077     } else {
2078     !!!cp (135.3);
2079     }
2080     } else {
2081     !!!cp (135.4);
2082     }
2083     } else {
2084     !!!cp (135.5);
2085     }
2086     } else {
2087     !!!cp (135.6);
2088     }
2089     } else {
2090     !!!cp (135.7);
2091     }
2092 wakaba 1.77 } else {
2093     !!!cp (136);
2094 wakaba 1.1 }
2095    
2096 wakaba 1.30 !!!parse-error (type => 'bogus comment');
2097 wakaba 1.76 $self->{next_char} = shift @next_char;
2098 wakaba 1.1 !!!back-next-input-character (@next_char);
2099 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
2100 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2101 wakaba 1.120 line => $l, column => $c,
2102 wakaba 1.118 };
2103 wakaba 1.1 redo A;
2104    
2105     ## ISSUE: typos in spec: chacacters, is is a parse error
2106     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
2107 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
2108 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2109 wakaba 1.77 !!!cp (137);
2110 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
2111 wakaba 1.23 !!!next-input-character;
2112     redo A;
2113 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2114 wakaba 1.77 !!!cp (138);
2115 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2116 wakaba 1.57 $self->{state} = DATA_STATE;
2117 wakaba 1.23 !!!next-input-character;
2118    
2119     !!!emit ($self->{current_token}); # comment
2120    
2121     redo A;
2122 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2123 wakaba 1.77 !!!cp (139);
2124 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2125 wakaba 1.57 $self->{state} = DATA_STATE;
2126 wakaba 1.23 ## reconsume
2127    
2128     !!!emit ($self->{current_token}); # comment
2129    
2130     redo A;
2131     } else {
2132 wakaba 1.77 !!!cp (140);
2133 wakaba 1.23 $self->{current_token}->{data} # comment
2134 wakaba 1.76 .= chr ($self->{next_char});
2135 wakaba 1.57 $self->{state} = COMMENT_STATE;
2136 wakaba 1.23 !!!next-input-character;
2137     redo A;
2138     }
2139 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
2140 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2141 wakaba 1.77 !!!cp (141);
2142 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2143 wakaba 1.23 !!!next-input-character;
2144     redo A;
2145 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2146 wakaba 1.77 !!!cp (142);
2147 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2148 wakaba 1.57 $self->{state} = DATA_STATE;
2149 wakaba 1.23 !!!next-input-character;
2150    
2151     !!!emit ($self->{current_token}); # comment
2152    
2153     redo A;
2154 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2155 wakaba 1.77 !!!cp (143);
2156 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2157 wakaba 1.57 $self->{state} = DATA_STATE;
2158 wakaba 1.23 ## reconsume
2159    
2160     !!!emit ($self->{current_token}); # comment
2161    
2162     redo A;
2163     } else {
2164 wakaba 1.77 !!!cp (144);
2165 wakaba 1.23 $self->{current_token}->{data} # comment
2166 wakaba 1.76 .= '-' . chr ($self->{next_char});
2167 wakaba 1.57 $self->{state} = COMMENT_STATE;
2168 wakaba 1.23 !!!next-input-character;
2169     redo A;
2170     }
2171 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
2172 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2173 wakaba 1.77 !!!cp (145);
2174 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
2175 wakaba 1.1 !!!next-input-character;
2176     redo A;
2177 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2178 wakaba 1.77 !!!cp (146);
2179 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2180 wakaba 1.57 $self->{state} = DATA_STATE;
2181 wakaba 1.1 ## reconsume
2182    
2183     !!!emit ($self->{current_token}); # comment
2184    
2185     redo A;
2186     } else {
2187 wakaba 1.77 !!!cp (147);
2188 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2189 wakaba 1.1 ## Stay in the state
2190     !!!next-input-character;
2191     redo A;
2192     }
2193 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2194 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2195 wakaba 1.77 !!!cp (148);
2196 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2197 wakaba 1.1 !!!next-input-character;
2198     redo A;
2199 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2200 wakaba 1.77 !!!cp (149);
2201 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2202 wakaba 1.57 $self->{state} = DATA_STATE;
2203 wakaba 1.1 ## reconsume
2204    
2205     !!!emit ($self->{current_token}); # comment
2206    
2207     redo A;
2208     } else {
2209 wakaba 1.77 !!!cp (150);
2210 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2211 wakaba 1.57 $self->{state} = COMMENT_STATE;
2212 wakaba 1.1 !!!next-input-character;
2213     redo A;
2214     }
2215 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
2216 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2217 wakaba 1.77 !!!cp (151);
2218 wakaba 1.57 $self->{state} = DATA_STATE;
2219 wakaba 1.1 !!!next-input-character;
2220    
2221     !!!emit ($self->{current_token}); # comment
2222    
2223     redo A;
2224 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
2225 wakaba 1.77 !!!cp (152);
2226 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2227     line => $self->{line_prev},
2228     column => $self->{column_prev});
2229 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
2230     ## Stay in the state
2231     !!!next-input-character;
2232     redo A;
2233 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2234 wakaba 1.77 !!!cp (153);
2235 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2236 wakaba 1.57 $self->{state} = DATA_STATE;
2237 wakaba 1.1 ## reconsume
2238    
2239     !!!emit ($self->{current_token}); # comment
2240    
2241     redo A;
2242     } else {
2243 wakaba 1.77 !!!cp (154);
2244 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2245     line => $self->{line_prev},
2246     column => $self->{column_prev});
2247 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2248 wakaba 1.57 $self->{state} = COMMENT_STATE;
2249 wakaba 1.1 !!!next-input-character;
2250     redo A;
2251     }
2252 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
2253 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2254     $self->{next_char} == 0x000A or # LF
2255     $self->{next_char} == 0x000B or # VT
2256     $self->{next_char} == 0x000C or # FF
2257     $self->{next_char} == 0x0020) { # SP
2258 wakaba 1.77 !!!cp (155);
2259 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2260 wakaba 1.1 !!!next-input-character;
2261     redo A;
2262     } else {
2263 wakaba 1.77 !!!cp (156);
2264 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
2265 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2266 wakaba 1.1 ## reconsume
2267     redo A;
2268     }
2269 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2270 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2271     $self->{next_char} == 0x000A or # LF
2272     $self->{next_char} == 0x000B or # VT
2273     $self->{next_char} == 0x000C or # FF
2274     $self->{next_char} == 0x0020) { # SP
2275 wakaba 1.77 !!!cp (157);
2276 wakaba 1.1 ## Stay in the state
2277     !!!next-input-character;
2278     redo A;
2279 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2280 wakaba 1.77 !!!cp (158);
2281 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2282 wakaba 1.57 $self->{state} = DATA_STATE;
2283 wakaba 1.1 !!!next-input-character;
2284    
2285 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2286 wakaba 1.1
2287     redo A;
2288 wakaba 1.77 } elsif ($self->{next_char} == -1) {
2289     !!!cp (159);
2290 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2291 wakaba 1.57 $self->{state} = DATA_STATE;
2292 wakaba 1.1 ## reconsume
2293    
2294 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2295 wakaba 1.1
2296     redo A;
2297     } else {
2298 wakaba 1.77 !!!cp (160);
2299 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
2300     delete $self->{current_token}->{quirks};
2301 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
2302 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
2303 wakaba 1.1 !!!next-input-character;
2304     redo A;
2305     }
2306 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2307 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
2308 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2309     $self->{next_char} == 0x000A or # LF
2310     $self->{next_char} == 0x000B or # VT
2311     $self->{next_char} == 0x000C or # FF
2312     $self->{next_char} == 0x0020) { # SP
2313 wakaba 1.77 !!!cp (161);
2314 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2315 wakaba 1.1 !!!next-input-character;
2316     redo A;
2317 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2318 wakaba 1.77 !!!cp (162);
2319 wakaba 1.57 $self->{state} = DATA_STATE;
2320 wakaba 1.1 !!!next-input-character;
2321    
2322     !!!emit ($self->{current_token}); # DOCTYPE
2323    
2324     redo A;
2325 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2326 wakaba 1.77 !!!cp (163);
2327 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2328 wakaba 1.57 $self->{state} = DATA_STATE;
2329 wakaba 1.1 ## reconsume
2330    
2331 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2332 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2333 wakaba 1.1
2334     redo A;
2335     } else {
2336 wakaba 1.77 !!!cp (164);
2337 wakaba 1.1 $self->{current_token}->{name}
2338 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
2339 wakaba 1.1 ## Stay in the state
2340     !!!next-input-character;
2341     redo A;
2342     }
2343 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2344 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2345     $self->{next_char} == 0x000A or # LF
2346     $self->{next_char} == 0x000B or # VT
2347     $self->{next_char} == 0x000C or # FF
2348     $self->{next_char} == 0x0020) { # SP
2349 wakaba 1.77 !!!cp (165);
2350 wakaba 1.1 ## Stay in the state
2351     !!!next-input-character;
2352     redo A;
2353 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2354 wakaba 1.77 !!!cp (166);
2355 wakaba 1.57 $self->{state} = DATA_STATE;
2356 wakaba 1.1 !!!next-input-character;
2357    
2358     !!!emit ($self->{current_token}); # DOCTYPE
2359    
2360     redo A;
2361 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2362 wakaba 1.77 !!!cp (167);
2363 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2364 wakaba 1.57 $self->{state} = DATA_STATE;
2365 wakaba 1.1 ## reconsume
2366    
2367 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2368 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2369    
2370     redo A;
2371 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2372     $self->{next_char} == 0x0070) { # p
2373 wakaba 1.18 !!!next-input-character;
2374 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
2375     $self->{next_char} == 0x0075) { # u
2376 wakaba 1.18 !!!next-input-character;
2377 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
2378     $self->{next_char} == 0x0062) { # b
2379 wakaba 1.18 !!!next-input-character;
2380 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
2381     $self->{next_char} == 0x006C) { # l
2382 wakaba 1.18 !!!next-input-character;
2383 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
2384     $self->{next_char} == 0x0069) { # i
2385 wakaba 1.18 !!!next-input-character;
2386 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
2387     $self->{next_char} == 0x0063) { # c
2388 wakaba 1.77 !!!cp (168);
2389 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2390 wakaba 1.18 !!!next-input-character;
2391     redo A;
2392 wakaba 1.77 } else {
2393     !!!cp (169);
2394 wakaba 1.18 }
2395 wakaba 1.77 } else {
2396     !!!cp (170);
2397 wakaba 1.18 }
2398 wakaba 1.77 } else {
2399     !!!cp (171);
2400 wakaba 1.18 }
2401 wakaba 1.77 } else {
2402     !!!cp (172);
2403 wakaba 1.18 }
2404 wakaba 1.77 } else {
2405     !!!cp (173);
2406 wakaba 1.18 }
2407    
2408     #
2409 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2410     $self->{next_char} == 0x0073) { # s
2411 wakaba 1.18 !!!next-input-character;
2412 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
2413     $self->{next_char} == 0x0079) { # y
2414 wakaba 1.18 !!!next-input-character;
2415 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
2416     $self->{next_char} == 0x0073) { # s
2417 wakaba 1.18 !!!next-input-character;
2418 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
2419     $self->{next_char} == 0x0074) { # t
2420 wakaba 1.18 !!!next-input-character;
2421 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
2422     $self->{next_char} == 0x0065) { # e
2423 wakaba 1.18 !!!next-input-character;
2424 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
2425     $self->{next_char} == 0x006D) { # m
2426 wakaba 1.77 !!!cp (174);
2427 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2428 wakaba 1.18 !!!next-input-character;
2429     redo A;
2430 wakaba 1.77 } else {
2431     !!!cp (175);
2432 wakaba 1.18 }
2433 wakaba 1.77 } else {
2434     !!!cp (176);
2435 wakaba 1.18 }
2436 wakaba 1.77 } else {
2437     !!!cp (177);
2438 wakaba 1.18 }
2439 wakaba 1.77 } else {
2440     !!!cp (178);
2441 wakaba 1.18 }
2442 wakaba 1.77 } else {
2443     !!!cp (179);
2444 wakaba 1.18 }
2445    
2446     #
2447     } else {
2448 wakaba 1.77 !!!cp (180);
2449 wakaba 1.18 !!!next-input-character;
2450     #
2451     }
2452    
2453     !!!parse-error (type => 'string after DOCTYPE name');
2454 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2455 wakaba 1.73
2456 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2457 wakaba 1.18 # next-input-character is already done
2458     redo A;
2459 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2460 wakaba 1.18 if ({
2461     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2462     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2463 wakaba 1.76 }->{$self->{next_char}}) {
2464 wakaba 1.77 !!!cp (181);
2465 wakaba 1.18 ## Stay in the state
2466     !!!next-input-character;
2467     redo A;
2468 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2469 wakaba 1.77 !!!cp (182);
2470 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2471 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2472 wakaba 1.18 !!!next-input-character;
2473     redo A;
2474 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2475 wakaba 1.77 !!!cp (183);
2476 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2477 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2478 wakaba 1.18 !!!next-input-character;
2479     redo A;
2480 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2481 wakaba 1.77 !!!cp (184);
2482 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2483    
2484 wakaba 1.57 $self->{state} = DATA_STATE;
2485 wakaba 1.18 !!!next-input-character;
2486    
2487 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2488 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2489    
2490     redo A;
2491 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2492 wakaba 1.77 !!!cp (185);
2493 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2494    
2495 wakaba 1.57 $self->{state} = DATA_STATE;
2496 wakaba 1.18 ## reconsume
2497    
2498 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2499 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2500    
2501     redo A;
2502     } else {
2503 wakaba 1.77 !!!cp (186);
2504 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2505 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2506 wakaba 1.73
2507 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2508 wakaba 1.18 !!!next-input-character;
2509     redo A;
2510     }
2511 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2512 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2513 wakaba 1.77 !!!cp (187);
2514 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2515 wakaba 1.18 !!!next-input-character;
2516     redo A;
2517 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2518 wakaba 1.77 !!!cp (188);
2519 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2520    
2521     $self->{state} = DATA_STATE;
2522     !!!next-input-character;
2523    
2524 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2525 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2526    
2527     redo A;
2528 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2529 wakaba 1.77 !!!cp (189);
2530 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2531    
2532 wakaba 1.57 $self->{state} = DATA_STATE;
2533 wakaba 1.18 ## reconsume
2534    
2535 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2536 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2537    
2538     redo A;
2539     } else {
2540 wakaba 1.77 !!!cp (190);
2541 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2542 wakaba 1.76 .= chr $self->{next_char};
2543 wakaba 1.18 ## Stay in the state
2544     !!!next-input-character;
2545     redo A;
2546     }
2547 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2548 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2549 wakaba 1.77 !!!cp (191);
2550 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2551 wakaba 1.18 !!!next-input-character;
2552     redo A;
2553 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2554 wakaba 1.77 !!!cp (192);
2555 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2556    
2557     $self->{state} = DATA_STATE;
2558     !!!next-input-character;
2559    
2560 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2561 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2562    
2563     redo A;
2564 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2565 wakaba 1.77 !!!cp (193);
2566 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2567    
2568 wakaba 1.57 $self->{state} = DATA_STATE;
2569 wakaba 1.18 ## reconsume
2570    
2571 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2572 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2573    
2574     redo A;
2575     } else {
2576 wakaba 1.77 !!!cp (194);
2577 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2578 wakaba 1.76 .= chr $self->{next_char};
2579 wakaba 1.18 ## Stay in the state
2580     !!!next-input-character;
2581     redo A;
2582     }
2583 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2584 wakaba 1.18 if ({
2585     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2586     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2587 wakaba 1.76 }->{$self->{next_char}}) {
2588 wakaba 1.77 !!!cp (195);
2589 wakaba 1.18 ## Stay in the state
2590     !!!next-input-character;
2591     redo A;
2592 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2593 wakaba 1.77 !!!cp (196);
2594 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2595 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2596 wakaba 1.18 !!!next-input-character;
2597     redo A;
2598 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2599 wakaba 1.77 !!!cp (197);
2600 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2601 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2602 wakaba 1.18 !!!next-input-character;
2603     redo A;
2604 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2605 wakaba 1.77 !!!cp (198);
2606 wakaba 1.57 $self->{state} = DATA_STATE;
2607 wakaba 1.18 !!!next-input-character;
2608    
2609     !!!emit ($self->{current_token}); # DOCTYPE
2610    
2611     redo A;
2612 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2613 wakaba 1.77 !!!cp (199);
2614 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2615    
2616 wakaba 1.57 $self->{state} = DATA_STATE;
2617 wakaba 1.26 ## reconsume
2618 wakaba 1.18
2619 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2620 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2621    
2622     redo A;
2623     } else {
2624 wakaba 1.77 !!!cp (200);
2625 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2626 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2627 wakaba 1.73
2628 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2629 wakaba 1.18 !!!next-input-character;
2630     redo A;
2631     }
2632 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2633 wakaba 1.18 if ({
2634     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2635     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2636 wakaba 1.76 }->{$self->{next_char}}) {
2637 wakaba 1.77 !!!cp (201);
2638 wakaba 1.18 ## Stay in the state
2639     !!!next-input-character;
2640     redo A;
2641 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2642 wakaba 1.77 !!!cp (202);
2643 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2644 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2645 wakaba 1.18 !!!next-input-character;
2646     redo A;
2647 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2648 wakaba 1.77 !!!cp (203);
2649 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2650 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2651 wakaba 1.18 !!!next-input-character;
2652     redo A;
2653 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2654 wakaba 1.77 !!!cp (204);
2655 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2656 wakaba 1.57 $self->{state} = DATA_STATE;
2657 wakaba 1.18 !!!next-input-character;
2658    
2659 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2660 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2661    
2662     redo A;
2663 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2664 wakaba 1.77 !!!cp (205);
2665 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2666    
2667 wakaba 1.57 $self->{state} = DATA_STATE;
2668 wakaba 1.26 ## reconsume
2669 wakaba 1.18
2670 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2671 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2672    
2673     redo A;
2674     } else {
2675 wakaba 1.77 !!!cp (206);
2676 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2677 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2678 wakaba 1.73
2679 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2680 wakaba 1.18 !!!next-input-character;
2681     redo A;
2682     }
2683 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2684 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2685 wakaba 1.77 !!!cp (207);
2686 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2687 wakaba 1.18 !!!next-input-character;
2688     redo A;
2689 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2690 wakaba 1.77 !!!cp (208);
2691 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2692 wakaba 1.69
2693     $self->{state} = DATA_STATE;
2694     !!!next-input-character;
2695    
2696 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2697 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2698    
2699     redo A;
2700 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2701 wakaba 1.77 !!!cp (209);
2702 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2703    
2704 wakaba 1.57 $self->{state} = DATA_STATE;
2705 wakaba 1.18 ## reconsume
2706    
2707 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2708 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2709    
2710     redo A;
2711     } else {
2712 wakaba 1.77 !!!cp (210);
2713 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2714 wakaba 1.76 .= chr $self->{next_char};
2715 wakaba 1.18 ## Stay in the state
2716     !!!next-input-character;
2717     redo A;
2718     }
2719 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2720 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2721 wakaba 1.77 !!!cp (211);
2722 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2723 wakaba 1.18 !!!next-input-character;
2724     redo A;
2725 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2726 wakaba 1.77 !!!cp (212);
2727 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2728 wakaba 1.69
2729     $self->{state} = DATA_STATE;
2730     !!!next-input-character;
2731    
2732 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2733 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2734    
2735     redo A;
2736 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2737 wakaba 1.77 !!!cp (213);
2738 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2739    
2740 wakaba 1.57 $self->{state} = DATA_STATE;
2741 wakaba 1.18 ## reconsume
2742    
2743 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2744 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2745    
2746     redo A;
2747     } else {
2748 wakaba 1.77 !!!cp (214);
2749 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2750 wakaba 1.76 .= chr $self->{next_char};
2751 wakaba 1.18 ## Stay in the state
2752     !!!next-input-character;
2753     redo A;
2754     }
2755 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2756 wakaba 1.18 if ({
2757     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2758     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2759 wakaba 1.76 }->{$self->{next_char}}) {
2760 wakaba 1.77 !!!cp (215);
2761 wakaba 1.18 ## Stay in the state
2762     !!!next-input-character;
2763     redo A;
2764 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2765 wakaba 1.77 !!!cp (216);
2766 wakaba 1.57 $self->{state} = DATA_STATE;
2767 wakaba 1.18 !!!next-input-character;
2768    
2769     !!!emit ($self->{current_token}); # DOCTYPE
2770    
2771     redo A;
2772 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2773 wakaba 1.77 !!!cp (217);
2774 wakaba 1.150 !!!parse-error (type => 'unclosed DOCTYPE');
2775 wakaba 1.57 $self->{state} = DATA_STATE;
2776 wakaba 1.26 ## reconsume
2777 wakaba 1.18
2778 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2779 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2780    
2781     redo A;
2782     } else {
2783 wakaba 1.77 !!!cp (218);
2784 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2785 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2786 wakaba 1.73
2787 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2788 wakaba 1.1 !!!next-input-character;
2789     redo A;
2790     }
2791 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2792 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2793 wakaba 1.77 !!!cp (219);
2794 wakaba 1.57 $self->{state} = DATA_STATE;
2795 wakaba 1.1 !!!next-input-character;
2796    
2797     !!!emit ($self->{current_token}); # DOCTYPE
2798    
2799     redo A;
2800 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2801 wakaba 1.77 !!!cp (220);
2802 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2803 wakaba 1.57 $self->{state} = DATA_STATE;
2804 wakaba 1.1 ## reconsume
2805    
2806     !!!emit ($self->{current_token}); # DOCTYPE
2807    
2808     redo A;
2809     } else {
2810 wakaba 1.77 !!!cp (221);
2811 wakaba 1.1 ## Stay in the state
2812     !!!next-input-character;
2813     redo A;
2814     }
2815 wakaba 1.127 } elsif ($self->{state} == CDATA_BLOCK_STATE) {
2816     my $s = '';
2817    
2818     my ($l, $c) = ($self->{line}, $self->{column});
2819    
2820     CS: while ($self->{next_char} != -1) {
2821     if ($self->{next_char} == 0x005D) { # ]
2822     !!!next-input-character;
2823     if ($self->{next_char} == 0x005D) { # ]
2824     !!!next-input-character;
2825     MDC: {
2826     if ($self->{next_char} == 0x003E) { # >
2827     !!!cp (221.1);
2828     !!!next-input-character;
2829     last CS;
2830     } elsif ($self->{next_char} == 0x005D) { # ]
2831     !!!cp (221.2);
2832     $s .= ']';
2833     !!!next-input-character;
2834     redo MDC;
2835     } else {
2836     !!!cp (221.3);
2837     $s .= ']]';
2838     #
2839     }
2840     } # MDC
2841     } else {
2842     !!!cp (221.4);
2843     $s .= ']';
2844     #
2845     }
2846     } else {
2847     !!!cp (221.5);
2848     #
2849     }
2850     $s .= chr $self->{next_char};
2851     !!!next-input-character;
2852     } # CS
2853    
2854     $self->{state} = DATA_STATE;
2855     ## next-input-character done or EOF, which is reconsumed.
2856    
2857     if (length $s) {
2858     !!!cp (221.6);
2859     !!!emit ({type => CHARACTER_TOKEN, data => $s,
2860     line => $l, column => $c});
2861     } else {
2862     !!!cp (221.7);
2863     }
2864    
2865     redo A;
2866    
2867     ## ISSUE: "text tokens" in spec.
2868     ## TODO: Streaming support
2869 wakaba 1.1 } else {
2870     die "$0: $self->{state}: Unknown state";
2871     }
2872     } # A
2873    
2874     die "$0: _get_next_token: unexpected case";
2875     } # _get_next_token
2876    
sub _tokenize_attempt_to_consume_an_entity ($$$) {
  ## Try to read a character reference beginning at the current input
  ## character ($self->{next_char}).
  ##
  ##   $in_attr    - flag; when true, a named reference that lacks its
  ##                 ";" and is followed by further name characters is
  ##                 returned as literal text instead of being replaced.
  ##   $additional - one more character code that, like white space,
  ##                 "<", "&" and EOF, means "this is not a reference".
  ##                 NOTE(review): presumably the attribute value
  ##                 delimiter; confirm against the callers.
  ##
  ## Returns a CHARACTER_TOKEN hash reference, or |undef| when no token
  ## is produced.
  my ($self, $in_attr, $additional) = @_;

  ## Position attached to every error and token reported from here.
  my ($ref_line, $ref_column) = ($self->{line_prev}, $self->{column_prev});

  my %not_reference_start = (
    0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF
    0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &, EOF
    # 0x000D is intentionally not listed # CR
    $additional => 1,
  );
  if ($not_reference_start{$self->{next_char}}) {
    !!!cp (1001);
    ## Don't consume
    ## No error
    return undef;
  }

  if ($self->{next_char} == 0x0023) { # #
    !!!next-input-character;

    if ($self->{next_char} == 0x0078 or # x
        $self->{next_char} == 0x0058) { # X
      ## Hexadecimal form.  Remember the "x"/"X" so that it can be
      ## pushed back if no digit follows.
      my $x_char = $self->{next_char};
      !!!next-input-character;

      ## |$code| stays undefined until the first digit has been seen.
      my $code;
      HEX_DIGIT: {
        my $digit;
        if (0x0030 <= $self->{next_char} and
            $self->{next_char} <= 0x0039) { # 0..9
          !!!cp (1002);
          $digit = $self->{next_char} - 0x0030;
        } elsif (0x0061 <= $self->{next_char} and
                 $self->{next_char} <= 0x0066) { # a..f
          !!!cp (1003);
          $digit = $self->{next_char} - 0x0061 + 10;
        } elsif (0x0041 <= $self->{next_char} and
                 $self->{next_char} <= 0x0046) { # A..F
          !!!cp (1004);
          $digit = $self->{next_char} - 0x0041 + 10;
        } else {
          last HEX_DIGIT;
        }
        $code = ($code || 0) * 0x10 + $digit;
        !!!next-input-character;
        redo HEX_DIGIT;
      } # HEX_DIGIT

      unless (defined $code) { # no hexadecimal digit
        !!!cp (1005);
        !!!parse-error (type => 'bare hcro', line => $ref_line, column => $ref_column);
        !!!back-next-input-character ($x_char, $self->{next_char});
        $self->{next_char} = 0x0023; # #
        return undef;
      }

      if ($self->{next_char} == 0x003B) { # ;
        !!!cp (1006);
        !!!next-input-character;
      } else {
        !!!cp (1007);
        !!!parse-error (type => 'no refc', line => $ref_line, column => $ref_column);
      }

      ## Replace code points that must not be produced as they are.
      if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
        !!!cp (1008);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U+%04X', $code),
                        line => $ref_line, column => $ref_column);
        $code = 0xFFFD;
      } elsif ($code > 0x10FFFF) {
        !!!cp (1009);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U-%08X', $code),
                        line => $ref_line, column => $ref_column);
        $code = 0xFFFD;
      } elsif ($code == 0x000D) {
        !!!cp (1010);
        !!!parse-error (type => 'CR character reference',
                        line => $ref_line, column => $ref_column);
        $code = 0x000A;
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!cp (1011);
        !!!parse-error (type => 'C1 character reference',
                        text => (sprintf 'U+%04X', $code),
                        line => $ref_line, column => $ref_column);
        $code = $c1_entity_char->{$code};
      }

      return {
        type => CHARACTER_TOKEN,
        data => chr ($code),
        has_reference => 1,
        line => $ref_line, column => $ref_column,
      };
    }

    if (0x0030 <= $self->{next_char} and
        $self->{next_char} <= 0x0039) { # 0..9
      ## Decimal form.
      my $code = $self->{next_char} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_char} and
             $self->{next_char} <= 0x0039) { # 0..9
        !!!cp (1012);
        $code = $code * 10 + ($self->{next_char} - 0x0030);

        !!!next-input-character;
      }

      if ($self->{next_char} == 0x003B) { # ;
        !!!cp (1013);
        !!!next-input-character;
      } else {
        !!!cp (1014);
        !!!parse-error (type => 'no refc', line => $ref_line, column => $ref_column);
      }

      ## Replace code points that must not be produced as they are.
      if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
        !!!cp (1015);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U+%04X', $code),
                        line => $ref_line, column => $ref_column);
        $code = 0xFFFD;
      } elsif ($code > 0x10FFFF) {
        !!!cp (1016);
        !!!parse-error (type => 'invalid character reference',
                        text => (sprintf 'U-%08X', $code),
                        line => $ref_line, column => $ref_column);
        $code = 0xFFFD;
      } elsif ($code == 0x000D) {
        !!!cp (1017);
        !!!parse-error (type => 'CR character reference',
                        line => $ref_line, column => $ref_column);
        $code = 0x000A;
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!cp (1018);
        !!!parse-error (type => 'C1 character reference',
                        text => (sprintf 'U+%04X', $code),
                        line => $ref_line, column => $ref_column);
        $code = $c1_entity_char->{$code};
      }

      return {
        type => CHARACTER_TOKEN,
        data => chr ($code),
        has_reference => 1,
        line => $ref_line, column => $ref_column,
      };
    }

    ## "#" followed by neither "x"/"X" nor a digit: restore the "#".
    !!!cp (1019);
    !!!parse-error (type => 'bare nero', line => $ref_line, column => $ref_column);
    !!!back-next-input-character ($self->{next_char});
    $self->{next_char} = 0x0023; # #
    return undef;
  }

  if ((0x0041 <= $self->{next_char} and
       $self->{next_char} <= 0x005A) or # A..Z
      (0x0061 <= $self->{next_char} and
       $self->{next_char} <= 0x007A)) { # a..z
    ## Named form.
    my $entity_name = chr $self->{next_char};
    !!!next-input-character;

    ## |$value| is the text to emit: the replacement of the most recent
    ## name that matched, followed by whatever was read after it.
    my $value = $entity_name;

    ## |$match|:  0  no name has matched yet,
    ##            1  a name ending with ";" matched,
    ##           -1  a name without ";" matched; the value is doubled
    ##               for every further character read after it, so a
    ##               value below -1 means that something followed.
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    ## NOTE: 30 is some number greater than the maximum length of
    ## entity name.
    NAME_CHAR: while (length ($entity_name) < 30 and
                      ((0x0041 <= $self->{next_char} and
                        $self->{next_char} <= 0x005A) or # A..Z
                       (0x0061 <= $self->{next_char} and
                        $self->{next_char} <= 0x007A) or # a..z
                       (0x0030 <= $self->{next_char} and
                        $self->{next_char} <= 0x0039) or # 0..9
                       $self->{next_char} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_char};
      my $replacement = $EntityChar->{$entity_name};

      unless (defined $replacement) {
        !!!cp (1022);
        $value .= chr $self->{next_char};
        $match *= 2;
        !!!next-input-character;
        next NAME_CHAR;
      }

      if ($self->{next_char} == 0x003B) { # ;
        !!!cp (1020);
        $value = $replacement;
        $match = 1;
        !!!next-input-character;
        last NAME_CHAR;
      }

      !!!cp (1021);
      $value = $replacement;
      $match = -1;
      !!!next-input-character;
    } # NAME_CHAR

    if ($match > 0) {
      !!!cp (1023);
      return {
        type => CHARACTER_TOKEN,
        data => $value,
        has_reference => 1,
        line => $ref_line, column => $ref_column,
      };
    }

    if ($match < 0) {
      !!!parse-error (type => 'no refc', line => $ref_line, column => $ref_column);
      if ($in_attr and $match < -1) {
        !!!cp (1024);
        ## Hand the consumed text back unchanged.
        return {
          type => CHARACTER_TOKEN,
          data => '&'.$entity_name,
          line => $ref_line, column => $ref_column,
        };
      }

      !!!cp (1025);
      return {
        type => CHARACTER_TOKEN,
        data => $value,
        has_reference => 1,
        line => $ref_line, column => $ref_column,
      };
    }

    !!!cp (1026);
    !!!parse-error (type => 'bare ero', line => $ref_line, column => $ref_column);
    ## NOTE: "No characters are consumed" in the spec.
    return {
      type => CHARACTER_TOKEN,
      data => '&'.$value,
      line => $ref_line, column => $ref_column,
    };
  }

  !!!cp (1027);
  ## no characters are consumed
  !!!parse-error (type => 'bare ero', line => $ref_line, column => $ref_column);
  return undef;
} # _tokenize_attempt_to_consume_an_entity
3094    
sub _initialize_tree_constructor ($) {
  ## Prepare $self->{document} for the tree construction stage.
  ## NOTE: $self->{document} MUST be specified before this method is called
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST

  ## Set both source position user data entries to 1.  The result of
  ## the last call is also the result of this method.
  $doc->set_user_data (manakai_source_line => 1);
  return $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
3105    
sub _terminate_tree_constructor ($) {
  ## Undo _initialize_tree_constructor: switch strict error checking of
  ## $self->{document} back on.
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  return $doc->strict_error_checking (1);
} # _terminate_tree_constructor
3111    
3112     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
3113    
3114 wakaba 1.3 { # tree construction stage
3115     my $token;
3116    
sub _construct_tree ($) {
  ## Run the tree construction stage: fetch the first token, reset the
  ## tree construction state kept in $self, then run the handlers for
  ## the insertion modes in order.
  my $self = $_[0];

  ## When an interactive UA renders the $self->{document} available
  ## to the user, or when it begins accepting user input, is not
  ## defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Start without a form element, a head element or an innerHTML
  ## context node, and with an empty stack of open elements.
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  return $self->_tree_construction_main;
} # _construct_tree
3145    
3146     sub _tree_construction_initial ($) {
3147     my $self = shift;
3148 wakaba 1.84
3149     ## NOTE: "initial" insertion mode
3150    
3151 wakaba 1.18 INITIAL: {
3152 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3153 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
3154     ## error, switch to a conformance checking mode for another
3155     ## language.
3156     my $doctype_name = $token->{name};
3157     $doctype_name = '' unless defined $doctype_name;
3158 wakaba 1.159 $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
3159 wakaba 1.18 if (not defined $token->{name} or # <!DOCTYPE>
3160     defined $token->{system_identifier}) {
3161 wakaba 1.79 !!!cp ('t1');
3162 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
3163 wakaba 1.18 } elsif ($doctype_name ne 'HTML') {
3164 wakaba 1.79 !!!cp ('t2');
3165 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
3166 wakaba 1.159 } elsif (defined $token->{public_identifier}) {
3167     if ($token->{public_identifier} eq 'XSLT-compat') {
3168     !!!cp ('t1.2');
3169     !!!parse-error (type => 'XSLT-compat', token => $token,
3170     level => $self->{level}->{should});
3171     } else {
3172     !!!parse-error (type => 'not HTML5', token => $token);
3173     }
3174 wakaba 1.79 } else {
3175     !!!cp ('t3');
3176 wakaba 1.159 #
3177 wakaba 1.18 }
3178    
3179     my $doctype = $self->{document}->create_document_type_definition
3180     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
3181 wakaba 1.122 ## NOTE: Default value for both |public_id| and |system_id| attributes
3182     ## are empty strings, so that we don't set any value in missing cases.
3183 wakaba 1.18 $doctype->public_id ($token->{public_identifier})
3184     if defined $token->{public_identifier};
3185     $doctype->system_id ($token->{system_identifier})
3186     if defined $token->{system_identifier};
3187     ## NOTE: Other DocumentType attributes are null or empty lists.
3188     ## ISSUE: internalSubset = null??
3189     $self->{document}->append_child ($doctype);
3190    
## Decide the document's compatibility mode from the DOCTYPE token.
## The mode is set to "quirks" or "limited quirks" when one of the
## conditions below holds; otherwise it is left untouched.
## NOTE: |$doctype_name| is set earlier in the enclosing sub.
if ($token->{quirks} or $doctype_name ne 'HTML') {
  !!!cp ('t4');
  $self->{document}->manakai_compat_mode ('quirks');
} elsif (defined $token->{public_identifier}) {
  ## Public identifiers are matched ASCII case-insensitively, so fold
  ## the identifier to upper case; every literal below is upper case.
  my $pubid = $token->{public_identifier};
  $pubid =~ tr/a-z/A-Z/;
  ## Public identifier prefixes that select the quirks mode.
  my $prefix = [
    "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
    "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
    "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
    "-//IETF//DTD HTML 2.0 LEVEL 1//",
    "-//IETF//DTD HTML 2.0 LEVEL 2//",
    "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
    "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
    "-//IETF//DTD HTML 2.0 STRICT//",
    "-//IETF//DTD HTML 2.0//",
    "-//IETF//DTD HTML 2.1E//",
    "-//IETF//DTD HTML 3.0//",
    "-//IETF//DTD HTML 3.2 FINAL//",
    "-//IETF//DTD HTML 3.2//",
    "-//IETF//DTD HTML 3//",
    "-//IETF//DTD HTML LEVEL 0//",
    "-//IETF//DTD HTML LEVEL 1//",
    "-//IETF//DTD HTML LEVEL 2//",
    "-//IETF//DTD HTML LEVEL 3//",
    "-//IETF//DTD HTML STRICT LEVEL 0//",
    "-//IETF//DTD HTML STRICT LEVEL 1//",
    "-//IETF//DTD HTML STRICT LEVEL 2//",
    "-//IETF//DTD HTML STRICT LEVEL 3//",
    "-//IETF//DTD HTML STRICT//",
    "-//IETF//DTD HTML//",
    "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
    "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
    "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
    "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
    "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
    "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
    "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
    "-//NETSCAPE COMM. CORP.//DTD HTML//",
    "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
    "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
    "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
    "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
    "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
    "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
    "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
    "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
    "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
    "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
    "-//W3C//DTD HTML 3 1995-03-24//",
    "-//W3C//DTD HTML 3.2 DRAFT//",
    "-//W3C//DTD HTML 3.2 FINAL//",
    "-//W3C//DTD HTML 3.2//",
    "-//W3C//DTD HTML 3.2S DRAFT//",
    "-//W3C//DTD HTML 4.0 FRAMESET//",
    "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
    "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
    "-//W3C//DTD HTML EXPERIMENTAL 970421//",
    "-//W3C//DTD W3 HTML//",
    "-//W3O//DTD W3 HTML 3.0//",
    "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
    "-//WEBTECHS//DTD MOZILLA HTML//",
  ]; # $prefix
  my $match;
  for (@$prefix) {
    ## Test the public identifier itself (not the |$prefix| array
    ## reference) against each known prefix.
    if (substr ($pubid, 0, length $_) eq $_) {
      $match = 1;
      last;
    }
  }
  if ($match or
      $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
      $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
      $pubid eq "HTML") {
    !!!cp ('t5');
    $self->{document}->manakai_compat_mode ('quirks');
  } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
           $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
    ## HTML 4.01 Frameset/Transitional: quirks only when the system
    ## identifier is missing; limited quirks when it is present.
    if (defined $token->{system_identifier}) {
      !!!cp ('t7');
      $self->{document}->manakai_compat_mode ('limited quirks');
    } else {
      !!!cp ('t6');
      $self->{document}->manakai_compat_mode ('quirks');
    }
  } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
           $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
    !!!cp ('t8');
    $self->{document}->manakai_compat_mode ('limited quirks');
  } else {
    !!!cp ('t9');
  }
} else {
  !!!cp ('t10');
}
3286     if (defined $token->{system_identifier}) {
3287     my $sysid = $token->{system_identifier};
3288     $sysid =~ tr/A-Z/a-z/;
3289     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
3290 wakaba 1.143 ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
3291     ## marked as quirks.
3292 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3293 wakaba 1.79 !!!cp ('t11');
3294     } else {
3295     !!!cp ('t12');
3296 wakaba 1.18 }
3297 wakaba 1.79 } else {
3298     !!!cp ('t13');
3299 wakaba 1.18 }
3300    
3301 wakaba 1.84 ## Go to the "before html" insertion mode.
3302 wakaba 1.18 !!!next-token;
3303     return;
3304     } elsif ({
3305 wakaba 1.55 START_TAG_TOKEN, 1,
3306     END_TAG_TOKEN, 1,
3307     END_OF_FILE_TOKEN, 1,
3308 wakaba 1.18 }->{$token->{type}}) {
3309 wakaba 1.79 !!!cp ('t14');
3310 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
3311 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3312 wakaba 1.84 ## Go to the "before html" insertion mode.
3313 wakaba 1.18 ## reprocess
3314 wakaba 1.125 !!!ack-later;
3315 wakaba 1.18 return;
3316 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
3317 wakaba 1.18 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
3318     ## Ignore the token
3319 wakaba 1.26
3320 wakaba 1.18 unless (length $token->{data}) {
3321 wakaba 1.79 !!!cp ('t15');
3322 wakaba 1.84 ## Stay in the insertion mode.
3323 wakaba 1.18 !!!next-token;
3324     redo INITIAL;
3325 wakaba 1.79 } else {
3326     !!!cp ('t16');
3327 wakaba 1.3 }
3328 wakaba 1.79 } else {
3329     !!!cp ('t17');
3330 wakaba 1.3 }
3331 wakaba 1.18
3332 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
3333 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3334 wakaba 1.84 ## Go to the "before html" insertion mode.
3335 wakaba 1.18 ## reprocess
3336     return;
3337 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3338 wakaba 1.79 !!!cp ('t18');
3339 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
3340     $self->{document}->append_child ($comment);
3341    
3342 wakaba 1.84 ## Stay in the insertion mode.
3343 wakaba 1.18 !!!next-token;
3344     redo INITIAL;
3345     } else {
3346 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
3347 wakaba 1.18 }
3348     } # INITIAL
3349 wakaba 1.79
3350     die "$0: _tree_construction_initial: This should be never reached";
3351 wakaba 1.3 } # _tree_construction_initial
3352    
## _tree_construction_root_element ($self)
##
## Handles the "before html" insertion mode using the file-level current
## |$token|.  DOCTYPE tokens are reported and ignored, comments are
## appended to the document, and leading white space characters are
## dropped.  An |html| start tag creates the root element from the token;
## any other token makes the root |html| element be created implicitly
## and is left to be reprocessed.  In both cases the element is appended
## to the document and pushed onto the stack of open elements, and the
## |application_cache_selection| callback is invoked exactly once.
##
## Returns nothing; returning means "go to the 'before head' insertion
## mode".  Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is invoked by
      ## the shared code below, so it must not also be invoked here
      ## (otherwise it would run twice for a single document).

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Any token that falls through to here implies the |html| element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3447    
## _reset_insertion_mode ($self)
##
## Resets |$self->{insertion_mode}| according to the stack of open
## elements.  The stack is examined from the current node (the last
## entry) towards the first entry; the first entry that determines an
## insertion mode ends the search.  When the first entry of the stack is
## reached, |$self->{inner_html_node}| is used in its place; if that is
## not defined the method dies.
##
## Returns nothing.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] & TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }
    ## Assign and return as two separate statements so that the return
    ## does not depend on the numeric value of the selected mode.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] & HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3534    
3535     sub _tree_construction_main ($) {
3536     my $self = shift;
3537    
3538 wakaba 1.1 my $active_formatting_elements = [];
3539    
## Reconstructs the active formatting elements.  The single argument is
## a code reference that inserts a newly cloned element into the tree.
## Entries of the list of active formatting elements that follow the
## last marker (or the last entry that is still in the stack of open
## elements) are cloned, inserted, pushed onto the stack of open
## elements, and replaced in the list by their clones.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert_node = shift;

  ## Step 1: Nothing to do for an empty list.
  return unless @$active_formatting_elements;

  ## Step 3: Start from the last entry (negative index from the end).
  my $pos = -1;
  my $current = $active_formatting_elements->[$pos];

  ## Step 2: Nothing to do if the last entry is a marker or is open.
  return if $current->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($current->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  ## Steps 4-7: Rewind towards the start of the list until the entry
  ## before |$current| is a marker or an open element.
  REWIND: while (1) {
    ## Step 4
    last REWIND
        if $active_formatting_elements->[0]->[0] eq $current->[0];

    ## Step 5
    $pos--;
    $current = $active_formatting_elements->[$pos];

    ## Step 6
    my $stop_here;
    if ($current->[0] eq '#marker') {
      !!!cp ('t33_1');
      $stop_here = 1;
    } else {
      for my $open (@{$self->{open_elements}}) {
        next unless $current->[0] eq $open->[0];
        !!!cp ('t33');
        $stop_here = 1;
        last;
      }
      if ($stop_here) {
        !!!cp ('t34');
      } else {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
      }
    }
    next REWIND unless $stop_here;

    ## Step 7
    $pos++;
    $current = $active_formatting_elements->[$pos];
    last REWIND;
  } # REWIND

  ## Steps 8-11: Clone every entry from |$pos| to the end of the list.
  CLONE: while (1) {
    ## Step 8
    my $copy = [$current->[0]->clone_node (0), $current->[1]];

    ## Step 9
    $insert_node->($copy->[0]);
    push @{$self->{open_elements}}, $copy;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    if ($copy->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t37');
      last CLONE;
    }

    !!!cp ('t36');
    ## Step 7'
    $pos++;
    $current = $active_formatting_elements->[$pos];
  } # CLONE
}; # $reconstruct_active_formatting_elements
3619    
## Truncates the list of active formatting elements at its last
## |#marker| entry (the marker itself is removed too).  The list is
## left unchanged when it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3631    
3632 wakaba 1.96 my $insert;
3633    
## Generic CDATA/RCDATA element parsing for the current start tag
## |$token|.  The argument is the content model flag to tokenize the
## element content with (|CDATA_CONTENT_MODEL| or
## |RCDATA_CONTENT_MODEL|); any other value dies when the element is not
## closed by a matching end tag.
my $parse_rcdata = sub ($) {
  my ($content_model_flag) = @_;

  ## Step 1: Create the element for the start tag.
  my $start_tag_name = $token->{tag_name};
  my $element;
  !!!create-element ($element, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2: Insert it with the currently selected insertion routine.
  $insert->($element);

  ## Step 3: Switch the tokenizer's content model.
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4: Collect all following character tokens.
  my $buffer = '';
  !!!nack ('t40.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    $buffer .= $token->{data};
    !!!next-token;
  }

  ## Step 5: Append the collected text, if any, as a single text node.
  if (length $buffer) {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($buffer);
    $element->append_child ($text_node);
  }

  ## Step 6: Back to the normal content model.
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7: Expect the matching end tag.
  my $is_matching_end_tag = ($token->{type} == END_TAG_TOKEN &&
                             $token->{tag_name} eq $start_tag_name);
  if ($is_matching_end_tag) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
  } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#eof', token => $token);
  } else {
    die "$0: $content_model_flag in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3688 wakaba 1.1
## Handles a |script| start tag (the current |$token|): creates the
## element, collects its character data in the CDATA content model, and
## inserts the element unless |$self->{inner_html_node}| is defined.
my $script_start_tag = sub () {
  my $script_element;
  !!!create-element ($script_element, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Gather the script text; stop if non-character token or tokenizer
  ## stops tokenising.
  my $script_text = '';
  !!!nack ('t45.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) {
    !!!cp ('t45');
    $script_text .= $token->{data};
    !!!next-token;
  }
  if (length $script_text) {
    !!!cp ('t46');
    $script_element->manakai_append_text ($script_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  my $is_script_end_tag = ($token->{type} == END_TAG_TOKEN &&
                           $token->{tag_name} eq 'script');
  unless ($is_script_end_tag) {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  } else {
    !!!cp ('t47');
    ## Ignore the token
  }

  unless (defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_element);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
3740    
3741 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3742     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3743     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3744    
## Handles an end tag of a formatting element by the adoption agency
## algorithm (AAA).  The argument is the end tag token; its |tag_name|
## selects the formatting element and the token itself is attached to
## every parse error that is reported.
##
## The closure returns (after fetching the next token) when no matching
## formatting element is found, when the element is not in the stack of
## open elements or not in scope, or when there is no furthest block.
## Otherwise the algorithm is repeated (|redo FET|).
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1: Find the last matching entry after the last marker in the
    ## list of active formatting elements.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last entry, so a plain |pop| would be wrong.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2: The furthest block is the non-formatting special/scoping
    ## element closest to (and below) the formatting element in the
    ## stack of open elements.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3: Without a furthest block, just pop up to and including
    ## the formatting element and drop it from the list.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
      = $active_formatting_elements->[$formatting_element_i_in_active - 1]
      ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: Nodes that are not in the list of active formatting
      ## elements are removed from the stack of open elements.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8: Insert |$last_node| under the common ancestor, or foster
    ## parent it when the common ancestor is a table-row-ish element.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
              = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
        unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: Replace the formatting element in the list of active
    ## formatting elements by the clone, placed after the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: Remove the formatting element from the stack of open
    ## elements and insert the clone immediately below the furthest
    ## block.  The clone is inserted (length 0), not substituted for the
    ## entry that follows the furthest block, so that no open element is
    ## lost from the stack.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3979    
## Default insertion routine: appends the given node to the current node
## (the last entry of the stack of open elements).  It is also installed
## as the initial value of |$insert|.
$insert = my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
3983    
## Insertion routine with foster parenting.  When the current node has
## the |TABLE_ROWS_EL| category, the given node is inserted relative to
## the last |TABLE_EL| entry in the stack of open elements and the
## current table is flagged as tainted; otherwise the node is appended
## to the current node.
my $insert_to_foster = sub {
  my $child = shift;

  unless ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $self->{open_elements}->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  my $idx = $#{$self->{open_elements}};
  OE: while ($idx >= 0) {
    my $entry = $self->{open_elements}->[$idx];
    if ($entry->[1] & TABLE_EL) {
      my $parent = $entry->[0]->parent_node;
      if (defined $parent and $parent->node_type == 1) {
        ## The table has an element parent: insert before the table.
        !!!cp ('t70');
        $foster_parent_element = $parent;
        $next_sibling = $entry->[0];
      } else {
        ## Otherwise use the entry preceding the table in the stack.
        !!!cp ('t71');
        $foster_parent_element = $self->{open_elements}->[$idx - 1]->[0];
      }
      last OE;
    }
    $idx--;
  } # OE

  ## No table in the stack: fall back to the first open element.
  unless (defined $foster_parent_element) {
    $foster_parent_element = $self->{open_elements}->[0]->[0];
  }
  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
4015    
4016 wakaba 1.126 B: while (1) {
4017 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
4018 wakaba 1.79 !!!cp ('t73');
4019 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
4020 wakaba 1.52 ## Ignore the token
4021     ## Stay in the phase
4022     !!!next-token;
4023 wakaba 1.126 next B;
4024 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
4025 wakaba 1.52 $token->{tag_name} eq 'html') {
4026 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4027 wakaba 1.79 !!!cp ('t79');
4028 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4029 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
4030     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
4031 wakaba 1.79 !!!cp ('t80');
4032 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4033 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4034 wakaba 1.79 } else {
4035     !!!cp ('t81');
4036 wakaba 1.52 }
4037    
4038 wakaba 1.84 !!!cp ('t82');
4039 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
4040 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
4041     for my $attr_name (keys %{$token->{attributes}}) {
4042     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4043 wakaba 1.79 !!!cp ('t84');
4044 wakaba 1.52 $top_el->set_attribute_ns
4045     (undef, [undef, $attr_name],
4046     $token->{attributes}->{$attr_name}->{value});
4047     }
4048     }
4049 wakaba 1.125 !!!nack ('t84.1');
4050 wakaba 1.52 !!!next-token;
4051 wakaba 1.126 next B;
4052 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
4053 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
4054 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
4055 wakaba 1.79 !!!cp ('t85');
4056 wakaba 1.52 $self->{document}->append_child ($comment);
4057 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
4058 wakaba 1.79 !!!cp ('t86');
4059 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
4060     } else {
4061 wakaba 1.79 !!!cp ('t87');
4062 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4063     }
4064     !!!next-token;
4065 wakaba 1.126 next B;
4066     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
4067     if ($token->{type} == CHARACTER_TOKEN) {
4068     !!!cp ('t87.1');
4069     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4070     !!!next-token;
4071     next B;
4072     } elsif ($token->{type} == START_TAG_TOKEN) {
4073 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
4074     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
4075 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
4076     ($token->{tag_name} eq 'svg' and
4077     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
4078     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4079     !!!cp ('t87.2');
4080     #
4081     } elsif ({
4082 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4083 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
4084     em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
4085     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
4086     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
4087     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
4088     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
4089     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
4090 wakaba 1.126 }->{$token->{tag_name}}) {
4091     !!!cp ('t87.2');
4092     !!!parse-error (type => 'not closed',
4093 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4094 wakaba 1.126 ->manakai_local_name,
4095     token => $token);
4096    
4097     pop @{$self->{open_elements}}
4098     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4099    
4100 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4101 wakaba 1.126 ## Reprocess.
4102     next B;
4103     } else {
4104 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
4105     my $tag_name = $token->{tag_name};
4106     if ($nsuri eq $SVG_NS) {
4107     $tag_name = {
4108     altglyph => 'altGlyph',
4109     altglyphdef => 'altGlyphDef',
4110     altglyphitem => 'altGlyphItem',
4111     animatecolor => 'animateColor',
4112     animatemotion => 'animateMotion',
4113     animatetransform => 'animateTransform',
4114     clippath => 'clipPath',
4115     feblend => 'feBlend',
4116     fecolormatrix => 'feColorMatrix',
4117     fecomponenttransfer => 'feComponentTransfer',
4118     fecomposite => 'feComposite',
4119     feconvolvematrix => 'feConvolveMatrix',
4120     fediffuselighting => 'feDiffuseLighting',
4121     fedisplacementmap => 'feDisplacementMap',
4122     fedistantlight => 'feDistantLight',
4123     feflood => 'feFlood',
4124     fefunca => 'feFuncA',
4125     fefuncb => 'feFuncB',
4126     fefuncg => 'feFuncG',
4127     fefuncr => 'feFuncR',
4128     fegaussianblur => 'feGaussianBlur',
4129     feimage => 'feImage',
4130     femerge => 'feMerge',
4131     femergenode => 'feMergeNode',
4132     femorphology => 'feMorphology',
4133     feoffset => 'feOffset',
4134     fepointlight => 'fePointLight',
4135     fespecularlighting => 'feSpecularLighting',
4136     fespotlight => 'feSpotLight',
4137     fetile => 'feTile',
4138     feturbulence => 'feTurbulence',
4139     foreignobject => 'foreignObject',
4140     glyphref => 'glyphRef',
4141     lineargradient => 'linearGradient',
4142     radialgradient => 'radialGradient',
4143     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
4144     textpath => 'textPath',
4145     }->{$tag_name} || $tag_name;
4146     }
4147    
4148     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
4149    
4150     ## "adjust foreign attributes" - done in insert-element-f
4151 wakaba 1.126
4152 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
4153 wakaba 1.126
4154     if ($self->{self_closing}) {
4155     pop @{$self->{open_elements}};
4156     !!!ack ('t87.3');
4157     } else {
4158     !!!cp ('t87.4');
4159     }
4160    
4161     !!!next-token;
4162     next B;
4163     }
4164     } elsif ($token->{type} == END_TAG_TOKEN) {
4165     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4166     !!!cp ('t87.5');
4167     #
4168     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4169     !!!cp ('t87.6');
4170 wakaba 1.146 !!!parse-error (type => 'not closed',
4171 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4172 wakaba 1.146 ->manakai_local_name,
4173     token => $token);
4174    
4175     pop @{$self->{open_elements}}
4176     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4177    
4178     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4179     ## Reprocess.
4180     next B;
4181 wakaba 1.126 } else {
4182     die "$0: $token->{type}: Unknown token type";
4183     }
4184     }
4185    
4186     if ($self->{insertion_mode} & HEAD_IMS) {
4187 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4188 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4189 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4190     !!!cp ('t88.2');
4191     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4192     } else {
4193     !!!cp ('t88.1');
4194     ## Ignore the token.
4195     !!!next-token;
4196 wakaba 1.126 next B;
4197 wakaba 1.99 }
4198 wakaba 1.52 unless (length $token->{data}) {
4199 wakaba 1.79 !!!cp ('t88');
4200 wakaba 1.52 !!!next-token;
4201 wakaba 1.126 next B;
4202 wakaba 1.1 }
4203     }
4204 wakaba 1.52
4205 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4206 wakaba 1.79 !!!cp ('t89');
4207 wakaba 1.52 ## As if <head>
4208 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4209 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4210 wakaba 1.123 push @{$self->{open_elements}},
4211     [$self->{head_element}, $el_category->{head}];
4212 wakaba 1.52
4213     ## Reprocess in the "in head" insertion mode...
4214     pop @{$self->{open_elements}};
4215    
4216     ## Reprocess in the "after head" insertion mode...
4217 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4218 wakaba 1.79 !!!cp ('t90');
4219 wakaba 1.52 ## As if </noscript>
4220     pop @{$self->{open_elements}};
4221 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
4222 wakaba 1.1
4223 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
4224     ## As if </head>
4225     pop @{$self->{open_elements}};
4226    
4227     ## Reprocess in the "after head" insertion mode...
4228 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4229 wakaba 1.79 !!!cp ('t91');
4230 wakaba 1.52 pop @{$self->{open_elements}};
4231    
4232     ## Reprocess in the "after head" insertion mode...
4233 wakaba 1.79 } else {
4234     !!!cp ('t92');
4235 wakaba 1.1 }
4236 wakaba 1.52
4237 wakaba 1.123 ## "after head" insertion mode
4238     ## As if <body>
4239     !!!insert-element ('body',, $token);
4240     $self->{insertion_mode} = IN_BODY_IM;
4241     ## reprocess
4242 wakaba 1.126 next B;
4243 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
4244     if ($token->{tag_name} eq 'head') {
4245     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4246     !!!cp ('t93');
4247 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4248 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
4249     ($self->{head_element});
4250     push @{$self->{open_elements}},
4251     [$self->{head_element}, $el_category->{head}];
4252     $self->{insertion_mode} = IN_HEAD_IM;
4253 wakaba 1.125 !!!nack ('t93.1');
4254 wakaba 1.123 !!!next-token;
4255 wakaba 1.126 next B;
4256 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4257 wakaba 1.139 !!!cp ('t93.2');
4258 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
4259     token => $token);
4260 wakaba 1.139 ## Ignore the token
4261     !!!nack ('t93.3');
4262     !!!next-token;
4263     next B;
4264 wakaba 1.125 } else {
4265     !!!cp ('t95');
4266 wakaba 1.153 !!!parse-error (type => 'in head:head',
4267     token => $token); # or in head noscript
4268 wakaba 1.125 ## Ignore the token
4269     !!!nack ('t95.1');
4270     !!!next-token;
4271 wakaba 1.126 next B;
4272 wakaba 1.125 }
4273     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4274 wakaba 1.126 !!!cp ('t96');
4275     ## As if <head>
4276     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4277     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4278     push @{$self->{open_elements}},
4279     [$self->{head_element}, $el_category->{head}];
4280 wakaba 1.52
4281 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
4282     ## Reprocess in the "in head" insertion mode...
4283     } else {
4284     !!!cp ('t97');
4285     }
4286 wakaba 1.52
4287 wakaba 1.49 if ($token->{tag_name} eq 'base') {
4288 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4289 wakaba 1.79 !!!cp ('t98');
4290 wakaba 1.49 ## As if </noscript>
4291     pop @{$self->{open_elements}};
4292 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'base',
4293     token => $token);
4294 wakaba 1.49
4295 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4296 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4297 wakaba 1.79 } else {
4298     !!!cp ('t99');
4299 wakaba 1.49 }
4300    
4301     ## NOTE: There is a "as if in head" code clone.
4302 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4303 wakaba 1.79 !!!cp ('t100');
4304 wakaba 1.153 !!!parse-error (type => 'after head',
4305     text => $token->{tag_name}, token => $token);
4306 wakaba 1.123 push @{$self->{open_elements}},
4307     [$self->{head_element}, $el_category->{head}];
4308 wakaba 1.79 } else {
4309     !!!cp ('t101');
4310 wakaba 1.49 }
4311 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4312 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4313 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4314 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4315 wakaba 1.125 !!!nack ('t101.1');
4316 wakaba 1.49 !!!next-token;
4317 wakaba 1.126 next B;
4318 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
4319 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4320 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4321 wakaba 1.79 !!!cp ('t102');
4322 wakaba 1.153 !!!parse-error (type => 'after head',
4323     text => $token->{tag_name}, token => $token);
4324 wakaba 1.123 push @{$self->{open_elements}},
4325     [$self->{head_element}, $el_category->{head}];
4326 wakaba 1.79 } else {
4327     !!!cp ('t103');
4328 wakaba 1.25 }
4329 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4330 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4331 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4332 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4333 wakaba 1.125 !!!ack ('t103.1');
4334 wakaba 1.1 !!!next-token;
4335 wakaba 1.126 next B;
4336 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4337     ## NOTE: There is a "as if in head" code clone.
4338 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4339 wakaba 1.79 !!!cp ('t104');
4340 wakaba 1.153 !!!parse-error (type => 'after head',
4341     text => $token->{tag_name}, token => $token);
4342 wakaba 1.123 push @{$self->{open_elements}},
4343     [$self->{head_element}, $el_category->{head}];
4344 wakaba 1.79 } else {
4345     !!!cp ('t105');
4346 wakaba 1.34 }
4347 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4348 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4349 wakaba 1.34
4350     unless ($self->{confident}) {
4351 wakaba 1.134 if ($token->{attributes}->{charset}) {
4352 wakaba 1.79 !!!cp ('t106');
4353 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4354     ## in the {change_encoding} callback.
4355 wakaba 1.63 $self->{change_encoding}
4356 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
4357     $token);
4358 wakaba 1.66
4359     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4360     ->set_user_data (manakai_has_reference =>
4361     $token->{attributes}->{charset}
4362     ->{has_reference});
4363 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4364     if ($token->{attributes}->{content}->{value}
4365 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4366 wakaba 1.70 [\x09-\x0D\x20]*=
4367 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4368 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
4369 wakaba 1.79 !!!cp ('t107');
4370 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4371     ## in the {change_encoding} callback.
4372 wakaba 1.63 $self->{change_encoding}
4373 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4374     $token);
4375 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4376     ->set_user_data (manakai_has_reference =>
4377     $token->{attributes}->{content}
4378     ->{has_reference});
4379 wakaba 1.79 } else {
4380     !!!cp ('t108');
4381 wakaba 1.63 }
4382 wakaba 1.34 }
4383 wakaba 1.66 } else {
4384     if ($token->{attributes}->{charset}) {
4385 wakaba 1.79 !!!cp ('t109');
4386 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4387     ->set_user_data (manakai_has_reference =>
4388     $token->{attributes}->{charset}
4389     ->{has_reference});
4390     }
4391 wakaba 1.68 if ($token->{attributes}->{content}) {
4392 wakaba 1.79 !!!cp ('t110');
4393 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4394     ->set_user_data (manakai_has_reference =>
4395     $token->{attributes}->{content}
4396     ->{has_reference});
4397     }
4398 wakaba 1.34 }
4399    
4400 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4401 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4402 wakaba 1.125 !!!ack ('t110.1');
4403 wakaba 1.34 !!!next-token;
4404 wakaba 1.126 next B;
4405 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
4406 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4407 wakaba 1.79 !!!cp ('t111');
4408 wakaba 1.49 ## As if </noscript>
4409     pop @{$self->{open_elements}};
4410 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'title',
4411     token => $token);
4412 wakaba 1.49
4413 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4414 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4415 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4416 wakaba 1.79 !!!cp ('t112');
4417 wakaba 1.153 !!!parse-error (type => 'after head',
4418     text => $token->{tag_name}, token => $token);
4419 wakaba 1.123 push @{$self->{open_elements}},
4420     [$self->{head_element}, $el_category->{head}];
4421 wakaba 1.79 } else {
4422     !!!cp ('t113');
4423 wakaba 1.25 }
4424 wakaba 1.49
4425     ## NOTE: There is a "as if in head" code clone.
4426 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4427     : $self->{open_elements}->[-1]->[0];
4428 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4429 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4430 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4431 wakaba 1.126 next B;
4432 wakaba 1.148 } elsif ($token->{tag_name} eq 'style' or
4433     $token->{tag_name} eq 'noframes') {
4434 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4435 wakaba 1.54 ## insertion mode IN_HEAD_IM)
4436 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4437 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4438 wakaba 1.79 !!!cp ('t114');
4439 wakaba 1.153 !!!parse-error (type => 'after head',
4440     text => $token->{tag_name}, token => $token);
4441 wakaba 1.123 push @{$self->{open_elements}},
4442     [$self->{head_element}, $el_category->{head}];
4443 wakaba 1.79 } else {
4444     !!!cp ('t115');
4445 wakaba 1.25 }
4446 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4447 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4448 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4449 wakaba 1.126 next B;
4450 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
4451 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
4452 wakaba 1.79 !!!cp ('t116');
4453 wakaba 1.25 ## NOTE: and scripting is disabled
4454 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4455 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4456 wakaba 1.125 !!!nack ('t116.1');
4457 wakaba 1.1 !!!next-token;
4458 wakaba 1.126 next B;
4459 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4460 wakaba 1.79 !!!cp ('t117');
4461 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
4462     token => $token);
4463 wakaba 1.1 ## Ignore the token
4464 wakaba 1.125 !!!nack ('t117.1');
4465 wakaba 1.41 !!!next-token;
4466 wakaba 1.126 next B;
4467 wakaba 1.1 } else {
4468 wakaba 1.79 !!!cp ('t118');
4469 wakaba 1.25 #
4470 wakaba 1.1 }
4471 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4472 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4473 wakaba 1.79 !!!cp ('t119');
4474 wakaba 1.49 ## As if </noscript>
4475     pop @{$self->{open_elements}};
4476 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'script',
4477     token => $token);
4478 wakaba 1.49
4479 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4480 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4481 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4482 wakaba 1.79 !!!cp ('t120');
4483 wakaba 1.153 !!!parse-error (type => 'after head',
4484     text => $token->{tag_name}, token => $token);
4485 wakaba 1.123 push @{$self->{open_elements}},
4486     [$self->{head_element}, $el_category->{head}];
4487 wakaba 1.79 } else {
4488     !!!cp ('t121');
4489 wakaba 1.25 }
4490 wakaba 1.49
4491 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4492 wakaba 1.100 $script_start_tag->();
4493     pop @{$self->{open_elements}} # <head>
4494 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4495 wakaba 1.126 next B;
4496 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4497 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4498 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4499 wakaba 1.79 !!!cp ('t122');
4500 wakaba 1.49 ## As if </noscript>
4501     pop @{$self->{open_elements}};
4502 wakaba 1.153 !!!parse-error (type => 'in noscript',
4503     text => $token->{tag_name}, token => $token);
4504 wakaba 1.49
4505     ## Reprocess in the "in head" insertion mode...
4506     ## As if </head>
4507     pop @{$self->{open_elements}};
4508    
4509     ## Reprocess in the "after head" insertion mode...
4510 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4511 wakaba 1.79 !!!cp ('t124');
4512 wakaba 1.49 pop @{$self->{open_elements}};
4513    
4514     ## Reprocess in the "after head" insertion mode...
4515 wakaba 1.79 } else {
4516     !!!cp ('t125');
4517 wakaba 1.49 }
4518    
4519     ## "after head" insertion mode
4520 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4521 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4522 wakaba 1.79 !!!cp ('t126');
4523 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4524     } elsif ($token->{tag_name} eq 'frameset') {
4525 wakaba 1.79 !!!cp ('t127');
4526 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4527     } else {
4528     die "$0: tag name: $self->{tag_name}";
4529     }
4530 wakaba 1.125 !!!nack ('t127.1');
4531 wakaba 1.1 !!!next-token;
4532 wakaba 1.126 next B;
4533 wakaba 1.1 } else {
4534 wakaba 1.79 !!!cp ('t128');
4535 wakaba 1.1 #
4536     }
4537 wakaba 1.49
4538 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4539 wakaba 1.79 !!!cp ('t129');
4540 wakaba 1.49 ## As if </noscript>
4541     pop @{$self->{open_elements}};
4542 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4543     text => $token->{tag_name}, token => $token);
4544 wakaba 1.49
4545     ## Reprocess in the "in head" insertion mode...
4546     ## As if </head>
4547 wakaba 1.25 pop @{$self->{open_elements}};
4548 wakaba 1.49
4549     ## Reprocess in the "after head" insertion mode...
4550 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4551 wakaba 1.79 !!!cp ('t130');
4552 wakaba 1.49 ## As if </head>
4553 wakaba 1.25 pop @{$self->{open_elements}};
4554 wakaba 1.49
4555     ## Reprocess in the "after head" insertion mode...
4556 wakaba 1.79 } else {
4557     !!!cp ('t131');
4558 wakaba 1.49 }
4559    
4560     ## "after head" insertion mode
4561     ## As if <body>
4562 wakaba 1.116 !!!insert-element ('body',, $token);
4563 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4564 wakaba 1.49 ## reprocess
4565 wakaba 1.125 !!!ack-later;
4566 wakaba 1.126 next B;
4567 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4568 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4569 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4570 wakaba 1.79 !!!cp ('t132');
4571 wakaba 1.50 ## As if <head>
4572 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4573 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4574 wakaba 1.123 push @{$self->{open_elements}},
4575     [$self->{head_element}, $el_category->{head}];
4576 wakaba 1.50
4577     ## Reprocess in the "in head" insertion mode...
4578     pop @{$self->{open_elements}};
4579 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4580 wakaba 1.50 !!!next-token;
4581 wakaba 1.126 next B;
4582 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4583 wakaba 1.79 !!!cp ('t133');
4584 wakaba 1.49 ## As if </noscript>
4585     pop @{$self->{open_elements}};
4586 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4587     text => 'head', token => $token);
4588 wakaba 1.49
4589     ## Reprocess in the "in head" insertion mode...
4590 wakaba 1.50 pop @{$self->{open_elements}};
4591 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4592 wakaba 1.50 !!!next-token;
4593 wakaba 1.126 next B;
4594 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4595 wakaba 1.79 !!!cp ('t134');
4596 wakaba 1.49 pop @{$self->{open_elements}};
4597 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4598 wakaba 1.49 !!!next-token;
4599 wakaba 1.126 next B;
4600 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4601     !!!cp ('t134.1');
4602 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
4603     token => $token);
4604 wakaba 1.139 ## Ignore the token
4605     !!!next-token;
4606     next B;
4607 wakaba 1.49 } else {
4608 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4609 wakaba 1.49 }
4610     } elsif ($token->{tag_name} eq 'noscript') {
4611 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4612 wakaba 1.79 !!!cp ('t136');
4613 wakaba 1.49 pop @{$self->{open_elements}};
4614 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4615 wakaba 1.49 !!!next-token;
4616 wakaba 1.126 next B;
4617 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4618     $self->{insertion_mode} == AFTER_HEAD_IM) {
4619 wakaba 1.79 !!!cp ('t137');
4620 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4621     text => 'noscript', token => $token);
4622 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4623     !!!next-token;
4624 wakaba 1.126 next B;
4625 wakaba 1.49 } else {
4626 wakaba 1.79 !!!cp ('t138');
4627 wakaba 1.49 #
4628     }
4629     } elsif ({
4630 wakaba 1.31 body => 1, html => 1,
4631     }->{$token->{tag_name}}) {
4632 wakaba 1.139 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4633     $self->{insertion_mode} == IN_HEAD_IM or
4634     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4635 wakaba 1.79 !!!cp ('t140');
4636 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4637     text => $token->{tag_name}, token => $token);
4638 wakaba 1.49 ## Ignore the token
4639     !!!next-token;
4640 wakaba 1.126 next B;
4641 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4642     !!!cp ('t140.1');
4643 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4644     text => $token->{tag_name}, token => $token);
4645 wakaba 1.139 ## Ignore the token
4646     !!!next-token;
4647     next B;
4648 wakaba 1.79 } else {
4649 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4650 wakaba 1.49 }
4651 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
4652     !!!cp ('t142');
4653 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4654     text => $token->{tag_name}, token => $token);
4655 wakaba 1.139 ## Ignore the token
4656     !!!next-token;
4657     next B;
4658     } elsif ($token->{tag_name} eq 'br') {
4659 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4660 wakaba 1.139 !!!cp ('t142.2');
4661     ## (before head) as if <head>, (in head) as if </head>
4662 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4663 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4664 wakaba 1.139 $self->{insertion_mode} = AFTER_HEAD_IM;
4665    
4666     ## Reprocess in the "after head" insertion mode...
4667     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4668     !!!cp ('t143.2');
4669     ## As if </head>
4670     pop @{$self->{open_elements}};
4671     $self->{insertion_mode} = AFTER_HEAD_IM;
4672    
4673     ## Reprocess in the "after head" insertion mode...
4674     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4675     !!!cp ('t143.3');
4676     ## ISSUE: Two parse errors for <head><noscript></br>
4677 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4678     text => 'br', token => $token);
4679 wakaba 1.139 ## As if </noscript>
4680     pop @{$self->{open_elements}};
4681     $self->{insertion_mode} = IN_HEAD_IM;
4682 wakaba 1.50
4683     ## Reprocess in the "in head" insertion mode...
4684 wakaba 1.139 ## As if </head>
4685     pop @{$self->{open_elements}};
4686     $self->{insertion_mode} = AFTER_HEAD_IM;
4687    
4688     ## Reprocess in the "after head" insertion mode...
4689     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4690     !!!cp ('t143.4');
4691     #
4692 wakaba 1.79 } else {
4693 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4694 wakaba 1.50 }
4695    
4696 wakaba 1.139 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4697 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4698     text => 'br', token => $token);
4699 wakaba 1.139 ## Ignore the token
4700     !!!next-token;
4701     next B;
4702 wakaba 1.25 } else {
4703 wakaba 1.139 !!!cp ('t145');
4704 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4705     text => $token->{tag_name}, token => $token);
4706 wakaba 1.139 ## Ignore the token
4707     !!!next-token;
4708     next B;
4709 wakaba 1.49 }
4710    
4711 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4712 wakaba 1.79 !!!cp ('t146');
4713 wakaba 1.49 ## As if </noscript>
4714     pop @{$self->{open_elements}};
4715 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4716     text => $token->{tag_name}, token => $token);
4717 wakaba 1.49
4718     ## Reprocess in the "in head" insertion mode...
4719     ## As if </head>
4720     pop @{$self->{open_elements}};
4721    
4722     ## Reprocess in the "after head" insertion mode...
4723 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4724 wakaba 1.79 !!!cp ('t147');
4725 wakaba 1.49 ## As if </head>
4726     pop @{$self->{open_elements}};
4727    
4728     ## Reprocess in the "after head" insertion mode...
4729 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4730 wakaba 1.82 ## ISSUE: This case cannot be reached?
4731 wakaba 1.79 !!!cp ('t148');
4732 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4733     text => $token->{tag_name}, token => $token);
4734 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4735     !!!next-token;
4736 wakaba 1.126 next B;
4737 wakaba 1.79 } else {
4738     !!!cp ('t149');
4739 wakaba 1.1 }
4740    
4741 wakaba 1.49 ## "after head" insertion mode
4742     ## As if <body>
4743 wakaba 1.116 !!!insert-element ('body',, $token);
4744 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4745 wakaba 1.52 ## reprocess
4746 wakaba 1.126 next B;
4747 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4748     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4749     !!!cp ('t149.1');
4750    
4751     ## NOTE: As if <head>
4752 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4753 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4754     ($self->{head_element});
4755 wakaba 1.123 #push @{$self->{open_elements}},
4756     # [$self->{head_element}, $el_category->{head}];
4757 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4758     ## NOTE: Reprocess.
4759    
4760     ## NOTE: As if </head>
4761     #pop @{$self->{open_elements}};
4762     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4763     ## NOTE: Reprocess.
4764    
4765     #
4766     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4767     !!!cp ('t149.2');
4768    
4769     ## NOTE: As if </head>
4770     pop @{$self->{open_elements}};
4771     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4772     ## NOTE: Reprocess.
4773    
4774     #
4775     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4776     !!!cp ('t149.3');
4777    
4778 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4779 wakaba 1.104
4780     ## As if </noscript>
4781     pop @{$self->{open_elements}};
4782     #$self->{insertion_mode} = IN_HEAD_IM;
4783     ## NOTE: Reprocess.
4784    
4785     ## NOTE: As if </head>
4786     pop @{$self->{open_elements}};
4787     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4788     ## NOTE: Reprocess.
4789    
4790     #
4791     } else {
4792     !!!cp ('t149.4');
4793     #
4794     }
4795    
4796     ## NOTE: As if <body>
4797 wakaba 1.116 !!!insert-element ('body',, $token);
4798 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4799     ## NOTE: Reprocess.
4800 wakaba 1.126 next B;
4801 wakaba 1.104 } else {
4802     die "$0: $token->{type}: Unknown token type";
4803     }
4804 wakaba 1.52
4805     ## ISSUE: An issue in the spec.
4806 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4807 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4808 wakaba 1.79 !!!cp ('t150');
4809 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4810     $reconstruct_active_formatting_elements->($insert_to_current);
4811    
4812     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4813    
4814     !!!next-token;
4815 wakaba 1.126 next B;
4816 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4817 wakaba 1.52 if ({
4818     caption => 1, col => 1, colgroup => 1, tbody => 1,
4819     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4820     }->{$token->{tag_name}}) {
4821 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4822 wakaba 1.52 ## have an element in table scope
4823 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4824 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4825 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4826 wakaba 1.79 !!!cp ('t151');
4827 wakaba 1.108
4828     ## Close the cell
4829 wakaba 1.125 !!!back-token; # <x>
4830 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4831     tag_name => $node->[0]->manakai_local_name,
4832 wakaba 1.114 line => $token->{line},
4833     column => $token->{column}};
4834 wakaba 1.126 next B;
4835 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4836 wakaba 1.79 !!!cp ('t152');
4837 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4838     last;
4839 wakaba 1.52 }
4840 wakaba 1.108 }
4841    
4842     !!!cp ('t153');
4843     !!!parse-error (type => 'start tag not allowed',
4844 wakaba 1.153 text => $token->{tag_name}, token => $token);
4845 wakaba 1.108 ## Ignore the token
4846 wakaba 1.125 !!!nack ('t153.1');
4847 wakaba 1.108 !!!next-token;
4848 wakaba 1.126 next B;
4849 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4850 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
4851     token => $token);
4852 wakaba 1.52
4853 wakaba 1.108 ## NOTE: As if </caption>.
4854 wakaba 1.52 ## have a table element in table scope
4855     my $i;
4856 wakaba 1.108 INSCOPE: {
4857     for (reverse 0..$#{$self->{open_elements}}) {
4858     my $node = $self->{open_elements}->[$_];
4859 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4860 wakaba 1.108 !!!cp ('t155');
4861     $i = $_;
4862     last INSCOPE;
4863 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4864 wakaba 1.108 !!!cp ('t156');
4865     last;
4866     }
4867 wakaba 1.52 }
4868 wakaba 1.108
4869     !!!cp ('t157');
4870     !!!parse-error (type => 'start tag not allowed',
4871 wakaba 1.153 text => $token->{tag_name}, token => $token);
4872 wakaba 1.108 ## Ignore the token
4873 wakaba 1.125 !!!nack ('t157.1');
4874 wakaba 1.108 !!!next-token;
4875 wakaba 1.126 next B;
4876 wakaba 1.52 } # INSCOPE
4877    
4878     ## generate implied end tags
4879 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4880     & END_TAG_OPTIONAL_EL) {
4881 wakaba 1.79 !!!cp ('t158');
4882 wakaba 1.86 pop @{$self->{open_elements}};
4883 wakaba 1.52 }
4884    
4885 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4886 wakaba 1.79 !!!cp ('t159');
4887 wakaba 1.122 !!!parse-error (type => 'not closed',
4888 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4889 wakaba 1.122 ->manakai_local_name,
4890     token => $token);
4891 wakaba 1.79 } else {
4892     !!!cp ('t160');
4893 wakaba 1.52 }
4894    
4895     splice @{$self->{open_elements}}, $i;
4896    
4897     $clear_up_to_marker->();
4898    
4899 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4900 wakaba 1.52
4901     ## reprocess
4902 wakaba 1.125 !!!ack-later;
4903 wakaba 1.126 next B;
4904 wakaba 1.52 } else {
4905 wakaba 1.79 !!!cp ('t161');
4906 wakaba 1.52 #
4907     }
4908     } else {
4909 wakaba 1.79 !!!cp ('t162');
4910 wakaba 1.52 #
4911     }
4912 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4913 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4914 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4915 wakaba 1.43 ## have an element in table scope
4916 wakaba 1.52 my $i;
4917 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4918     my $node = $self->{open_elements}->[$_];
4919 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4920 wakaba 1.79 !!!cp ('t163');
4921 wakaba 1.52 $i = $_;
4922 wakaba 1.43 last INSCOPE;
4923 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4924 wakaba 1.79 !!!cp ('t164');
4925 wakaba 1.43 last INSCOPE;
4926     }
4927     } # INSCOPE
4928 wakaba 1.52 unless (defined $i) {
4929 wakaba 1.79 !!!cp ('t165');
4930 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4931     text => $token->{tag_name},
4932     token => $token);
4933 wakaba 1.43 ## Ignore the token
4934     !!!next-token;
4935 wakaba 1.126 next B;
4936 wakaba 1.43 }
4937    
4938 wakaba 1.52 ## generate implied end tags
4939 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4940     & END_TAG_OPTIONAL_EL) {
4941 wakaba 1.79 !!!cp ('t166');
4942 wakaba 1.86 pop @{$self->{open_elements}};
4943 wakaba 1.52 }
4944 wakaba 1.86
4945 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
4946     ne $token->{tag_name}) {
4947 wakaba 1.79 !!!cp ('t167');
4948 wakaba 1.122 !!!parse-error (type => 'not closed',
4949 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4950 wakaba 1.122 ->manakai_local_name,
4951     token => $token);
4952 wakaba 1.79 } else {
4953     !!!cp ('t168');
4954 wakaba 1.52 }
4955    
4956     splice @{$self->{open_elements}}, $i;
4957    
4958     $clear_up_to_marker->();
4959    
4960 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4961 wakaba 1.52
4962     !!!next-token;
4963 wakaba 1.126 next B;
4964 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4965 wakaba 1.79 !!!cp ('t169');
4966 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4967     text => $token->{tag_name}, token => $token);
4968 wakaba 1.52 ## Ignore the token
4969     !!!next-token;
4970 wakaba 1.126 next B;
4971 wakaba 1.52 } else {
4972 wakaba 1.79 !!!cp ('t170');
4973 wakaba 1.52 #
4974     }
4975     } elsif ($token->{tag_name} eq 'caption') {
4976 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4977 wakaba 1.43 ## have a table element in table scope
4978     my $i;
4979 wakaba 1.108 INSCOPE: {
4980     for (reverse 0..$#{$self->{open_elements}}) {
4981     my $node = $self->{open_elements}->[$_];
4982 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4983 wakaba 1.108 !!!cp ('t171');
4984     $i = $_;
4985     last INSCOPE;
4986 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4987 wakaba 1.108 !!!cp ('t172');
4988     last;
4989     }
4990 wakaba 1.43 }
4991 wakaba 1.108
4992     !!!cp ('t173');
4993     !!!parse-error (type => 'unmatched end tag',
4994 wakaba 1.153 text => $token->{tag_name}, token => $token);
4995 wakaba 1.108 ## Ignore the token
4996     !!!next-token;
4997 wakaba 1.126 next B;
4998 wakaba 1.43 } # INSCOPE
4999    
5000     ## generate implied end tags
5001 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5002     & END_TAG_OPTIONAL_EL) {
5003 wakaba 1.79 !!!cp ('t174');
5004 wakaba 1.86 pop @{$self->{open_elements}};
5005 wakaba 1.43 }
5006 wakaba 1.52
5007 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5008 wakaba 1.79 !!!cp ('t175');
5009 wakaba 1.122 !!!parse-error (type => 'not closed',
5010 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5011 wakaba 1.122 ->manakai_local_name,
5012     token => $token);
5013 wakaba 1.79 } else {
5014     !!!cp ('t176');
5015 wakaba 1.52 }
5016    
5017     splice @{$self->{open_elements}}, $i;
5018    
5019     $clear_up_to_marker->();
5020    
5021 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5022 wakaba 1.52
5023     !!!next-token;
5024 wakaba 1.126 next B;
5025 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
5026 wakaba 1.79 !!!cp ('t177');
5027 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5028     text => $token->{tag_name}, token => $token);
5029 wakaba 1.52 ## Ignore the token
5030     !!!next-token;
5031 wakaba 1.126 next B;
5032 wakaba 1.52 } else {
5033 wakaba 1.79 !!!cp ('t178');
5034 wakaba 1.52 #
5035     }
5036     } elsif ({
5037     table => 1, tbody => 1, tfoot => 1,
5038     thead => 1, tr => 1,
5039     }->{$token->{tag_name}} and
5040 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
5041 wakaba 1.52 ## have an element in table scope
5042     my $i;
5043     my $tn;
5044 wakaba 1.108 INSCOPE: {
5045     for (reverse 0..$#{$self->{open_elements}}) {
5046     my $node = $self->{open_elements}->[$_];
5047 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5048 wakaba 1.108 !!!cp ('t179');
5049     $i = $_;
5050    
5051     ## Close the cell
5052 wakaba 1.125 !!!back-token; # </x>
5053 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
5054     line => $token->{line},
5055     column => $token->{column}};
5056 wakaba 1.126 next B;
5057 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
5058 wakaba 1.108 !!!cp ('t180');
5059 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
5060 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
5061     ## in scope in the stack of open elements by definition.
5062 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5063 wakaba 1.108 ## ISSUE: Can this be reached?
5064     !!!cp ('t181');
5065     last;
5066     }
5067 wakaba 1.52 }
5068 wakaba 1.108
5069 wakaba 1.79 !!!cp ('t182');
5070 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
5071 wakaba 1.153 text => $token->{tag_name}, token => $token);
5072 wakaba 1.52 ## Ignore the token
5073     !!!next-token;
5074 wakaba 1.126 next B;
5075 wakaba 1.108 } # INSCOPE
5076 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
5077 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5078 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
5079     token => $token);
5080 wakaba 1.52
5081     ## As if </caption>
5082     ## have a table element in table scope
5083     my $i;
5084     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5085     my $node = $self->{open_elements}->[$_];
5086 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5087 wakaba 1.79 !!!cp ('t184');
5088 wakaba 1.52 $i = $_;
5089     last INSCOPE;
5090 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5091 wakaba 1.79 !!!cp ('t185');
5092 wakaba 1.52 last INSCOPE;
5093     }
5094     } # INSCOPE
5095     unless (defined $i) {
5096 wakaba 1.79 !!!cp ('t186');
5097 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5098     text => 'caption', token => $token);
5099 wakaba 1.52 ## Ignore the token
5100     !!!next-token;
5101 wakaba 1.126 next B;
5102 wakaba 1.52 }
5103    
5104     ## generate implied end tags
5105 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5106 wakaba 1.79 !!!cp ('t187');
5107 wakaba 1.86 pop @{$self->{open_elements}};
5108 wakaba 1.52 }
5109    
5110 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5111 wakaba 1.79 !!!cp ('t188');
5112 wakaba 1.122 !!!parse-error (type => 'not closed',
5113 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5114 wakaba 1.122 ->manakai_local_name,
5115     token => $token);
5116 wakaba 1.79 } else {
5117     !!!cp ('t189');
5118 wakaba 1.52 }
5119    
5120     splice @{$self->{open_elements}}, $i;
5121    
5122     $clear_up_to_marker->();
5123    
5124 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5125 wakaba 1.52
5126     ## reprocess
5127 wakaba 1.126 next B;
5128 wakaba 1.52 } elsif ({
5129     body => 1, col => 1, colgroup => 1, html => 1,
5130     }->{$token->{tag_name}}) {
5131 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
5132 wakaba 1.79 !!!cp ('t190');
5133 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5134     text => $token->{tag_name}, token => $token);
5135 wakaba 1.52 ## Ignore the token
5136     !!!next-token;
5137 wakaba 1.126 next B;
5138 wakaba 1.52 } else {
5139 wakaba 1.79 !!!cp ('t191');
5140 wakaba 1.52 #
5141     }
5142     } elsif ({
5143     tbody => 1, tfoot => 1,
5144     thead => 1, tr => 1,
5145     }->{$token->{tag_name}} and
5146 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5147 wakaba 1.79 !!!cp ('t192');
5148 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5149     text => $token->{tag_name}, token => $token);
5150 wakaba 1.52 ## Ignore the token
5151     !!!next-token;
5152 wakaba 1.126 next B;
5153 wakaba 1.52 } else {
5154 wakaba 1.79 !!!cp ('t193');
5155 wakaba 1.52 #
5156     }
5157 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5158     for my $entry (@{$self->{open_elements}}) {
5159 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
5160 wakaba 1.104 !!!cp ('t75');
5161 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5162 wakaba 1.104 last;
5163     }
5164     }
5165    
5166     ## Stop parsing.
5167     last B;
5168 wakaba 1.52 } else {
5169     die "$0: $token->{type}: Unknown token type";
5170     }
5171    
5172     $insert = $insert_to_current;
5173     #
5174 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
5175 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5176 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
5177     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5178     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5179 wakaba 1.52
5180 wakaba 1.95 unless (length $token->{data}) {
5181     !!!cp ('t194');
5182     !!!next-token;
5183 wakaba 1.126 next B;
5184 wakaba 1.95 } else {
5185     !!!cp ('t195');
5186     }
5187     }
5188 wakaba 1.52
5189 wakaba 1.153 !!!parse-error (type => 'in table:#text', token => $token);
5190 wakaba 1.52
5191     ## As if in body, but insert into foster parent element
5192     ## ISSUE: Spec says that "whenever a node would be inserted
5193     ## into the current node" while characters might not
5194     ## result in a new Text node.
5195     $reconstruct_active_formatting_elements->($insert_to_foster);
5196    
5197 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
5198 wakaba 1.52 # MUST
5199     my $foster_parent_element;
5200     my $next_sibling;
5201     my $prev_sibling;
5202     OE: for (reverse 0..$#{$self->{open_elements}}) {
5203 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
5204 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5205     if (defined $parent and $parent->node_type == 1) {
5206 wakaba 1.79 !!!cp ('t196');
5207 wakaba 1.52 $foster_parent_element = $parent;
5208     $next_sibling = $self->{open_elements}->[$_]->[0];
5209     $prev_sibling = $next_sibling->previous_sibling;
5210     } else {
5211 wakaba 1.79 !!!cp ('t197');
5212 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5213     $prev_sibling = $foster_parent_element->last_child;
5214     }
5215     last OE;
5216     }
5217     } # OE
5218     $foster_parent_element = $self->{open_elements}->[0]->[0] and
5219     $prev_sibling = $foster_parent_element->last_child
5220     unless defined $foster_parent_element;
5221     if (defined $prev_sibling and
5222     $prev_sibling->node_type == 3) {
5223 wakaba 1.79 !!!cp ('t198');
5224 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
5225     } else {
5226 wakaba 1.79 !!!cp ('t199');
5227 wakaba 1.52 $foster_parent_element->insert_before
5228     ($self->{document}->create_text_node ($token->{data}),
5229     $next_sibling);
5230     }
5231 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
5232     } else {
5233     !!!cp ('t200');
5234     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5235     }
5236 wakaba 1.52
5237 wakaba 1.95 !!!next-token;
5238 wakaba 1.126 next B;
5239 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5240 wakaba 1.153 if ({
5241     tr => ($self->{insertion_mode} != IN_ROW_IM),
5242     th => 1, td => 1,
5243     }->{$token->{tag_name}}) {
5244     if ($self->{insertion_mode} == IN_TABLE_IM) {
5245     ## Clear back to table context
5246     while (not ($self->{open_elements}->[-1]->[1]
5247     & TABLE_SCOPING_EL)) {
5248     !!!cp ('t201');
5249     pop @{$self->{open_elements}};
5250     }
5251    
5252     !!!insert-element ('tbody',, $token);
5253     $self->{insertion_mode} = IN_TABLE_BODY_IM;
5254     ## reprocess in the "in table body" insertion mode...
5255     }
5256    
5257     if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5258     unless ($token->{tag_name} eq 'tr') {
5259     !!!cp ('t202');
5260     !!!parse-error (type => 'missing start tag:tr', token => $token);
5261     }
5262 wakaba 1.43
5263 wakaba 1.153 ## Clear back to table body context
5264     while (not ($self->{open_elements}->[-1]->[1]
5265     & TABLE_ROWS_SCOPING_EL)) {
5266     !!!cp ('t203');
5267     ## ISSUE: Can this case be reached?
5268     pop @{$self->{open_elements}};
5269     }
5270 wakaba 1.43
5271 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5272 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5273 wakaba 1.79 !!!cp ('t204');
5274 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5275 wakaba 1.125 !!!nack ('t204');
5276 wakaba 1.52 !!!next-token;
5277 wakaba 1.126 next B;
5278 wakaba 1.52 } else {
5279 wakaba 1.79 !!!cp ('t205');
5280 wakaba 1.116 !!!insert-element ('tr',, $token);
5281 wakaba 1.52 ## reprocess in the "in row" insertion mode
5282     }
5283 wakaba 1.79 } else {
5284     !!!cp ('t206');
5285 wakaba 1.52 }
5286    
5287     ## Clear back to table row context
5288 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5289     & TABLE_ROW_SCOPING_EL)) {
5290 wakaba 1.79 !!!cp ('t207');
5291 wakaba 1.52 pop @{$self->{open_elements}};
5292 wakaba 1.43 }
5293 wakaba 1.52
5294 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5295 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
5296 wakaba 1.52
5297     push @$active_formatting_elements, ['#marker', ''];
5298    
5299 wakaba 1.125 !!!nack ('t207.1');
5300 wakaba 1.52 !!!next-token;
5301 wakaba 1.126 next B;
5302 wakaba 1.52 } elsif ({
5303     caption => 1, col => 1, colgroup => 1,
5304     tbody => 1, tfoot => 1, thead => 1,
5305 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5306 wakaba 1.52 }->{$token->{tag_name}}) {
5307 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5308 wakaba 1.52 ## As if </tr>
5309 wakaba 1.43 ## have an element in table scope
5310     my $i;
5311     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5312     my $node = $self->{open_elements}->[$_];
5313 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5314 wakaba 1.79 !!!cp ('t208');
5315 wakaba 1.43 $i = $_;
5316     last INSCOPE;
5317 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5318 wakaba 1.79 !!!cp ('t209');
5319 wakaba 1.43 last INSCOPE;
5320     }
5321     } # INSCOPE
5322 wakaba 1.79 unless (defined $i) {
5323 wakaba 1.125 !!!cp ('t210');
5324 wakaba 1.83 ## TODO: This type is wrong.
5325 wakaba 1.153 !!!parse-error (type => 'unmacthed end tag',
5326     text => $token->{tag_name}, token => $token);
5327 wakaba 1.52 ## Ignore the token
5328 wakaba 1.125 !!!nack ('t210.1');
5329 wakaba 1.52 !!!next-token;
5330 wakaba 1.126 next B;
5331 wakaba 1.43 }
5332    
5333 wakaba 1.52 ## Clear back to table row context
5334 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5335     & TABLE_ROW_SCOPING_EL)) {
5336 wakaba 1.79 !!!cp ('t211');
5337 wakaba 1.83 ## ISSUE: Can this case be reached?
5338 wakaba 1.52 pop @{$self->{open_elements}};
5339 wakaba 1.1 }
5340 wakaba 1.43
5341 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5342 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5343 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5344 wakaba 1.79 !!!cp ('t212');
5345 wakaba 1.52 ## reprocess
5346 wakaba 1.125 !!!ack-later;
5347 wakaba 1.126 next B;
5348 wakaba 1.52 } else {
5349 wakaba 1.79 !!!cp ('t213');
5350 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5351     }
5352 wakaba 1.1 }
5353 wakaba 1.52
5354 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5355 wakaba 1.52 ## have an element in table scope
5356 wakaba 1.43 my $i;
5357     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5358     my $node = $self->{open_elements}->[$_];
5359 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5360 wakaba 1.79 !!!cp ('t214');
5361 wakaba 1.43 $i = $_;
5362     last INSCOPE;
5363 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5364 wakaba 1.79 !!!cp ('t215');
5365 wakaba 1.43 last INSCOPE;
5366     }
5367     } # INSCOPE
5368 wakaba 1.52 unless (defined $i) {
5369 wakaba 1.79 !!!cp ('t216');
5370 wakaba 1.153 ## TODO: This error type is wrong.
5371     !!!parse-error (type => 'unmatched end tag',
5372     text => $token->{tag_name}, token => $token);
5373 wakaba 1.52 ## Ignore the token
5374 wakaba 1.125 !!!nack ('t216.1');
5375 wakaba 1.52 !!!next-token;
5376 wakaba 1.126 next B;
5377 wakaba 1.43 }
5378 wakaba 1.52
5379     ## Clear back to table body context
5380 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5381     & TABLE_ROWS_SCOPING_EL)) {
5382 wakaba 1.79 !!!cp ('t217');
5383 wakaba 1.83 ## ISSUE: Can this state be reached?
5384 wakaba 1.52 pop @{$self->{open_elements}};
5385 wakaba 1.43 }
5386    
5387 wakaba 1.52 ## As if <{current node}>
5388     ## have an element in table scope
5389     ## true by definition
5390 wakaba 1.43
5391 wakaba 1.52 ## Clear back to table body context
5392     ## nop by definition
5393 wakaba 1.43
5394 wakaba 1.52 pop @{$self->{open_elements}};
5395 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5396 wakaba 1.52 ## reprocess in "in table" insertion mode...
5397 wakaba 1.79 } else {
5398     !!!cp ('t218');
5399 wakaba 1.52 }
5400    
5401     if ($token->{tag_name} eq 'col') {
5402     ## Clear back to table context
5403 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5404     & TABLE_SCOPING_EL)) {
5405 wakaba 1.79 !!!cp ('t219');
5406 wakaba 1.83 ## ISSUE: Can this state be reached?
5407 wakaba 1.52 pop @{$self->{open_elements}};
5408     }
5409 wakaba 1.43
5410 wakaba 1.116 !!!insert-element ('colgroup',, $token);
5411 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5412 wakaba 1.52 ## reprocess
5413 wakaba 1.125 !!!ack-later;
5414 wakaba 1.126 next B;
5415 wakaba 1.52 } elsif ({
5416     caption => 1,
5417     colgroup => 1,
5418     tbody => 1, tfoot => 1, thead => 1,
5419     }->{$token->{tag_name}}) {
5420     ## Clear back to table context
5421 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5422     & TABLE_SCOPING_EL)) {
5423 wakaba 1.79 !!!cp ('t220');
5424 wakaba 1.83 ## ISSUE: Can this state be reached?
5425 wakaba 1.52 pop @{$self->{open_elements}};
5426 wakaba 1.1 }
5427 wakaba 1.52
5428     push @$active_formatting_elements, ['#marker', '']
5429     if $token->{tag_name} eq 'caption';
5430    
5431 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5432 wakaba 1.52 $self->{insertion_mode} = {
5433 wakaba 1.54 caption => IN_CAPTION_IM,
5434     colgroup => IN_COLUMN_GROUP_IM,
5435     tbody => IN_TABLE_BODY_IM,
5436     tfoot => IN_TABLE_BODY_IM,
5437     thead => IN_TABLE_BODY_IM,
5438 wakaba 1.52 }->{$token->{tag_name}};
5439 wakaba 1.1 !!!next-token;
5440 wakaba 1.125 !!!nack ('t220.1');
5441 wakaba 1.126 next B;
5442 wakaba 1.52 } else {
5443     die "$0: in table: <>: $token->{tag_name}";
5444 wakaba 1.1 }
5445 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5446 wakaba 1.122 !!!parse-error (type => 'not closed',
5447 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5448 wakaba 1.122 ->manakai_local_name,
5449     token => $token);
5450 wakaba 1.1
5451 wakaba 1.52 ## As if </table>
5452 wakaba 1.1 ## have a table element in table scope
5453     my $i;
5454 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5455     my $node = $self->{open_elements}->[$_];
5456 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5457 wakaba 1.79 !!!cp ('t221');
5458 wakaba 1.1 $i = $_;
5459     last INSCOPE;
5460 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5461 wakaba 1.79 !!!cp ('t222');
5462 wakaba 1.1 last INSCOPE;
5463     }
5464     } # INSCOPE
5465     unless (defined $i) {
5466 wakaba 1.79 !!!cp ('t223');
5467 wakaba 1.83 ## TODO: The following is wrong, maybe.
5468 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
5469     token => $token);
5470 wakaba 1.52 ## Ignore tokens </table><table>
5471 wakaba 1.125 !!!nack ('t223.1');
5472 wakaba 1.1 !!!next-token;
5473 wakaba 1.126 next B;
5474 wakaba 1.1 }
5475    
5476 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
5477 wakaba 1.1 ## generate implied end tags
5478 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5479 wakaba 1.79 !!!cp ('t224');
5480 wakaba 1.86 pop @{$self->{open_elements}};
5481 wakaba 1.1 }
5482    
5483 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5484 wakaba 1.79 !!!cp ('t225');
5485 wakaba 1.122 ## NOTE: |<table><tr><table>|
5486     !!!parse-error (type => 'not closed',
5487 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5488 wakaba 1.122 ->manakai_local_name,
5489     token => $token);
5490 wakaba 1.79 } else {
5491     !!!cp ('t226');
5492 wakaba 1.1 }
5493    
5494 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5495 wakaba 1.95 pop @{$open_tables};
5496 wakaba 1.1
5497 wakaba 1.52 $self->_reset_insertion_mode;
5498 wakaba 1.1
5499 wakaba 1.125 ## reprocess
5500     !!!ack-later;
5501 wakaba 1.126 next B;
5502 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
5503     if (not $open_tables->[-1]->[1]) { # tainted
5504     !!!cp ('t227.8');
5505     ## NOTE: This is a "as if in head" code clone.
5506     $parse_rcdata->(CDATA_CONTENT_MODEL);
5507 wakaba 1.126 next B;
5508 wakaba 1.100 } else {
5509     !!!cp ('t227.7');
5510     #
5511     }
5512     } elsif ($token->{tag_name} eq 'script') {
5513     if (not $open_tables->[-1]->[1]) { # tainted
5514     !!!cp ('t227.6');
5515     ## NOTE: This is a "as if in head" code clone.
5516     $script_start_tag->();
5517 wakaba 1.126 next B;
5518 wakaba 1.100 } else {
5519     !!!cp ('t227.5');
5520     #
5521     }
5522 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5523     if (not $open_tables->[-1]->[1]) { # tainted
5524     if ($token->{attributes}->{type}) { ## TODO: case
5525     my $type = lc $token->{attributes}->{type}->{value};
5526     if ($type eq 'hidden') {
5527     !!!cp ('t227.3');
5528 wakaba 1.153 !!!parse-error (type => 'in table',
5529     text => $token->{tag_name}, token => $token);
5530 wakaba 1.98
5531 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5532 wakaba 1.98
5533     ## TODO: form element pointer
5534    
5535     pop @{$self->{open_elements}};
5536    
5537     !!!next-token;
5538 wakaba 1.125 !!!ack ('t227.2.1');
5539 wakaba 1.126 next B;
5540 wakaba 1.98 } else {
5541     !!!cp ('t227.2');
5542     #
5543     }
5544     } else {
5545     !!!cp ('t227.1');
5546     #
5547     }
5548     } else {
5549     !!!cp ('t227.4');
5550     #
5551     }
5552 wakaba 1.58 } else {
5553 wakaba 1.79 !!!cp ('t227');
5554 wakaba 1.58 #
5555     }
5556 wakaba 1.98
5557 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
5558     token => $token);
5559 wakaba 1.98
5560     $insert = $insert_to_foster;
5561     #
5562 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5563 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5564 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5565 wakaba 1.52 ## have an element in table scope
5566     my $i;
5567     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5568     my $node = $self->{open_elements}->[$_];
5569 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5570 wakaba 1.79 !!!cp ('t228');
5571 wakaba 1.52 $i = $_;
5572     last INSCOPE;
5573 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5574 wakaba 1.79 !!!cp ('t229');
5575 wakaba 1.52 last INSCOPE;
5576     }
5577     } # INSCOPE
5578     unless (defined $i) {
5579 wakaba 1.79 !!!cp ('t230');
5580 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5581     text => $token->{tag_name}, token => $token);
5582 wakaba 1.52 ## Ignore the token
5583 wakaba 1.125 !!!nack ('t230.1');
5584 wakaba 1.42 !!!next-token;
5585 wakaba 1.126 next B;
5586 wakaba 1.79 } else {
5587     !!!cp ('t232');
5588 wakaba 1.42 }
5589    
5590 wakaba 1.52 ## Clear back to table row context
5591 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5592     & TABLE_ROW_SCOPING_EL)) {
5593 wakaba 1.79 !!!cp ('t231');
5594 wakaba 1.83 ## ISSUE: Can this state be reached?
5595 wakaba 1.52 pop @{$self->{open_elements}};
5596     }
5597 wakaba 1.42
5598 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5599 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5600 wakaba 1.52 !!!next-token;
5601 wakaba 1.125 !!!nack ('t231.1');
5602 wakaba 1.126 next B;
5603 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5604 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5605 wakaba 1.52 ## As if </tr>
5606     ## have an element in table scope
5607     my $i;
5608     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5609     my $node = $self->{open_elements}->[$_];
5610 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5611 wakaba 1.79 !!!cp ('t233');
5612 wakaba 1.52 $i = $_;
5613     last INSCOPE;
5614 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5615 wakaba 1.79 !!!cp ('t234');
5616 wakaba 1.52 last INSCOPE;
5617 wakaba 1.42 }
5618 wakaba 1.52 } # INSCOPE
5619     unless (defined $i) {
5620 wakaba 1.79 !!!cp ('t235');
5621 wakaba 1.83 ## TODO: The following is wrong.
5622 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5623     text => $token->{type}, token => $token);
5624 wakaba 1.52 ## Ignore the token
5625 wakaba 1.125 !!!nack ('t236.1');
5626 wakaba 1.52 !!!next-token;
5627 wakaba 1.126 next B;
5628 wakaba 1.42 }
5629 wakaba 1.52
5630     ## Clear back to table row context
5631 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5632     & TABLE_ROW_SCOPING_EL)) {
5633 wakaba 1.79 !!!cp ('t236');
5634 wakaba 1.83 ## ISSUE: Can this state be reached?
5635 wakaba 1.46 pop @{$self->{open_elements}};
5636 wakaba 1.1 }
5637 wakaba 1.46
5638 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5639 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5640 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5641 wakaba 1.1 }
5642    
5643 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5644 wakaba 1.52 ## have an element in table scope
5645     my $i;
5646     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5647     my $node = $self->{open_elements}->[$_];
5648 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5649 wakaba 1.79 !!!cp ('t237');
5650 wakaba 1.52 $i = $_;
5651     last INSCOPE;
5652 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5653 wakaba 1.79 !!!cp ('t238');
5654 wakaba 1.52 last INSCOPE;
5655     }
5656     } # INSCOPE
5657     unless (defined $i) {
5658 wakaba 1.79 !!!cp ('t239');
5659 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5660     text => $token->{tag_name}, token => $token);
5661 wakaba 1.52 ## Ignore the token
5662 wakaba 1.125 !!!nack ('t239.1');
5663 wakaba 1.52 !!!next-token;
5664 wakaba 1.126 next B;
5665 wakaba 1.47 }
5666    
5667     ## Clear back to table body context
5668 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5669     & TABLE_ROWS_SCOPING_EL)) {
5670 wakaba 1.79 !!!cp ('t240');
5671 wakaba 1.47 pop @{$self->{open_elements}};
5672     }
5673    
5674 wakaba 1.52 ## As if <{current node}>
5675     ## have an element in table scope
5676     ## true by definition
5677    
5678     ## Clear back to table body context
5679     ## nop by definition
5680    
5681     pop @{$self->{open_elements}};
5682 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5683 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5684     }
5685    
5686 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5687     ## When you edit the code fragment below, please ensure that
5688     ## the code for <table> in the "in table" insertion mode
5689     ## is synced with it.
5690    
5691 wakaba 1.52 ## have a table element in table scope
5692     my $i;
5693     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5694     my $node = $self->{open_elements}->[$_];
5695 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5696 wakaba 1.79 !!!cp ('t241');
5697 wakaba 1.52 $i = $_;
5698     last INSCOPE;
5699 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5700 wakaba 1.79 !!!cp ('t242');
5701 wakaba 1.52 last INSCOPE;
5702 wakaba 1.47 }
5703 wakaba 1.52 } # INSCOPE
5704     unless (defined $i) {
5705 wakaba 1.79 !!!cp ('t243');
5706 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5707     text => $token->{tag_name}, token => $token);
5708 wakaba 1.52 ## Ignore the token
5709 wakaba 1.125 !!!nack ('t243.1');
5710 wakaba 1.52 !!!next-token;
5711 wakaba 1.126 next B;
5712 wakaba 1.3 }
5713 wakaba 1.52
5714     splice @{$self->{open_elements}}, $i;
5715 wakaba 1.95 pop @{$open_tables};
5716 wakaba 1.1
5717 wakaba 1.52 $self->_reset_insertion_mode;
5718 wakaba 1.47
5719     !!!next-token;
5720 wakaba 1.126 next B;
5721 wakaba 1.47 } elsif ({
5722 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5723 wakaba 1.52 }->{$token->{tag_name}} and
5724 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5725 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5726 wakaba 1.52 ## have an element in table scope
5727     my $i;
5728     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5729     my $node = $self->{open_elements}->[$_];
5730 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5731 wakaba 1.79 !!!cp ('t247');
5732 wakaba 1.52 $i = $_;
5733     last INSCOPE;
5734 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5735 wakaba 1.79 !!!cp ('t248');
5736 wakaba 1.52 last INSCOPE;
5737     }
5738     } # INSCOPE
5739     unless (defined $i) {
5740 wakaba 1.79 !!!cp ('t249');
5741 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5742     text => $token->{tag_name}, token => $token);
5743 wakaba 1.52 ## Ignore the token
5744 wakaba 1.125 !!!nack ('t249.1');
5745 wakaba 1.52 !!!next-token;
5746 wakaba 1.126 next B;
5747 wakaba 1.52 }
5748    
5749 wakaba 1.48 ## As if </tr>
5750     ## have an element in table scope
5751     my $i;
5752     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5753     my $node = $self->{open_elements}->[$_];
5754 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5755 wakaba 1.79 !!!cp ('t250');
5756 wakaba 1.48 $i = $_;
5757     last INSCOPE;
5758 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5759 wakaba 1.79 !!!cp ('t251');
5760 wakaba 1.48 last INSCOPE;
5761     }
5762     } # INSCOPE
5763 wakaba 1.52 unless (defined $i) {
5764 wakaba 1.79 !!!cp ('t252');
5765 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5766     text => 'tr', token => $token);
5767 wakaba 1.52 ## Ignore the token
5768 wakaba 1.125 !!!nack ('t252.1');
5769 wakaba 1.52 !!!next-token;
5770 wakaba 1.126 next B;
5771 wakaba 1.52 }
5772 wakaba 1.48
5773     ## Clear back to table row context
5774 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5775     & TABLE_ROW_SCOPING_EL)) {
5776 wakaba 1.79 !!!cp ('t253');
5777 wakaba 1.83 ## ISSUE: Can this case be reached?
5778 wakaba 1.48 pop @{$self->{open_elements}};
5779     }
5780    
5781     pop @{$self->{open_elements}}; # tr
5782 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5783 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5784     }
5785    
5786     ## have an element in table scope
5787     my $i;
5788     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5789     my $node = $self->{open_elements}->[$_];
5790 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5791 wakaba 1.79 !!!cp ('t254');
5792 wakaba 1.52 $i = $_;
5793     last INSCOPE;
5794 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5795 wakaba 1.79 !!!cp ('t255');
5796 wakaba 1.52 last INSCOPE;
5797     }
5798     } # INSCOPE
5799     unless (defined $i) {
5800 wakaba 1.79 !!!cp ('t256');
5801 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5802     text => $token->{tag_name}, token => $token);
5803 wakaba 1.52 ## Ignore the token
5804 wakaba 1.125 !!!nack ('t256.1');
5805 wakaba 1.52 !!!next-token;
5806 wakaba 1.126 next B;
5807 wakaba 1.52 }
5808    
5809     ## Clear back to table body context
5810 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5811     & TABLE_ROWS_SCOPING_EL)) {
5812 wakaba 1.79 !!!cp ('t257');
5813 wakaba 1.83 ## ISSUE: Can this case be reached?
5814 wakaba 1.52 pop @{$self->{open_elements}};
5815     }
5816    
5817     pop @{$self->{open_elements}};
5818 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5819 wakaba 1.125 !!!nack ('t257.1');
5820 wakaba 1.52 !!!next-token;
5821 wakaba 1.126 next B;
5822 wakaba 1.52 } elsif ({
5823     body => 1, caption => 1, col => 1, colgroup => 1,
5824     html => 1, td => 1, th => 1,
5825 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5826     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5827 wakaba 1.52 }->{$token->{tag_name}}) {
5828 wakaba 1.125 !!!cp ('t258');
5829 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5830     text => $token->{tag_name}, token => $token);
5831 wakaba 1.125 ## Ignore the token
5832     !!!nack ('t258.1');
5833     !!!next-token;
5834 wakaba 1.126 next B;
5835 wakaba 1.58 } else {
5836 wakaba 1.79 !!!cp ('t259');
5837 wakaba 1.153 !!!parse-error (type => 'in table:/',
5838     text => $token->{tag_name}, token => $token);
5839 wakaba 1.52
5840 wakaba 1.58 $insert = $insert_to_foster;
5841     #
5842     }
5843 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5844 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5845 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5846 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5847 wakaba 1.104 !!!cp ('t259.1');
5848 wakaba 1.105 #
5849 wakaba 1.104 } else {
5850     !!!cp ('t259.2');
5851 wakaba 1.105 #
5852 wakaba 1.104 }
5853    
5854     ## Stop parsing
5855     last B;
5856 wakaba 1.58 } else {
5857     die "$0: $token->{type}: Unknown token type";
5858     }
5859 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5860 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5861 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5862     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5863     unless (length $token->{data}) {
5864 wakaba 1.79 !!!cp ('t260');
5865 wakaba 1.52 !!!next-token;
5866 wakaba 1.126 next B;
5867 wakaba 1.52 }
5868     }
5869    
5870 wakaba 1.79 !!!cp ('t261');
5871 wakaba 1.52 #
5872 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5873 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5874 wakaba 1.79 !!!cp ('t262');
5875 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5876 wakaba 1.52 pop @{$self->{open_elements}};
5877 wakaba 1.125 !!!ack ('t262.1');
5878 wakaba 1.52 !!!next-token;
5879 wakaba 1.126 next B;
5880 wakaba 1.52 } else {
5881 wakaba 1.79 !!!cp ('t263');
5882 wakaba 1.52 #
5883     }
5884 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5885 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5886 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5887 wakaba 1.79 !!!cp ('t264');
5888 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5889     text => 'colgroup', token => $token);
5890 wakaba 1.52 ## Ignore the token
5891     !!!next-token;
5892 wakaba 1.126 next B;
5893 wakaba 1.52 } else {
5894 wakaba 1.79 !!!cp ('t265');
5895 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5896 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5897 wakaba 1.52 !!!next-token;
5898 wakaba 1.126 next B;
5899 wakaba 1.52 }
5900     } elsif ($token->{tag_name} eq 'col') {
5901 wakaba 1.79 !!!cp ('t266');
5902 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5903     text => 'col', token => $token);
5904 wakaba 1.52 ## Ignore the token
5905     !!!next-token;
5906 wakaba 1.126 next B;
5907 wakaba 1.52 } else {
5908 wakaba 1.79 !!!cp ('t267');
5909 wakaba 1.52 #
5910     }
5911 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5912 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5913 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5914     !!!cp ('t270.2');
5915     ## Stop parsing.
5916     last B;
5917     } else {
5918     ## NOTE: As if </colgroup>.
5919     !!!cp ('t270.1');
5920     pop @{$self->{open_elements}}; # colgroup
5921     $self->{insertion_mode} = IN_TABLE_IM;
5922     ## Reprocess.
5923 wakaba 1.126 next B;
5924 wakaba 1.104 }
5925     } else {
5926     die "$0: $token->{type}: Unknown token type";
5927     }
5928 wakaba 1.52
5929     ## As if </colgroup>
5930 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5931 wakaba 1.79 !!!cp ('t269');
5932 wakaba 1.104 ## TODO: Wrong error type?
5933 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5934     text => 'colgroup', token => $token);
5935 wakaba 1.52 ## Ignore the token
5936 wakaba 1.125 !!!nack ('t269.1');
5937 wakaba 1.52 !!!next-token;
5938 wakaba 1.126 next B;
5939 wakaba 1.52 } else {
5940 wakaba 1.79 !!!cp ('t270');
5941 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5942 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5943 wakaba 1.125 !!!ack-later;
5944 wakaba 1.52 ## reprocess
5945 wakaba 1.126 next B;
5946 wakaba 1.52 }
5947 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5948 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5949 wakaba 1.79 !!!cp ('t271');
5950 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5951     !!!next-token;
5952 wakaba 1.126 next B;
5953 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5954 wakaba 1.123 if ($token->{tag_name} eq 'option') {
5955     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5956     !!!cp ('t272');
5957     ## As if </option>
5958     pop @{$self->{open_elements}};
5959     } else {
5960     !!!cp ('t273');
5961     }
5962 wakaba 1.52
5963 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5964 wakaba 1.125 !!!nack ('t273.1');
5965 wakaba 1.123 !!!next-token;
5966 wakaba 1.126 next B;
5967 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
5968     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5969     !!!cp ('t274');
5970     ## As if </option>
5971     pop @{$self->{open_elements}};
5972     } else {
5973     !!!cp ('t275');
5974     }
5975 wakaba 1.52
5976 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5977     !!!cp ('t276');
5978     ## As if </optgroup>
5979     pop @{$self->{open_elements}};
5980     } else {
5981     !!!cp ('t277');
5982     }
5983 wakaba 1.52
5984 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5985 wakaba 1.125 !!!nack ('t277.1');
5986 wakaba 1.123 !!!next-token;
5987 wakaba 1.126 next B;
5988 wakaba 1.146 } elsif ({
5989     select => 1, input => 1, textarea => 1,
5990     }->{$token->{tag_name}} or
5991 wakaba 1.101 ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5992     {
5993     caption => 1, table => 1,
5994     tbody => 1, tfoot => 1, thead => 1,
5995     tr => 1, td => 1, th => 1,
5996     }->{$token->{tag_name}})) {
5997     ## TODO: The type below is not good - <select> is replaced by </select>
5998 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'select',
5999     token => $token);
6000 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
6001     ## as if there were </select> (otherwise).
6002 wakaba 1.123 ## have an element in table scope
6003     my $i;
6004     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6005     my $node = $self->{open_elements}->[$_];
6006     if ($node->[1] & SELECT_EL) {
6007     !!!cp ('t278');
6008     $i = $_;
6009     last INSCOPE;
6010     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6011     !!!cp ('t279');
6012     last INSCOPE;
6013     }
6014     } # INSCOPE
6015     unless (defined $i) {
6016     !!!cp ('t280');
6017 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6018     text => 'select', token => $token);
6019 wakaba 1.123 ## Ignore the token
6020 wakaba 1.125 !!!nack ('t280.1');
6021 wakaba 1.123 !!!next-token;
6022 wakaba 1.126 next B;
6023 wakaba 1.123 }
6024 wakaba 1.52
6025 wakaba 1.123 !!!cp ('t281');
6026     splice @{$self->{open_elements}}, $i;
6027 wakaba 1.52
6028 wakaba 1.123 $self->_reset_insertion_mode;
6029 wakaba 1.47
6030 wakaba 1.101 if ($token->{tag_name} eq 'select') {
6031 wakaba 1.125 !!!nack ('t281.2');
6032 wakaba 1.101 !!!next-token;
6033 wakaba 1.126 next B;
6034 wakaba 1.101 } else {
6035     !!!cp ('t281.1');
6036 wakaba 1.125 !!!ack-later;
6037 wakaba 1.101 ## Reprocess the token.
6038 wakaba 1.126 next B;
6039 wakaba 1.101 }
6040 wakaba 1.58 } else {
6041 wakaba 1.79 !!!cp ('t282');
6042 wakaba 1.153 !!!parse-error (type => 'in select',
6043     text => $token->{tag_name}, token => $token);
6044 wakaba 1.58 ## Ignore the token
6045 wakaba 1.125 !!!nack ('t282.1');
6046 wakaba 1.58 !!!next-token;
6047 wakaba 1.126 next B;
6048 wakaba 1.58 }
6049     } elsif ($token->{type} == END_TAG_TOKEN) {
6050 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
6051     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
6052     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
6053     !!!cp ('t283');
6054     ## As if </option>
6055     splice @{$self->{open_elements}}, -2;
6056     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6057     !!!cp ('t284');
6058     pop @{$self->{open_elements}};
6059     } else {
6060     !!!cp ('t285');
6061 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6062     text => $token->{tag_name}, token => $token);
6063 wakaba 1.123 ## Ignore the token
6064     }
6065 wakaba 1.125 !!!nack ('t285.1');
6066 wakaba 1.123 !!!next-token;
6067 wakaba 1.126 next B;
6068 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
6069     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6070     !!!cp ('t286');
6071     pop @{$self->{open_elements}};
6072     } else {
6073     !!!cp ('t287');
6074 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6075     text => $token->{tag_name}, token => $token);
6076 wakaba 1.123 ## Ignore the token
6077     }
6078 wakaba 1.125 !!!nack ('t287.1');
6079 wakaba 1.123 !!!next-token;
6080 wakaba 1.126 next B;
6081 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
6082     ## have an element in table scope
6083     my $i;
6084     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6085     my $node = $self->{open_elements}->[$_];
6086     if ($node->[1] & SELECT_EL) {
6087     !!!cp ('t288');
6088     $i = $_;
6089     last INSCOPE;
6090     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6091     !!!cp ('t289');
6092     last INSCOPE;
6093     }
6094     } # INSCOPE
6095     unless (defined $i) {
6096     !!!cp ('t290');
6097 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6098     text => $token->{tag_name}, token => $token);
6099 wakaba 1.123 ## Ignore the token
6100 wakaba 1.125 !!!nack ('t290.1');
6101 wakaba 1.123 !!!next-token;
6102 wakaba 1.126 next B;
6103 wakaba 1.123 }
6104 wakaba 1.52
6105 wakaba 1.123 !!!cp ('t291');
6106     splice @{$self->{open_elements}}, $i;
6107 wakaba 1.52
6108 wakaba 1.123 $self->_reset_insertion_mode;
6109 wakaba 1.52
6110 wakaba 1.125 !!!nack ('t291.1');
6111 wakaba 1.123 !!!next-token;
6112 wakaba 1.126 next B;
6113 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6114     {
6115     caption => 1, table => 1, tbody => 1,
6116     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6117     }->{$token->{tag_name}}) {
6118 wakaba 1.83 ## TODO: Is the following error wrong?
6119 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6120     text => $token->{tag_name}, token => $token);
6121 wakaba 1.52
6122 wakaba 1.123 ## have an element in table scope
6123     my $i;
6124     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6125     my $node = $self->{open_elements}->[$_];
6126     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6127     !!!cp ('t292');
6128     $i = $_;
6129     last INSCOPE;
6130     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6131     !!!cp ('t293');
6132     last INSCOPE;
6133     }
6134     } # INSCOPE
6135     unless (defined $i) {
6136     !!!cp ('t294');
6137     ## Ignore the token
6138 wakaba 1.125 !!!nack ('t294.1');
6139 wakaba 1.123 !!!next-token;
6140 wakaba 1.126 next B;
6141 wakaba 1.123 }
6142 wakaba 1.52
6143 wakaba 1.123 ## As if </select>
6144     ## have an element in table scope
6145     undef $i;
6146     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6147     my $node = $self->{open_elements}->[$_];
6148     if ($node->[1] & SELECT_EL) {
6149     !!!cp ('t295');
6150     $i = $_;
6151     last INSCOPE;
6152     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6153 wakaba 1.83 ## ISSUE: Can this state be reached?
6154 wakaba 1.123 !!!cp ('t296');
6155     last INSCOPE;
6156     }
6157     } # INSCOPE
6158     unless (defined $i) {
6159     !!!cp ('t297');
6160 wakaba 1.83 ## TODO: Is the following error type correct?
6161 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6162     text => 'select', token => $token);
6163 wakaba 1.123 ## Ignore the </select> token
6164 wakaba 1.125 !!!nack ('t297.1');
6165 wakaba 1.123 !!!next-token; ## TODO: ok?
6166 wakaba 1.126 next B;
6167 wakaba 1.123 }
6168 wakaba 1.52
6169 wakaba 1.123 !!!cp ('t298');
6170     splice @{$self->{open_elements}}, $i;
6171 wakaba 1.52
6172 wakaba 1.123 $self->_reset_insertion_mode;
6173 wakaba 1.52
6174 wakaba 1.125 !!!ack-later;
6175 wakaba 1.123 ## reprocess
6176 wakaba 1.126 next B;
6177 wakaba 1.58 } else {
6178 wakaba 1.79 !!!cp ('t299');
6179 wakaba 1.153 !!!parse-error (type => 'in select:/',
6180     text => $token->{tag_name}, token => $token);
6181 wakaba 1.52 ## Ignore the token
6182 wakaba 1.125 !!!nack ('t299.3');
6183 wakaba 1.52 !!!next-token;
6184 wakaba 1.126 next B;
6185 wakaba 1.58 }
6186 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6187 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6188 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6189     !!!cp ('t299.1');
6190 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6191 wakaba 1.104 } else {
6192     !!!cp ('t299.2');
6193     }
6194    
6195     ## Stop parsing.
6196     last B;
6197 wakaba 1.58 } else {
6198     die "$0: $token->{type}: Unknown token type";
6199     }
6200 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6201 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6202 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6203     my $data = $1;
6204     ## As if in body
6205     $reconstruct_active_formatting_elements->($insert_to_current);
6206    
6207     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6208    
6209     unless (length $token->{data}) {
6210 wakaba 1.79 !!!cp ('t300');
6211 wakaba 1.52 !!!next-token;
6212 wakaba 1.126 next B;
6213 wakaba 1.52 }
6214     }
6215    
6216 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6217 wakaba 1.79 !!!cp ('t301');
6218 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6219 wakaba 1.52
6220 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6221 wakaba 1.79 } else {
6222     !!!cp ('t302');
6223 wakaba 1.52 }
6224    
6225     ## "after body" insertion mode
6226 wakaba 1.153 !!!parse-error (type => 'after body:#text', token => $token);
6227 wakaba 1.52
6228 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6229 wakaba 1.52 ## reprocess
6230 wakaba 1.126 next B;
6231 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6232 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6233 wakaba 1.79 !!!cp ('t303');
6234 wakaba 1.153 !!!parse-error (type => 'after html',
6235     text => $token->{tag_name}, token => $token);
6236 wakaba 1.52
6237 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6238 wakaba 1.79 } else {
6239     !!!cp ('t304');
6240 wakaba 1.52 }
6241    
6242     ## "after body" insertion mode
6243 wakaba 1.153 !!!parse-error (type => 'after body',
6244     text => $token->{tag_name}, token => $token);
6245 wakaba 1.52
6246 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6247 wakaba 1.125 !!!ack-later;
6248 wakaba 1.52 ## reprocess
6249 wakaba 1.126 next B;
6250 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6251 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6252 wakaba 1.79 !!!cp ('t305');
6253 wakaba 1.153 !!!parse-error (type => 'after html:/',
6254     text => $token->{tag_name}, token => $token);
6255 wakaba 1.52
6256 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6257 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6258 wakaba 1.79 } else {
6259     !!!cp ('t306');
6260 wakaba 1.52 }
6261    
6262     ## "after body" insertion mode
6263     if ($token->{tag_name} eq 'html') {
6264     if (defined $self->{inner_html_node}) {
6265 wakaba 1.79 !!!cp ('t307');
6266 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6267     text => 'html', token => $token);
6268 wakaba 1.52 ## Ignore the token
6269     !!!next-token;
6270 wakaba 1.126 next B;
6271 wakaba 1.52 } else {
6272 wakaba 1.79 !!!cp ('t308');
6273 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
6274 wakaba 1.52 !!!next-token;
6275 wakaba 1.126 next B;
6276 wakaba 1.52 }
6277     } else {
6278 wakaba 1.79 !!!cp ('t309');
6279 wakaba 1.153 !!!parse-error (type => 'after body:/',
6280     text => $token->{tag_name}, token => $token);
6281 wakaba 1.52
6282 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6283 wakaba 1.52 ## reprocess
6284 wakaba 1.126 next B;
6285 wakaba 1.52 }
6286 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6287     !!!cp ('t309.2');
6288     ## Stop parsing
6289     last B;
6290 wakaba 1.52 } else {
6291     die "$0: $token->{type}: Unknown token type";
6292     }
6293 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
6294 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6295 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6296     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6297    
6298     unless (length $token->{data}) {
6299 wakaba 1.79 !!!cp ('t310');
6300 wakaba 1.52 !!!next-token;
6301 wakaba 1.126 next B;
6302 wakaba 1.52 }
6303     }
6304    
6305     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6306 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6307 wakaba 1.79 !!!cp ('t311');
6308 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
6309 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6310 wakaba 1.79 !!!cp ('t312');
6311 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
6312 wakaba 1.158 } else { # "after after frameset"
6313 wakaba 1.79 !!!cp ('t313');
6314 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6315 wakaba 1.52 }
6316    
6317     ## Ignore the token.
6318     if (length $token->{data}) {
6319 wakaba 1.79 !!!cp ('t314');
6320 wakaba 1.52 ## reprocess the rest of characters
6321     } else {
6322 wakaba 1.79 !!!cp ('t315');
6323 wakaba 1.52 !!!next-token;
6324     }
6325 wakaba 1.126 next B;
6326 wakaba 1.52 }
6327    
6328     die qq[$0: Character "$token->{data}"];
6329 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6330 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6331 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6332 wakaba 1.79 !!!cp ('t318');
6333 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6334 wakaba 1.125 !!!nack ('t318.1');
6335 wakaba 1.52 !!!next-token;
6336 wakaba 1.126 next B;
6337 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
6338 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6339 wakaba 1.79 !!!cp ('t319');
6340 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6341 wakaba 1.52 pop @{$self->{open_elements}};
6342 wakaba 1.125 !!!ack ('t319.1');
6343 wakaba 1.52 !!!next-token;
6344 wakaba 1.126 next B;
6345 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
6346 wakaba 1.79 !!!cp ('t320');
6347 wakaba 1.148 ## NOTE: As if in head.
6348 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6349 wakaba 1.126 next B;
6350 wakaba 1.158
6351     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
6352     ## has no parse error.
6353 wakaba 1.52 } else {
6354 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6355 wakaba 1.79 !!!cp ('t321');
6356 wakaba 1.153 !!!parse-error (type => 'in frameset',
6357     text => $token->{tag_name}, token => $token);
6358 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6359 wakaba 1.79 !!!cp ('t322');
6360 wakaba 1.153 !!!parse-error (type => 'after frameset',
6361     text => $token->{tag_name}, token => $token);
6362 wakaba 1.158 } else { # "after after frameset"
6363     !!!cp ('t322.2');
6364     !!!parse-error (type => 'after after frameset',
6365     text => $token->{tag_name}, token => $token);
6366 wakaba 1.52 }
6367     ## Ignore the token
6368 wakaba 1.125 !!!nack ('t322.1');
6369 wakaba 1.52 !!!next-token;
6370 wakaba 1.126 next B;
6371 wakaba 1.52 }
6372 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6373 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6374 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6375 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6376 wakaba 1.52 @{$self->{open_elements}} == 1) {
6377 wakaba 1.79 !!!cp ('t325');
6378 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6379     text => $token->{tag_name}, token => $token);
6380 wakaba 1.52 ## Ignore the token
6381     !!!next-token;
6382     } else {
6383 wakaba 1.79 !!!cp ('t326');
6384 wakaba 1.52 pop @{$self->{open_elements}};
6385     !!!next-token;
6386     }
6387 wakaba 1.47
6388 wakaba 1.52 if (not defined $self->{inner_html_node} and
6389 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6390 wakaba 1.79 !!!cp ('t327');
6391 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6392 wakaba 1.79 } else {
6393     !!!cp ('t328');
6394 wakaba 1.52 }
6395 wakaba 1.126 next B;
6396 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
6397 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6398 wakaba 1.79 !!!cp ('t329');
6399 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6400 wakaba 1.52 !!!next-token;
6401 wakaba 1.126 next B;
6402 wakaba 1.52 } else {
6403 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6404 wakaba 1.79 !!!cp ('t330');
6405 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
6406     text => $token->{tag_name}, token => $token);
6407 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6408     !!!cp ('t330.1');
6409     !!!parse-error (type => 'after frameset:/',
6410     text => $token->{tag_name}, token => $token);
6411     } else { # "after after frameset"
6412 wakaba 1.79 !!!cp ('t331');
6413 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
6414 wakaba 1.153 text => $token->{tag_name}, token => $token);
6415 wakaba 1.52 }
6416     ## Ignore the token
6417     !!!next-token;
6418 wakaba 1.126 next B;
6419 wakaba 1.52 }
6420 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6421 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6422 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6423     !!!cp ('t331.1');
6424 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6425 wakaba 1.104 } else {
6426     !!!cp ('t331.2');
6427     }
6428    
6429     ## Stop parsing
6430     last B;
6431 wakaba 1.52 } else {
6432     die "$0: $token->{type}: Unknown token type";
6433     }
6434 wakaba 1.47
6435 wakaba 1.52 ## ISSUE: An issue in spec here
6436     } else {
6437     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6438     }
6439 wakaba 1.47
6440 wakaba 1.52 ## "in body" insertion mode
6441 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
6442 wakaba 1.52 if ($token->{tag_name} eq 'script') {
6443 wakaba 1.79 !!!cp ('t332');
6444 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6445 wakaba 1.100 $script_start_tag->();
6446 wakaba 1.126 next B;
6447 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
6448 wakaba 1.79 !!!cp ('t333');
6449 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6450 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6451 wakaba 1.126 next B;
6452 wakaba 1.52 } elsif ({
6453     base => 1, link => 1,
6454     }->{$token->{tag_name}}) {
6455 wakaba 1.79 !!!cp ('t334');
6456 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6457 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6458 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6459 wakaba 1.125 !!!ack ('t334.1');
6460 wakaba 1.52 !!!next-token;
6461 wakaba 1.126 next B;
6462 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
6463     ## NOTE: This is an "as if in head" code clone, only "-t" differs
6464 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6465 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6466 wakaba 1.46
6467 wakaba 1.52 unless ($self->{confident}) {
6468 wakaba 1.134 if ($token->{attributes}->{charset}) {
6469 wakaba 1.79 !!!cp ('t335');
6470 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6471     ## in the {change_encoding} callback.
6472 wakaba 1.63 $self->{change_encoding}
6473 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
6474 wakaba 1.66
6475     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6476     ->set_user_data (manakai_has_reference =>
6477     $token->{attributes}->{charset}
6478     ->{has_reference});
6479 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
6480     if ($token->{attributes}->{content}->{value}
6481 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6482 wakaba 1.70 [\x09-\x0D\x20]*=
6483 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6484 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
6485 wakaba 1.79 !!!cp ('t336');
6486 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6487     ## in the {change_encoding} callback.
6488 wakaba 1.63 $self->{change_encoding}
6489 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6490 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6491     ->set_user_data (manakai_has_reference =>
6492     $token->{attributes}->{content}
6493     ->{has_reference});
6494 wakaba 1.63 }
6495 wakaba 1.52 }
6496 wakaba 1.66 } else {
6497     if ($token->{attributes}->{charset}) {
6498 wakaba 1.79 !!!cp ('t337');
6499 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6500     ->set_user_data (manakai_has_reference =>
6501     $token->{attributes}->{charset}
6502     ->{has_reference});
6503     }
6504 wakaba 1.68 if ($token->{attributes}->{content}) {
6505 wakaba 1.79 !!!cp ('t338');
6506 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6507     ->set_user_data (manakai_has_reference =>
6508     $token->{attributes}->{content}
6509     ->{has_reference});
6510     }
6511 wakaba 1.52 }
6512 wakaba 1.1
6513 wakaba 1.125 !!!ack ('t338.1');
6514 wakaba 1.52 !!!next-token;
6515 wakaba 1.126 next B;
6516 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
6517 wakaba 1.79 !!!cp ('t341');
6518 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6519 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6520 wakaba 1.126 next B;
6521 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
6522 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
6523 wakaba 1.46
6524 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
6525 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6526 wakaba 1.79 !!!cp ('t342');
6527 wakaba 1.52 ## Ignore the token
6528     } else {
6529     my $body_el = $self->{open_elements}->[1]->[0];
6530     for my $attr_name (keys %{$token->{attributes}}) {
6531     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6532 wakaba 1.79 !!!cp ('t343');
6533 wakaba 1.52 $body_el->set_attribute_ns
6534     (undef, [undef, $attr_name],
6535     $token->{attributes}->{$attr_name}->{value});
6536     }
6537     }
6538     }
6539 wakaba 1.125 !!!nack ('t343.1');
6540 wakaba 1.52 !!!next-token;
6541 wakaba 1.126 next B;
6542 wakaba 1.52 } elsif ({
6543     address => 1, blockquote => 1, center => 1, dir => 1,
6544 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6545     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6546 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6547     pre => 1, listing => 1,
6548 wakaba 1.109 form => 1,
6549     table => 1,
6550     hr => 1,
6551 wakaba 1.52 }->{$token->{tag_name}}) {
6552 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6553     !!!cp ('t350');
6554 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6555 wakaba 1.109 ## Ignore the token
6556 wakaba 1.125 !!!nack ('t350.1');
6557 wakaba 1.109 !!!next-token;
6558 wakaba 1.126 next B;
6559 wakaba 1.109 }
6560    
6561 wakaba 1.52 ## has a p element in scope
6562     INSCOPE: for (reverse @{$self->{open_elements}}) {
6563 wakaba 1.123 if ($_->[1] & P_EL) {
6564 wakaba 1.79 !!!cp ('t344');
6565 wakaba 1.125 !!!back-token; # <form>
6566 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6567     line => $token->{line}, column => $token->{column}};
6568 wakaba 1.126 next B;
6569 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6570 wakaba 1.79 !!!cp ('t345');
6571 wakaba 1.52 last INSCOPE;
6572     }
6573     } # INSCOPE
6574    
6575 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6576 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6577 wakaba 1.125 !!!nack ('t346.1');
6578 wakaba 1.52 !!!next-token;
6579 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6580 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6581     unless (length $token->{data}) {
6582 wakaba 1.79 !!!cp ('t346');
6583 wakaba 1.1 !!!next-token;
6584 wakaba 1.79 } else {
6585     !!!cp ('t349');
6586 wakaba 1.52 }
6587 wakaba 1.79 } else {
6588     !!!cp ('t348');
6589 wakaba 1.52 }
6590 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6591     !!!cp ('t347.1');
6592     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6593    
6594 wakaba 1.125 !!!nack ('t347.2');
6595 wakaba 1.109 !!!next-token;
6596     } elsif ($token->{tag_name} eq 'table') {
6597     !!!cp ('t382');
6598     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6599    
6600     $self->{insertion_mode} = IN_TABLE_IM;
6601    
6602 wakaba 1.125 !!!nack ('t382.1');
6603 wakaba 1.109 !!!next-token;
6604     } elsif ($token->{tag_name} eq 'hr') {
6605     !!!cp ('t386');
6606     pop @{$self->{open_elements}};
6607    
6608 wakaba 1.125 !!!nack ('t386.1');
6609 wakaba 1.109 !!!next-token;
6610 wakaba 1.52 } else {
6611 wakaba 1.125 !!!nack ('t347.1');
6612 wakaba 1.52 !!!next-token;
6613     }
6614 wakaba 1.126 next B;
6615 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6616 wakaba 1.52 ## has a p element in scope
6617     INSCOPE: for (reverse @{$self->{open_elements}}) {
6618 wakaba 1.123 if ($_->[1] & P_EL) {
6619 wakaba 1.79 !!!cp ('t353');
6620 wakaba 1.125 !!!back-token; # <x>
6621 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6622     line => $token->{line}, column => $token->{column}};
6623 wakaba 1.126 next B;
6624 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6625 wakaba 1.79 !!!cp ('t354');
6626 wakaba 1.52 last INSCOPE;
6627     }
6628     } # INSCOPE
6629    
6630     ## Step 1
6631     my $i = -1;
6632     my $node = $self->{open_elements}->[$i];
6633 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6634     dt => {dt => 1, dd => 1},
6635     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6636 wakaba 1.52 LI: {
6637     ## Step 2
6638 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6639 wakaba 1.52 if ($i != -1) {
6640 wakaba 1.79 !!!cp ('t355');
6641 wakaba 1.122 !!!parse-error (type => 'not closed',
6642 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
6643 wakaba 1.122 ->manakai_local_name,
6644     token => $token);
6645 wakaba 1.79 } else {
6646     !!!cp ('t356');
6647 wakaba 1.52 }
6648     splice @{$self->{open_elements}}, $i;
6649     last LI;
6650 wakaba 1.79 } else {
6651     !!!cp ('t357');
6652 wakaba 1.52 }
6653    
6654     ## Step 3
6655 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6656 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6657 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6658     $node->[1] & SCOPING_EL) and
6659     not ($node->[1] & ADDRESS_EL) and
6660     not ($node->[1] & DIV_EL)) {
6661 wakaba 1.79 !!!cp ('t358');
6662 wakaba 1.52 last LI;
6663     }
6664    
6665 wakaba 1.79 !!!cp ('t359');
6666 wakaba 1.52 ## Step 4
6667     $i--;
6668     $node = $self->{open_elements}->[$i];
6669     redo LI;
6670     } # LI
6671    
6672 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6673 wakaba 1.125 !!!nack ('t359.1');
6674 wakaba 1.52 !!!next-token;
6675 wakaba 1.126 next B;
6676 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6677     ## has a p element in scope
6678     INSCOPE: for (reverse @{$self->{open_elements}}) {
6679 wakaba 1.123 if ($_->[1] & P_EL) {
6680 wakaba 1.79 !!!cp ('t367');
6681 wakaba 1.125 !!!back-token; # <plaintext>
6682 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6683     line => $token->{line}, column => $token->{column}};
6684 wakaba 1.126 next B;
6685 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6686 wakaba 1.79 !!!cp ('t368');
6687 wakaba 1.52 last INSCOPE;
6688 wakaba 1.46 }
6689 wakaba 1.52 } # INSCOPE
6690    
6691 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6692 wakaba 1.52
6693     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6694    
6695 wakaba 1.125 !!!nack ('t368.1');
6696 wakaba 1.52 !!!next-token;
6697 wakaba 1.126 next B;
6698 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6699     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6700     my $node = $active_formatting_elements->[$i];
6701 wakaba 1.123 if ($node->[1] & A_EL) {
6702 wakaba 1.79 !!!cp ('t371');
6703 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6704 wakaba 1.52
6705 wakaba 1.125 !!!back-token; # <a>
6706 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6707     line => $token->{line}, column => $token->{column}};
6708 wakaba 1.113 $formatting_end_tag->($token);
6709 wakaba 1.52
6710     AFE2: for (reverse 0..$#$active_formatting_elements) {
6711     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
6712 wakaba 1.79 !!!cp ('t372');
6713 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
6714     last AFE2;
6715 wakaba 1.1 }
6716 wakaba 1.52 } # AFE2
6717     OE: for (reverse 0..$#{$self->{open_elements}}) {
6718     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
6719 wakaba 1.79 !!!cp ('t373');
6720 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
6721     last OE;
6722 wakaba 1.1 }
6723 wakaba 1.52 } # OE
6724     last AFE;
6725     } elsif ($node->[0] eq '#marker') {
6726 wakaba 1.79 !!!cp ('t374');
6727 wakaba 1.52 last AFE;
6728     }
6729     } # AFE
6730    
6731     $reconstruct_active_formatting_elements->($insert_to_current);
6732 wakaba 1.1
6733 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6734 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6735 wakaba 1.1
6736 wakaba 1.125 !!!nack ('t374.1');
6737 wakaba 1.52 !!!next-token;
6738 wakaba 1.126 next B;
6739 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6740     $reconstruct_active_formatting_elements->($insert_to_current);
6741 wakaba 1.1
6742 wakaba 1.52 ## has a |nobr| element in scope
6743     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6744     my $node = $self->{open_elements}->[$_];
6745 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6746 wakaba 1.79 !!!cp ('t376');
6747 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6748 wakaba 1.125 !!!back-token; # <nobr>
6749 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6750     line => $token->{line}, column => $token->{column}};
6751 wakaba 1.126 next B;
6752 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6753 wakaba 1.79 !!!cp ('t377');
6754 wakaba 1.52 last INSCOPE;
6755     }
6756     } # INSCOPE
6757    
6758 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6759 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6760    
6761 wakaba 1.125 !!!nack ('t377.1');
6762 wakaba 1.52 !!!next-token;
6763 wakaba 1.126 next B;
6764 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6765     ## has a button element in scope
6766     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6767     my $node = $self->{open_elements}->[$_];
6768 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6769 wakaba 1.79 !!!cp ('t378');
6770 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6771 wakaba 1.125 !!!back-token; # <button>
6772 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6773     line => $token->{line}, column => $token->{column}};
6774 wakaba 1.126 next B;
6775 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6776 wakaba 1.79 !!!cp ('t379');
6777 wakaba 1.52 last INSCOPE;
6778     }
6779     } # INSCOPE
6780    
6781     $reconstruct_active_formatting_elements->($insert_to_current);
6782    
6783 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6784 wakaba 1.85
6785     ## TODO: associate with $self->{form_element} if defined
6786    
6787 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6788 wakaba 1.1
6789 wakaba 1.125 !!!nack ('t379.1');
6790 wakaba 1.52 !!!next-token;
6791 wakaba 1.126 next B;
6792 wakaba 1.103 } elsif ({
6793 wakaba 1.109 xmp => 1,
6794     iframe => 1,
6795     noembed => 1,
6796 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
6797 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
6798 wakaba 1.103 }->{$token->{tag_name}}) {
6799 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6800     !!!cp ('t381');
6801     $reconstruct_active_formatting_elements->($insert_to_current);
6802     } else {
6803     !!!cp ('t399');
6804     }
6805     ## NOTE: There is an "as if in body" code clone.
6806 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6807 wakaba 1.126 next B;
6808 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6809 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6810 wakaba 1.52
6811     if (defined $self->{form_element}) {
6812 wakaba 1.79 !!!cp ('t389');
6813 wakaba 1.52 ## Ignore the token
6814 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
6815 wakaba 1.52 !!!next-token;
6816 wakaba 1.126 next B;
6817 wakaba 1.52 } else {
6818 wakaba 1.147 !!!ack ('t391.1');
6819    
6820 wakaba 1.52 my $at = $token->{attributes};
6821     my $form_attrs;
6822     $form_attrs->{action} = $at->{action} if $at->{action};
6823     my $prompt_attr = $at->{prompt};
6824     $at->{name} = {name => 'name', value => 'isindex'};
6825     delete $at->{action};
6826     delete $at->{prompt};
6827     my @tokens = (
6828 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6829 wakaba 1.114 attributes => $form_attrs,
6830     line => $token->{line}, column => $token->{column}},
6831     {type => START_TAG_TOKEN, tag_name => 'hr',
6832     line => $token->{line}, column => $token->{column}},
6833     {type => START_TAG_TOKEN, tag_name => 'p',
6834     line => $token->{line}, column => $token->{column}},
6835     {type => START_TAG_TOKEN, tag_name => 'label',
6836     line => $token->{line}, column => $token->{column}},
6837 wakaba 1.52 );
6838     if ($prompt_attr) {
6839 wakaba 1.79 !!!cp ('t390');
6840 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6841 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6842     };
6843 wakaba 1.1 } else {
6844 wakaba 1.79 !!!cp ('t391');
6845 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6846 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6847 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6848     }; # SHOULD
6849 wakaba 1.52 ## TODO: make this configurable
6850 wakaba 1.1 }
6851 wakaba 1.52 push @tokens,
6852 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6853     line => $token->{line}, column => $token->{column}},
6854 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6855 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6856     line => $token->{line}, column => $token->{column}},
6857     {type => END_TAG_TOKEN, tag_name => 'p',
6858     line => $token->{line}, column => $token->{column}},
6859     {type => START_TAG_TOKEN, tag_name => 'hr',
6860     line => $token->{line}, column => $token->{column}},
6861     {type => END_TAG_TOKEN, tag_name => 'form',
6862     line => $token->{line}, column => $token->{column}};
6863 wakaba 1.52 !!!back-token (@tokens);
6864 wakaba 1.125 !!!next-token;
6865 wakaba 1.126 next B;
6866 wakaba 1.52 }
6867     } elsif ($token->{tag_name} eq 'textarea') {
6868     my $tag_name = $token->{tag_name};
6869     my $el;
6870 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
6871 wakaba 1.52
6872     ## TODO: $self->{form_element} if defined
6873     $self->{content_model} = RCDATA_CONTENT_MODEL;
6874     delete $self->{escape}; # MUST
6875    
6876     $insert->($el);
6877    
6878     my $text = '';
6879 wakaba 1.125 !!!nack ('t392.1');
6880 wakaba 1.52 !!!next-token;
6881 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6882 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6883 wakaba 1.51 unless (length $token->{data}) {
6884 wakaba 1.79 !!!cp ('t392');
6885 wakaba 1.51 !!!next-token;
6886 wakaba 1.79 } else {
6887     !!!cp ('t393');
6888 wakaba 1.51 }
6889 wakaba 1.79 } else {
6890     !!!cp ('t394');
6891 wakaba 1.51 }
6892 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6893 wakaba 1.79 !!!cp ('t395');
6894 wakaba 1.52 $text .= $token->{data};
6895     !!!next-token;
6896     }
6897     if (length $text) {
6898 wakaba 1.79 !!!cp ('t396');
6899 wakaba 1.52 $el->manakai_append_text ($text);
6900     }
6901    
6902     $self->{content_model} = PCDATA_CONTENT_MODEL;
6903 wakaba 1.51
6904 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6905 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6906 wakaba 1.79 !!!cp ('t397');
6907 wakaba 1.52 ## Ignore the token
6908     } else {
6909 wakaba 1.79 !!!cp ('t398');
6910 wakaba 1.153 !!!parse-error (type => 'in RCDATA:#eof', token => $token);
6911 wakaba 1.51 }
6912 wakaba 1.52 !!!next-token;
6913 wakaba 1.126 next B;
6914 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
6915     $token->{tag_name} eq 'rp') {
6916     ## has a |ruby| element in scope
6917     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6918     my $node = $self->{open_elements}->[$_];
6919     if ($node->[1] & RUBY_EL) {
6920     !!!cp ('t398.1');
6921     ## generate implied end tags
6922     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6923     !!!cp ('t398.2');
6924     pop @{$self->{open_elements}};
6925     }
6926     unless ($self->{open_elements}->[-1]->[1] & RUBY_EL) {
6927     !!!cp ('t398.3');
6928     !!!parse-error (type => 'not closed',
6929 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
6930 wakaba 1.151 ->manakai_local_name,
6931     token => $token);
6932     pop @{$self->{open_elements}}
6933     while not $self->{open_elements}->[-1]->[1] & RUBY_EL;
6934     }
6935     last INSCOPE;
6936     } elsif ($node->[1] & SCOPING_EL) {
6937     !!!cp ('t398.4');
6938     last INSCOPE;
6939     }
6940     } # INSCOPE
6941    
6942     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6943    
6944     !!!nack ('t398.5');
6945     !!!next-token;
6946     redo B;
6947 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
6948     $token->{tag_name} eq 'svg') {
6949     $reconstruct_active_formatting_elements->($insert_to_current);
6950 wakaba 1.131
6951 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
6952    
6953 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
6954    
6955     ## "adjust foreign attributes" - done in insert-element-f
6956 wakaba 1.126
6957 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
6958 wakaba 1.126
6959     if ($self->{self_closing}) {
6960     pop @{$self->{open_elements}};
6961     !!!ack ('t398.1');
6962     } else {
6963     !!!cp ('t398.2');
6964     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
6965     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
6966     ## mode, "in body" (not "in foreign content") secondary insertion
6967     ## mode, maybe.
6968     }
6969    
6970     !!!next-token;
6971     next B;
6972 wakaba 1.52 } elsif ({
6973     caption => 1, col => 1, colgroup => 1, frame => 1,
6974     frameset => 1, head => 1, option => 1, optgroup => 1,
6975     tbody => 1, td => 1, tfoot => 1, th => 1,
6976     thead => 1, tr => 1,
6977     }->{$token->{tag_name}}) {
6978 wakaba 1.79 !!!cp ('t401');
6979 wakaba 1.153 !!!parse-error (type => 'in body',
6980     text => $token->{tag_name}, token => $token);
6981 wakaba 1.52 ## Ignore the token
6982 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
6983 wakaba 1.52 !!!next-token;
6984 wakaba 1.126 next B;
6985 wakaba 1.52
6986     ## ISSUE: An issue on HTML5 new elements in the spec.
6987     } else {
6988 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6989     !!!cp ('t384');
6990 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6991 wakaba 1.110 $token->{tag_name} = 'img';
6992     } else {
6993     !!!cp ('t385');
6994     }
6995    
6996     ## NOTE: There is an "as if <br>" code clone.
6997 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6998    
6999 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7000 wakaba 1.109
7001 wakaba 1.110 if ({
7002     applet => 1, marquee => 1, object => 1,
7003     }->{$token->{tag_name}}) {
7004     !!!cp ('t380');
7005     push @$active_formatting_elements, ['#marker', ''];
7006 wakaba 1.125 !!!nack ('t380.1');
7007 wakaba 1.110 } elsif ({
7008     b => 1, big => 1, em => 1, font => 1, i => 1,
7009     s => 1, small => 1, strile => 1,
7010     strong => 1, tt => 1, u => 1,
7011     }->{$token->{tag_name}}) {
7012     !!!cp ('t375');
7013     push @$active_formatting_elements, $self->{open_elements}->[-1];
7014 wakaba 1.125 !!!nack ('t375.1');
7015 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
7016     !!!cp ('t388');
7017     ## TODO: associate with $self->{form_element} if defined
7018     pop @{$self->{open_elements}};
7019 wakaba 1.125 !!!ack ('t388.2');
7020 wakaba 1.110 } elsif ({
7021     area => 1, basefont => 1, bgsound => 1, br => 1,
7022     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
7023     #image => 1,
7024     }->{$token->{tag_name}}) {
7025     !!!cp ('t388.1');
7026     pop @{$self->{open_elements}};
7027 wakaba 1.125 !!!ack ('t388.3');
7028 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
7029 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
7030    
7031     if ($self->{insertion_mode} & TABLE_IMS or
7032     $self->{insertion_mode} & BODY_TABLE_IMS or
7033     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
7034     !!!cp ('t400.1');
7035     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
7036     } else {
7037     !!!cp ('t400.2');
7038     $self->{insertion_mode} = IN_SELECT_IM;
7039     }
7040 wakaba 1.125 !!!nack ('t400.3');
7041 wakaba 1.110 } else {
7042 wakaba 1.125 !!!nack ('t402');
7043 wakaba 1.109 }
7044 wakaba 1.51
7045 wakaba 1.52 !!!next-token;
7046 wakaba 1.126 next B;
7047 wakaba 1.52 }
7048 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
7049 wakaba 1.52 if ($token->{tag_name} eq 'body') {
7050 wakaba 1.107 ## has a |body| element in scope
7051     my $i;
7052 wakaba 1.111 INSCOPE: {
7053     for (reverse @{$self->{open_elements}}) {
7054 wakaba 1.123 if ($_->[1] & BODY_EL) {
7055 wakaba 1.111 !!!cp ('t405');
7056     $i = $_;
7057     last INSCOPE;
7058 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
7059 wakaba 1.111 !!!cp ('t405.1');
7060     last;
7061     }
7062 wakaba 1.52 }
7063 wakaba 1.111
7064     !!!parse-error (type => 'start tag not allowed',
7065 wakaba 1.153 text => $token->{tag_name}, token => $token);
7066 wakaba 1.107 ## NOTE: Ignore the token.
7067 wakaba 1.52 !!!next-token;
7068 wakaba 1.126 next B;
7069 wakaba 1.111 } # INSCOPE
7070 wakaba 1.107
7071     for (@{$self->{open_elements}}) {
7072 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
7073 wakaba 1.107 !!!cp ('t403');
7074 wakaba 1.122 !!!parse-error (type => 'not closed',
7075 wakaba 1.153 text => $_->[0]->manakai_local_name,
7076 wakaba 1.122 token => $token);
7077 wakaba 1.107 last;
7078     } else {
7079     !!!cp ('t404');
7080     }
7081     }
7082    
7083     $self->{insertion_mode} = AFTER_BODY_IM;
7084     !!!next-token;
7085 wakaba 1.126 next B;
7086 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
7087 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
7088     ## up-to-date, though it has the same effect as specified.
7089 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
7090     $self->{open_elements}->[1]->[1] & BODY_EL) {
7091 wakaba 1.52 ## ISSUE: There is an issue in the spec.
7092 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
7093 wakaba 1.79 !!!cp ('t406');
7094 wakaba 1.122 !!!parse-error (type => 'not closed',
7095 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
7096 wakaba 1.122 ->manakai_local_name,
7097     token => $token);
7098 wakaba 1.79 } else {
7099     !!!cp ('t407');
7100 wakaba 1.1 }
7101 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
7102 wakaba 1.52 ## reprocess
7103 wakaba 1.126 next B;
7104 wakaba 1.51 } else {
7105 wakaba 1.79 !!!cp ('t408');
7106 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7107     text => $token->{tag_name}, token => $token);
7108 wakaba 1.52 ## Ignore the token
7109     !!!next-token;
7110 wakaba 1.126 next B;
7111 wakaba 1.51 }
7112 wakaba 1.52 } elsif ({
7113     address => 1, blockquote => 1, center => 1, dir => 1,
7114     div => 1, dl => 1, fieldset => 1, listing => 1,
7115     menu => 1, ol => 1, pre => 1, ul => 1,
7116     dd => 1, dt => 1, li => 1,
7117 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7118 wakaba 1.52 }->{$token->{tag_name}}) {
7119     ## has an element in scope
7120     my $i;
7121     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7122     my $node = $self->{open_elements}->[$_];
7123 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7124 wakaba 1.79 !!!cp ('t410');
7125 wakaba 1.52 $i = $_;
7126 wakaba 1.87 last INSCOPE;
7127 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7128 wakaba 1.79 !!!cp ('t411');
7129 wakaba 1.52 last INSCOPE;
7130 wakaba 1.51 }
7131 wakaba 1.52 } # INSCOPE
7132 wakaba 1.89
7133     unless (defined $i) { # has an element in scope
7134     !!!cp ('t413');
7135 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7136     text => $token->{tag_name}, token => $token);
7137 wakaba 1.157 ## NOTE: Ignore the token.
7138 wakaba 1.89 } else {
7139     ## Step 1. generate implied end tags
7140     while ({
7141 wakaba 1.151 ## END_TAG_OPTIONAL_EL
7142 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
7143     dt => ($token->{tag_name} ne 'dt'),
7144     li => ($token->{tag_name} ne 'li'),
7145     p => 1,
7146 wakaba 1.151 rt => 1,
7147     rp => 1,
7148 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
7149 wakaba 1.89 !!!cp ('t409');
7150     pop @{$self->{open_elements}};
7151     }
7152    
7153     ## Step 2.
7154 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7155     ne $token->{tag_name}) {
7156 wakaba 1.79 !!!cp ('t412');
7157 wakaba 1.122 !!!parse-error (type => 'not closed',
7158 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7159 wakaba 1.122 ->manakai_local_name,
7160     token => $token);
7161 wakaba 1.51 } else {
7162 wakaba 1.89 !!!cp ('t414');
7163 wakaba 1.51 }
7164 wakaba 1.89
7165     ## Step 3.
7166 wakaba 1.52 splice @{$self->{open_elements}}, $i;
7167 wakaba 1.89
7168     ## Step 4.
7169     $clear_up_to_marker->()
7170     if {
7171 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7172 wakaba 1.89 }->{$token->{tag_name}};
7173 wakaba 1.51 }
7174 wakaba 1.52 !!!next-token;
7175 wakaba 1.126 next B;
7176 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
7177 wakaba 1.92 undef $self->{form_element};
7178    
7179 wakaba 1.52 ## has an element in scope
7180 wakaba 1.92 my $i;
7181 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7182     my $node = $self->{open_elements}->[$_];
7183 wakaba 1.123 if ($node->[1] & FORM_EL) {
7184 wakaba 1.79 !!!cp ('t418');
7185 wakaba 1.92 $i = $_;
7186 wakaba 1.52 last INSCOPE;
7187 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7188 wakaba 1.79 !!!cp ('t419');
7189 wakaba 1.52 last INSCOPE;
7190     }
7191     } # INSCOPE
7192 wakaba 1.92
7193     unless (defined $i) { # has an element in scope
7194 wakaba 1.79 !!!cp ('t421');
7195 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7196     text => $token->{tag_name}, token => $token);
7197 wakaba 1.157 ## NOTE: Ignore the token.
7198 wakaba 1.92 } else {
7199     ## Step 1. generate implied end tags
7200 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7201 wakaba 1.92 !!!cp ('t417');
7202     pop @{$self->{open_elements}};
7203     }
7204    
7205     ## Step 2.
7206 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7207     ne $token->{tag_name}) {
7208 wakaba 1.92 !!!cp ('t417.1');
7209 wakaba 1.122 !!!parse-error (type => 'not closed',
7210 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7211 wakaba 1.122 ->manakai_local_name,
7212     token => $token);
7213 wakaba 1.92 } else {
7214     !!!cp ('t420');
7215     }
7216    
7217     ## Step 3.
7218     splice @{$self->{open_elements}}, $i;
7219 wakaba 1.52 }
7220    
7221     !!!next-token;
7222 wakaba 1.126 next B;
7223 wakaba 1.52 } elsif ({
7224     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7225     }->{$token->{tag_name}}) {
7226     ## has an element in scope
7227     my $i;
7228     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7229     my $node = $self->{open_elements}->[$_];
7230 wakaba 1.123 if ($node->[1] & HEADING_EL) {
7231 wakaba 1.79 !!!cp ('t423');
7232 wakaba 1.52 $i = $_;
7233     last INSCOPE;
7234 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7235 wakaba 1.79 !!!cp ('t424');
7236 wakaba 1.52 last INSCOPE;
7237 wakaba 1.51 }
7238 wakaba 1.52 } # INSCOPE
7239 wakaba 1.93
7240     unless (defined $i) { # has an element in scope
7241     !!!cp ('t425.1');
7242 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7243     text => $token->{tag_name}, token => $token);
7244 wakaba 1.157 ## NOTE: Ignore the token.
7245 wakaba 1.79 } else {
7246 wakaba 1.93 ## Step 1. generate implied end tags
7247 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7248 wakaba 1.93 !!!cp ('t422');
7249     pop @{$self->{open_elements}};
7250     }
7251    
7252     ## Step 2.
7253 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7254     ne $token->{tag_name}) {
7255 wakaba 1.93 !!!cp ('t425');
7256 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7257     text => $token->{tag_name}, token => $token);
7258 wakaba 1.93 } else {
7259     !!!cp ('t426');
7260     }
7261    
7262     ## Step 3.
7263     splice @{$self->{open_elements}}, $i;
7264 wakaba 1.36 }
7265 wakaba 1.52
7266     !!!next-token;
7267 wakaba 1.126 next B;
7268 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
7269     ## has an element in scope
7270     my $i;
7271     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7272     my $node = $self->{open_elements}->[$_];
7273 wakaba 1.123 if ($node->[1] & P_EL) {
7274 wakaba 1.87 !!!cp ('t410.1');
7275     $i = $_;
7276 wakaba 1.88 last INSCOPE;
7277 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7278 wakaba 1.87 !!!cp ('t411.1');
7279     last INSCOPE;
7280     }
7281     } # INSCOPE
7282 wakaba 1.91
7283     if (defined $i) {
7284 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7285     ne $token->{tag_name}) {
7286 wakaba 1.87 !!!cp ('t412.1');
7287 wakaba 1.122 !!!parse-error (type => 'not closed',
7288 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7289 wakaba 1.122 ->manakai_local_name,
7290     token => $token);
7291 wakaba 1.87 } else {
7292 wakaba 1.91 !!!cp ('t414.1');
7293 wakaba 1.87 }
7294 wakaba 1.91
7295 wakaba 1.87 splice @{$self->{open_elements}}, $i;
7296     } else {
7297 wakaba 1.91 !!!cp ('t413.1');
7298 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7299     text => $token->{tag_name}, token => $token);
7300 wakaba 1.91
7301 wakaba 1.87 !!!cp ('t415.1');
7302     ## As if <p>, then reprocess the current token
7303     my $el;
7304 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
7305 wakaba 1.87 $insert->($el);
7306 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
7307 wakaba 1.87 }
7308 wakaba 1.91
7309 wakaba 1.87 !!!next-token;
7310 wakaba 1.126 next B;
7311 wakaba 1.52 } elsif ({
7312     a => 1,
7313     b => 1, big => 1, em => 1, font => 1, i => 1,
7314     nobr => 1, s => 1, small => 1, strile => 1,
7315     strong => 1, tt => 1, u => 1,
7316     }->{$token->{tag_name}}) {
7317 wakaba 1.79 !!!cp ('t427');
7318 wakaba 1.113 $formatting_end_tag->($token);
7319 wakaba 1.126 next B;
7320 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
7321 wakaba 1.79 !!!cp ('t428');
7322 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7323     text => 'br', token => $token);
7324 wakaba 1.52
7325     ## As if <br>
7326     $reconstruct_active_formatting_elements->($insert_to_current);
7327    
7328     my $el;
7329 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
7330 wakaba 1.52 $insert->($el);
7331    
7332     ## Ignore the token.
7333     !!!next-token;
7334 wakaba 1.126 next B;
7335 wakaba 1.52 } elsif ({
7336     caption => 1, col => 1, colgroup => 1, frame => 1,
7337     frameset => 1, head => 1, option => 1, optgroup => 1,
7338     tbody => 1, td => 1, tfoot => 1, th => 1,
7339     thead => 1, tr => 1,
7340     area => 1, basefont => 1, bgsound => 1,
7341     embed => 1, hr => 1, iframe => 1, image => 1,
7342     img => 1, input => 1, isindex => 1, noembed => 1,
7343     noframes => 1, param => 1, select => 1, spacer => 1,
7344     table => 1, textarea => 1, wbr => 1,
7345     noscript => 0, ## TODO: if scripting is enabled
7346     }->{$token->{tag_name}}) {
7347 wakaba 1.79 !!!cp ('t429');
7348 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7349     text => $token->{tag_name}, token => $token);
7350 wakaba 1.52 ## Ignore the token
7351     !!!next-token;
7352 wakaba 1.126 next B;
7353 wakaba 1.52
7354     ## ISSUE: Issue on HTML5 new elements in spec
7355    
7356     } else {
7357     ## Step 1
7358     my $node_i = -1;
7359     my $node = $self->{open_elements}->[$node_i];
7360 wakaba 1.51
7361 wakaba 1.52 ## Step 2
7362     S2: {
7363 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7364 wakaba 1.52 ## Step 1
7365     ## generate implied end tags
7366 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7367 wakaba 1.79 !!!cp ('t430');
7368 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
7369     ## ISSUE: <ruby><rt></rt> will also take this code path,
7370     ## which seems wrong.
7371 wakaba 1.86 pop @{$self->{open_elements}};
7372 wakaba 1.151 $node_i++;
7373 wakaba 1.52 }
7374    
7375     ## Step 2
7376 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7377     ne $token->{tag_name}) {
7378 wakaba 1.79 !!!cp ('t431');
7379 wakaba 1.58 ## NOTE: <x><y></x>
7380 wakaba 1.122 !!!parse-error (type => 'not closed',
7381 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7382 wakaba 1.122 ->manakai_local_name,
7383     token => $token);
7384 wakaba 1.79 } else {
7385     !!!cp ('t432');
7386 wakaba 1.52 }
7387    
7388     ## Step 3
7389 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
7390 wakaba 1.51
7391 wakaba 1.1 !!!next-token;
7392 wakaba 1.52 last S2;
7393 wakaba 1.1 } else {
7394 wakaba 1.52 ## Step 3
7395 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
7396 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
7397 wakaba 1.123 ($node->[1] & SPECIAL_EL or
7398     $node->[1] & SCOPING_EL)) {
7399 wakaba 1.79 !!!cp ('t433');
7400 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7401     text => $token->{tag_name}, token => $token);
7402 wakaba 1.52 ## Ignore the token
7403     !!!next-token;
7404     last S2;
7405     }
7406 wakaba 1.79
7407     !!!cp ('t434');
7408 wakaba 1.1 }
7409 wakaba 1.52
7410     ## Step 4
7411     $node_i--;
7412     $node = $self->{open_elements}->[$node_i];
7413    
7414     ## Step 5;
7415     redo S2;
7416     } # S2
7417 wakaba 1.126 next B;
7418 wakaba 1.1 }
7419     }
7420 wakaba 1.126 next B;
7421     } continue { # B
7422     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7423     ## NOTE: The code below is executed in cases where it does not have
7424     ## to be, but it is harmless even in those cases.
7425     ## has an element in scope
7426     INSCOPE: {
7427     for (reverse 0..$#{$self->{open_elements}}) {
7428     my $node = $self->{open_elements}->[$_];
7429     if ($node->[1] & FOREIGN_EL) {
7430     last INSCOPE;
7431     } elsif ($node->[1] & SCOPING_EL) {
7432     last;
7433     }
7434     }
7435    
7436     ## NOTE: No foreign element in scope.
7437     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7438     } # INSCOPE
7439     }
7440 wakaba 1.1 } # B
7441    
7442     ## Stop parsing # MUST
7443    
7444     ## TODO: script stuffs
7445 wakaba 1.3 } # _tree_construction_main
7446    
## set_inner_html -- Replace the content of a node with the result of
## parsing a string as HTML (the |innerHTML| setter).
##
## Called as |$class->set_inner_html ($node, $string, $onerror)|.
##
##   $class   - The parser class (invocant).
##   $node    - The node whose children are replaced.  A node of type 9
##              (document) is re-parsed as a whole via |parse_string|; a
##              node of type 1 (element) is used as the context element
##              of a fragment parse.  Any other node type makes the
##              method |die|.
##   $string  - The source text.  It is accessed through a reference to
##              the caller's argument and is never copied.
##   $onerror - Optional code reference invoked with named arguments
##              (|type|, |line|, |column|, |token|, ...) for each parse
##              error.  For the element case, errors are reported by
##              |warn| when it is omitted; for the document case the
##              value is handed to |parse_string| unchanged.
##
## The return value is not meaningful.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: The child list is copied first so that removing children
    ## does not disturb the iteration.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## A scratch document owned by a fresh parser receives the parsed
    ## nodes; they are moved into |$node| at the end.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    my $i = 0; # Index of the next unread character in |$$s|.
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    $p->{set_next_char} = sub {
      ## NOTE: This closure is stored in the parser object, so it must
      ## not capture |$p|; otherwise the parser and the closure would
      ## reference each other and never be freed.  The parser is
      ## received as the first argument instead.
      my $self = shift;

      ## Slide the window of previously read characters.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      ## End of input is represented by -1.
      $self->{next_char} = -1 and return if $i >= length $$s;
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($self->{line_prev}, $self->{column_prev})
          = ($self->{line}, $self->{column});
      $self->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $self->{line}++;
        $self->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR and CR LF are both normalized to a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $self->{line}++;
        $self->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_char} <= 0x0008 or
               (0x000E <= $self->{next_char} and
                $self->{next_char} <= 0x001F) or
               (0x007F <= $self->{next_char} and
                $self->{next_char} <= 0x009F) or
               (0xD800 <= $self->{next_char} and
                $self->{next_char} <= 0xDFFF) or
               (0xFDD0 <= $self->{next_char} and
                $self->{next_char} <= 0xFDDF) or
               {
                 0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
                 0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
                 0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
                 0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
                 0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
                 0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
                 0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
                 0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
                 0x10FFFE => 1, 0x10FFFF => 1,
               }->{$self->{next_char}}) {
        ## The character is reported but left in the input as is.
        !!!cp ('i4.1');
        if ($self->{next_char} < 0x10000) {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U+%04X', $self->{next_char}));
        } else {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U-%08X', $self->{next_char}));
        }
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## Default error handler: prefer the position recorded in the token,
    ## if any, over the current input position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    ## NOTE: This closure captures |$p| and is stored in |$p|; the
    ## resulting reference loop is broken explicitly at the end of this
    ## branch.
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model of the tokenizer depends on the local
    ## name of the context element.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
    ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
    ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns ($HTML_NS, [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## The form element pointer is set to the nearest HTML |form|
    ## element found among the context node and its ancestors.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## NOTE: The |!!!next-token| macro operates on |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    ## Move the parsed nodes from the scratch document into |$node|.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
7647    
7648     } # tree construction stage
7649 wakaba 1.1
## Exception class derived from |Error|.
## NOTE(review): presumably thrown to abort the current parse so that it
## can be restarted -- confirm against the code that throws it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

1;
7654 wakaba 1.159 # $Date: 2008/08/31 12:11:42 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24