/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.141 - (hide annotations) (download) (as text)
Sat May 24 10:18:26 2008 UTC (16 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.140: +42 -2 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	24 May 2008 10:09:03 -0000
	* tokenizer-test-1.test: New test data on EOF in tags are added
	to cover changes in HTML5 revision 1684.

2008-05-24  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	24 May 2008 10:09:40 -0000
	* HTML.pm.src: Support for EOF in new states for tags (HTML5
	revision 1684).

2008-05-24  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.141 our $VERSION=do{my @r=(q$Revision: 1.140 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: 1252 parse error (revision 1264)
12     ## TODO: 8859-11 = 874 (revision 1271)
13    
14 wakaba 1.139 require IO::Handle;
15    
## Namespace URIs: HTML, MathML, SVG, XLink, XML and XMLNS.
my $HTML_NS  = 'http://www.w3.org/1999/xhtml';
my $MML_NS   = 'http://www.w3.org/1998/Math/MathML';
my $SVG_NS   = 'http://www.w3.org/2000/svg';
my $XLINK_NS = 'http://www.w3.org/1999/xlink';
my $XML_NS   = 'http://www.w3.org/XML/1998/namespace';
my $XMLNS_NS = 'http://www.w3.org/2000/xmlns/';
22    
## Element category flags.  Every category owns exactly one bit, so a
## set of categories is the bitwise OR of its members and membership
## is tested with a bitwise AND.
sub A_EL                    () { 1 << 0 }
sub ADDRESS_EL              () { 1 << 1 }
sub BODY_EL                 () { 1 << 2 }
sub BUTTON_EL               () { 1 << 3 }
sub CAPTION_EL              () { 1 << 4 }
sub DD_EL                   () { 1 << 5 }
sub DIV_EL                  () { 1 << 6 }
sub DT_EL                   () { 1 << 7 }
sub FORM_EL                 () { 1 << 8 }
sub FORMATTING_EL           () { 1 << 9 }
sub FRAMESET_EL             () { 1 << 10 }
sub HEADING_EL              () { 1 << 11 }
sub HTML_EL                 () { 1 << 12 }
sub LI_EL                   () { 1 << 13 }
sub NOBR_EL                 () { 1 << 14 }
sub OPTION_EL               () { 1 << 15 }
sub OPTGROUP_EL             () { 1 << 16 }
sub P_EL                    () { 1 << 17 }
sub SELECT_EL               () { 1 << 18 }
sub TABLE_EL                () { 1 << 19 }
sub TABLE_CELL_EL           () { 1 << 20 }
sub TABLE_ROW_EL            () { 1 << 21 }
sub TABLE_ROW_GROUP_EL      () { 1 << 22 }
sub MISC_SCOPING_EL         () { 1 << 23 }
sub MISC_SPECIAL_EL         () { 1 << 24 }
sub FOREIGN_EL              () { 1 << 25 }
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 26 }
sub MML_AXML_EL             () { 1 << 27 }
51 wakaba 1.123
## Composite categories: each one is the bitwise union of the
## single-bit element category flags listed in its body.

sub TABLE_ROWS_EL () { TABLE_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL }

sub END_TAG_OPTIONAL_EL () { DD_EL | DT_EL | LI_EL | P_EL }

sub ALL_END_TAG_OPTIONAL_EL () {
  END_TAG_OPTIONAL_EL | BODY_EL | HTML_EL |
  TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
}

sub SCOPING_EL () {
  BUTTON_EL | CAPTION_EL | HTML_EL |
  TABLE_EL | TABLE_CELL_EL | MISC_SCOPING_EL
}

sub TABLE_SCOPING_EL () { HTML_EL | TABLE_EL }

sub TABLE_ROWS_SCOPING_EL () { HTML_EL | TABLE_ROW_GROUP_EL }

sub TABLE_ROW_SCOPING_EL () { HTML_EL | TABLE_ROW_EL }

sub SPECIAL_EL () {
  ADDRESS_EL | BODY_EL | DIV_EL | END_TAG_OPTIONAL_EL |
  FORM_EL | FRAMESET_EL | HEADING_EL |
  OPTION_EL | OPTGROUP_EL | SELECT_EL |
  TABLE_ROW_EL | TABLE_ROW_GROUP_EL | MISC_SPECIAL_EL
}
113    
## Maps an HTML element's local name to its category flags.
my $el_category = {
  ## Elements whose category is not shared with any other element.
  a        => A_EL | FORMATTING_EL,
  address  => ADDRESS_EL,
  body     => BODY_EL,
  button   => BUTTON_EL,
  caption  => CAPTION_EL,
  dd       => DD_EL,
  div      => DIV_EL,
  dt       => DT_EL,
  form     => FORM_EL,
  frameset => FRAMESET_EL,
  html     => HTML_EL,
  li       => LI_EL,
  nobr     => NOBR_EL | FORMATTING_EL,
  optgroup => OPTGROUP_EL,
  option   => OPTION_EL,
  p        => P_EL,
  select   => SELECT_EL,
  table    => TABLE_EL,
  tr       => TABLE_ROW_EL,

  ## Elements that share a category.
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),
  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),
  (map { ($_ => FORMATTING_EL) } qw(
    b big em font i s small strike strong tt u
  )),
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area base basefont bgsound blockquote br center col colgroup
    dir dl embed fieldset frame head hr iframe img input isindex
    link listing menu meta noembed noframes noscript ol param
    plaintext pre script spacer style textarea title ul wbr
  )),
};
198    
## Category flags for foreign (MathML and SVG) elements, keyed first by
## namespace URI and then by local name.
my $el_category_f = {
  $MML_NS => {
    'annotation-xml' => MML_AXML_EL,
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(mi mo mn ms mtext)),
  },
  $SVG_NS => {
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(foreignObject desc title)),
  },
  ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
};
215    
## Maps an all-lowercase SVG attribute name to its mixed-case spelling.
## Every key is simply the lowercase form of its value, so the table is
## generated from the list of mixed-case names.
my $svg_attr_name = {
  map { (lc ($_) => $_) } qw(
    attributeType baseFrequency baseProfile calcMode clipPathUnits
    contentScriptType contentStyleType diffuseConstant edgeMode
    externalResourcesRequired feColorMatrix feComposite feGaussianBlur
    feMorphology feTile filterRes filterUnits glyphRef gradientTransform
    gradientUnits kernelMatrix kernelUnitLength keyPoints keySplines
    keyTimes lengthAdjust limitingConeAngle markerHeight markerUnits
    markerWidth maskContentUnits maskUnits numOctaves pathLength
    patternContentUnits patternTransform patternUnits pointsAtX
    pointsAtY pointsAtZ preserveAlpha preserveAspectRatio primitiveUnits
    refX refY repeatCount repeatDur requiredExtensions specularConstant
    specularExponent spreadMethod startOffset stdDeviation stitchTiles
    surfaceScale systemLanguage tableValues targetX targetY textLength
    viewBox viewTarget xChannelSelector yChannelSelector zoomAndPan
  )
};
283    
## Maps the qualified name of a namespaced attribute to
## [namespace URI, [prefix, local name]].
my $foreign_attr_xname = {
  (map { ("xlink:$_" => [$XLINK_NS, ['xlink', $_]]) }
       qw(actuate arcrole href role show title type)),
  (map { ("xml:$_" => [$XML_NS, ['xml', $_]]) }
       qw(base lang space)),
  'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
  'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
};
298    
299     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
300    
## Maps each code point in the range 0x80..0x9F to the code point that
## replaces it.  The list below is in key order, starting at 0x80.
my $c1_entity_char = do {
  my @replacement = (
    ## 0x80 .. 0x87
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    ## 0x88 .. 0x8F
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
    ## 0x90 .. 0x97
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    ## 0x98 .. 0x9F
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
  );
  my %char_for;
  @char_for{0x80 .. 0x9F} = @replacement;
  \%char_for;
}; # $c1_entity_char
335 wakaba 1.1
## Parses an HTML document given as a string of bytes (or as a
## reference to such a string).
##
## Arguments (after the invocant):
##   $charset_name - passed through to |parse_byte_stream|.
##   $_[0]         - the byte string, or a reference to it.
##   $_[1..]       - passed through to |parse_byte_stream| unchanged.
##
## Returns what |parse_byte_stream| returns.  Dies if the string cannot
## be opened as an in-memory file (for example because it contains
## code points above 0xFF and therefore is not a byte string).
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;
  my $bytes = ref $_[0] ? $_[0] : \($_[0]);
  ## Report a failed |open| here; otherwise the failure would only show
  ## up later as a method call on an undefined handle.
  open my $input, '<', $bytes
      or die "parse_byte_string: cannot open in-memory byte stream: $!";
  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
342    
## Parses an HTML document read from a byte stream.
##
## Arguments (after the invocant, which is either a parser object or a
## class name; for a class name a new parser is created):
##   $charset_name - IANA charset name to try first; may be undefined.
##   $_[0]         - the byte stream; |getc| is called on it.
##   $_[1..]       - passed through to |parse_char_stream|.  $_[2], if
##                   true, is also used as the error callback here.
##
## The encoding is chosen by the SNIFFING block; the resulting decode
## handle is given to |parse_char_stream|.  If that call throws
## |Whatpm::HTML::RestartParser| (thrown by the |change_encoding|
## callback installed below), the document is parsed once more with
## the decode handle created by that callback.
##
## Returns what |parse_char_stream| returns.
343     sub parse_byte_stream ($$$$;$) {
344 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
345 wakaba 1.133 my $charset_name = shift;
346 wakaba 1.138 my $byte_stream = $_[0];
347 wakaba 1.133
348 wakaba 1.134 my $onerror = $_[2] || sub {
349     my (%opt) = @_;
350     warn "Parse error ($opt{type})\n";
351     };
352     $self->{parse_error} = $onerror; # updated later by parse_char_stream
353    
354 wakaba 1.133 ## HTML5 encoding sniffing algorithm
355     require Message::Charset::Info;
356     my $charset;
357 wakaba 1.136 my $buffer;
358     my ($char_stream, $e_status);
359 wakaba 1.133
360     SNIFFING: {
361    
362     ## Step 1
## Use the caller-supplied charset name if a decode handle can be
## obtained for it.
363     if (defined $charset_name) {
364     $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
365    
366     ## ISSUE: Unsupported encoding is not ignored according to the spec.
367 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
368     ($byte_stream, allow_error_reporting => 1,
369 wakaba 1.133 allow_fallback => 1);
370 wakaba 1.136 if ($char_stream) {
371 wakaba 1.133 $self->{confident} = 1;
372     last SNIFFING;
373 wakaba 1.136 } else {
374     ## TODO: unsupported error
375 wakaba 1.133 }
376     }
377    
378     ## Step 2
## Read at most 1024 bytes from the stream for the following steps.
379 wakaba 1.136 my $byte_buffer = '';
380     for (1..1024) {
381     my $char = $byte_stream->getc;
382     last unless defined $char;
383     $byte_buffer .= $char;
384     } ## TODO: timeout
385 wakaba 1.133
386     ## Step 3
## A leading byte order mark selects utf-16be, utf-16le or utf-8.
387 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
388 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-16be');
389 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
390     ($byte_stream, allow_error_reporting => 1,
391     allow_fallback => 1, byte_buffer => \$byte_buffer);
392 wakaba 1.133 $self->{confident} = 1;
393     last SNIFFING;
394 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
395 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-16le');
396 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
397     ($byte_stream, allow_error_reporting => 1,
398     allow_fallback => 1, byte_buffer => \$byte_buffer);
399 wakaba 1.133 $self->{confident} = 1;
400     last SNIFFING;
401 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
402 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
403 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
404     ($byte_stream, allow_error_reporting => 1,
405     allow_fallback => 1, byte_buffer => \$byte_buffer);
406 wakaba 1.133 $self->{confident} = 1;
407     last SNIFFING;
408     }
409    
410     ## Step 4
411     ## TODO: <meta charset>
412    
413     ## Step 5
414     ## TODO: from history
415    
416     ## Step 6
## Ask the charset detector about the bytes read in step 2.  A result
## obtained this way is recorded as not confident.
417 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
418 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
419 wakaba 1.136 ($byte_buffer);
420 wakaba 1.133 if (defined $charset_name) {
421     $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
422    
423     ## ISSUE: Unsupported encoding is not ignored according to the spec.
424 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
425     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
426     ($byte_stream);
427     ($char_stream, $e_status) = $charset->get_decode_handle
428     ($buffer, allow_error_reporting => 1,
429     allow_fallback => 1, byte_buffer => \$byte_buffer);
430     if ($char_stream) {
431     $buffer->{buffer} = $byte_buffer;
432 wakaba 1.134 !!!parse-error (type => 'sniffing:chardet', ## TODO: type name
433     value => $charset_name,
434     level => $self->{info_level},
435     line => 1, column => 1);
436 wakaba 1.133 $self->{confident} = 0;
437     last SNIFFING;
438     }
439     }
440    
441     ## Step 7: default
442     ## TODO: Make this configurable.
443     $charset = Message::Charset::Info->get_by_iana_name ('windows-1252');
444     ## NOTE: We choose |windows-1252| here, since |utf-8| should be
445     ## detectable in the step 6.
446 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
447     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
448     ($byte_stream);
449     ($char_stream, $e_status)
450     = $charset->get_decode_handle ($buffer,
451     allow_error_reporting => 1,
452     allow_fallback => 1,
453     byte_buffer => \$byte_buffer);
454     $buffer->{buffer} = $byte_buffer;
455 wakaba 1.134 !!!parse-error (type => 'sniffing:default', ## TODO: type name
456     value => 'windows-1252',
457     level => $self->{info_level},
458     line => 1, column => 1);
459 wakaba 1.63 $self->{confident} = 0;
460 wakaba 1.133 } # SNIFFING
461    
## Record the chosen encoding and report what the status flags of the
## decode handle say about the decoder implementation.
462 wakaba 1.134 $self->{input_encoding} = $charset->get_iana_name;
463 wakaba 1.133 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
464 wakaba 1.134 !!!parse-error (type => 'chardecode:fallback', ## TODO: type name
465 wakaba 1.136 value => $self->{input_encoding},
466 wakaba 1.134 level => $self->{unsupported_level},
467     line => 1, column => 1);
468 wakaba 1.133 } elsif (not ($e_status &
469     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
470 wakaba 1.134 !!!parse-error (type => 'chardecode:no error', ## TODO: type name
471     value => $self->{input_encoding},
472     level => $self->{unsupported_level},
473     line => 1, column => 1);
474 wakaba 1.63 }
475    
## Callback that receives the parser, a charset name and a token.
## NOTE: It overwrites the enclosing |$charset_name|, |$charset|,
## |$char_stream| and |$e_status| even when no decode handle could be
## obtained for the new charset.
476     $self->{change_encoding} = sub {
477     my $self = shift;
478 wakaba 1.134 $charset_name = shift;
479 wakaba 1.114 my $token = shift;
480 wakaba 1.63
481 wakaba 1.134 $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
482 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
483     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
484     byte_buffer => \ $buffer->{buffer});
485 wakaba 1.134
486 wakaba 1.136 if ($char_stream) { # if supported
487 wakaba 1.134 ## "Change the encoding" algorithm:
488 wakaba 1.63
489 wakaba 1.134 ## Step 1
490     if ($charset->{iana_names}->{'utf-16'}) { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
491     $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
492 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
493     ($byte_stream,
494     byte_buffer => \ $buffer->{buffer});
495 wakaba 1.134 }
496     $charset_name = $charset->get_iana_name;
497    
498     ## Step 2
499     if (defined $self->{input_encoding} and
500     $self->{input_encoding} eq $charset_name) {
501     !!!parse-error (type => 'charset label:matching', ## TODO: type
502     value => $charset_name,
503     level => $self->{info_level});
504     $self->{confident} = 1;
505     return;
506     }
507 wakaba 1.63
508 wakaba 1.134 !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
509     ':'.$charset_name, level => 'w', token => $token);
510    
511     ## Step 3
512     # if (can) {
513     ## change the encoding on the fly.
514     #$self->{confident} = 1;
515     #return;
516     # }
517    
518     ## Step 4
519     throw Whatpm::HTML::RestartParser ();
520 wakaba 1.63 }
521     }; # $self->{change_encoding}
522    
## Error callback for the decode handle: reports the error at the
## parser's current position and, if the error carries an |octets|
## reference, stores U+FFFD through it.
523 wakaba 1.136 my $char_onerror = sub {
524     my (undef, $type, %opt) = @_;
525 wakaba 1.137 !!!parse-error (%opt, type => $type,
526     line => $self->{line}, column => $self->{column} + 1);
527 wakaba 1.136 if ($opt{octets}) {
528     ${$opt{octets}} = "\x{FFFD}"; # replacement character
529     }
530     };
531     $char_stream->onerror ($char_onerror);
532    
## Drop the byte stream from the argument list; the remaining arguments
## are handed to |parse_char_stream|.
533 wakaba 1.63 my @args = @_; shift @args; # $s
534     my $return;
535     try {
536 wakaba 1.136 $return = $self->parse_char_stream ($char_stream, @args);
537 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
538 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
539    
540     $self->{input_encoding} = $charset->get_iana_name;
541     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
542     !!!parse-error (type => 'chardecode:fallback', ## TODO: type name
543 wakaba 1.136 value => $self->{input_encoding},
544 wakaba 1.134 level => $self->{unsupported_level},
545     line => 1, column => 1);
546     } elsif (not ($e_status &
547     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
548     !!!parse-error (type => 'chardecode:no error', ## TODO: type name
549     value => $self->{input_encoding},
550     level => $self->{unsupported_level},
551     line => 1, column => 1);
552     }
553 wakaba 1.63 $self->{confident} = 1;
554 wakaba 1.136 $char_stream->onerror ($char_onerror);
555     $return = $self->parse_char_stream ($char_stream, @args);
556 wakaba 1.63 };
557     return $return;
558 wakaba 1.138 } # parse_byte_stream
559 wakaba 1.63
560 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
561     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
562     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
563     ## because the core part of our HTML parser expects a string of characters,
564     ## not a string of bytes or code units or anything which might contain a BOM.
565     ## Therefore, any parser interface that accepts a string of bytes,
566     ## such as |parse_byte_string| in this module, must ensure that it does
567     ## strip the BOM and never strip any ZWNBSP.
568    
## Parses an HTML document given as a string of characters (or as a
## reference to such a string).  |parse_string| is an alias.
##
## Arguments (after the invocant):
##   $_[0]   - the character string, or a reference to it.
##   $_[1..] - passed through to |parse_char_stream| unchanged.
##
## Returns what |parse_char_stream| returns.  Dies if the in-memory
## file cannot be opened.
sub parse_char_string ($$$;$) {
  my $self = shift;
  require utf8;
  my $s = ref $_[0] ? $_[0] : \($_[0]);

  ## In-memory files are octet-oriented: a scalar containing code
  ## points above 0xFF cannot be opened, and a UTF8-flagged scalar
  ## without such code points is read as Latin-1 bytes, which the
  ## |:utf8| layer would then misinterpret.  Therefore a copy of the
  ## string is always encoded into UTF-8 octets first and decoded
  ## again by the |:utf8| layer, which yields the original characters
  ## regardless of the string's internal representation.
  my $octets = $$s;
  utf8::encode ($octets);
  open my $input, '<:utf8', \$octets
      or die "parse_char_string: cannot open in-memory character stream: $!";
  return $self->parse_char_stream ($input, @_[1..$#_]);
} # parse_char_string
*parse_string = \&parse_char_string;
577 wakaba 1.63
## Parses an HTML document read from a character stream.
##
## Arguments (after the invocant, which is either a parser object or a
## class name; for a class name a new parser is created):
##   $_[0] - the character stream; |getc| is called on it.
##   $_[1] - the document object.  Its child node list is emptied and,
##           if |$self->{input_encoding}| is defined, its
##           |input_encoding| is set.
##   $_[2] - optional error callback, invoked with a list of name/value
##           pairs that begins with |line| and |column|.  The default
##           callback prints a warning.
##
## Returns the document object.
sub parse_char_stream ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $input = $_[0];
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  $self->{confident} = 1 unless exists $self->{confident};
  $self->{document}->input_encoding ($self->{input_encoding})
      if defined $self->{input_encoding};

  $self->{line_prev} = $self->{line} = 1;
  $self->{column_prev} = $self->{column} = 0;

  ## A previous parse on this object may have been aborted while a
  ## character read ahead after a CR was still pending.  That character
  ## belongs to the old input and must not be replayed at the start of
  ## this parse.
  delete $self->{next_next_char};

  ## Reads the next input character into |$self->{next_char}| (-1 at
  ## the end of the stream), keeps the three previous characters in
  ## |$self->{prev_char}| and maintains the line/column counters.
  $self->{set_next_char} = sub {
    my $self = shift;

    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    my $char;
    if (defined $self->{next_next_char}) {
      ## A character was read ahead while handling a CR.
      $char = $self->{next_next_char};
      delete $self->{next_next_char};
    } else {
      $char = $input->getc;
    }
    $self->{next_char} = -1 and return unless defined $char;
    $self->{next_char} = ord $char;

    ($self->{line_prev}, $self->{column_prev})
        = ($self->{line}, $self->{column});
    $self->{column}++;

    if ($self->{next_char} == 0x000A) { # LF
      !!!cp ('j1');
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} == 0x000D) { # CR
      !!!cp ('j2');
      ## CR and CR LF are both turned into a single LF.  A character
      ## other than LF that follows the CR is kept for the next call.
      my $next = $input->getc;
      if (defined $next and $next ne "\x0A") {
        $self->{next_next_char} = $next;
      }
      $self->{next_char} = 0x000A; # LF # MUST
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} > 0x10FFFF) {
      !!!cp ('j3');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} == 0x0000) { # NULL
      !!!cp ('j4');
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} <= 0x0008 or
             (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
             (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
             (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
             (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
             ## U+xxFFFE and U+xxFFFF of every plane.  Values above
             ## 0x10FFFF have been handled by an earlier branch, so
             ## this test matches exactly 0xFFFE, 0xFFFF, 0x1FFFE,
             ## 0x1FFFF, ..., 0x10FFFE, 0x10FFFF.
             (($self->{next_char} & 0xFFFE) == 0xFFFE)) {
      !!!cp ('j5');
      !!!parse-error (type => 'control char', level => $self->{must_level});
      ## TODO: error type documentation
    }
  };
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
    my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  ## The current position is passed first, so that |line| and |column|
  ## values given by the reporter override it in a hash-style list.
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  ## The callback above refers to |$self|; removing it breaks the
  ## reference loop.
  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_char_stream
676 wakaba 1.1
## Creates a new parser object.
##
## The object starts with the error level codes ('m', 's', 'w', 'w',
## 'i' and 'u') and with default callbacks: |set_next_char| sets the
## next input character to -1, and |parse_error|, |change_encoding|
## and |application_cache_selection| do nothing.
##
## Returns the new object, blessed into the invocant's class.
sub new ($) {
  my $class = shift;
  my $self = bless {
    must_level => 'm',
    should_level => 's',
    good_level => 'w',
    warn_level => 'w',
    info_level => 'i',
    unsupported_level => 'u',
  }, $class;
  $self->{set_next_char} = sub {
    ## The parser is taken from the argument list, as the callback
    ## installed by |parse_char_stream| does.  Referring to the outer
    ## |$self| here would make the object hold a reference to itself,
    ## so that it would never be freed.
    my $self = shift;
    $self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
704    
## Content model flags, one bit each.
sub CM_ENTITY         () { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP    () { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL     () { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL    () { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL    () { CM_FULL_MARKUP | CM_ENTITY }
713    
## Tokenizer states.  Each state is identified by a distinct integer.

## Data and tags
sub DATA_STATE                                    () {  0 }
sub ENTITY_DATA_STATE                             () {  1 }
sub TAG_OPEN_STATE                                () {  2 }
sub CLOSE_TAG_OPEN_STATE                          () {  3 }
sub TAG_NAME_STATE                                () {  4 }

## Attributes
sub BEFORE_ATTRIBUTE_NAME_STATE                   () {  5 }
sub ATTRIBUTE_NAME_STATE                          () {  6 }
sub AFTER_ATTRIBUTE_NAME_STATE                    () {  7 }
sub BEFORE_ATTRIBUTE_VALUE_STATE                  () {  8 }
sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE           () {  9 }
sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE           () { 10 }
sub ATTRIBUTE_VALUE_UNQUOTED_STATE                () { 11 }
sub ENTITY_IN_ATTRIBUTE_VALUE_STATE               () { 12 }

## Markup declarations and comments
sub MARKUP_DECLARATION_OPEN_STATE                 () { 13 }
sub COMMENT_START_STATE                           () { 14 }
sub COMMENT_START_DASH_STATE                      () { 15 }
sub COMMENT_STATE                                 () { 16 }
sub COMMENT_END_STATE                             () { 17 }
sub COMMENT_END_DASH_STATE                        () { 18 }
sub BOGUS_COMMENT_STATE                           () { 19 }

## DOCTYPE
sub DOCTYPE_STATE                                 () { 20 }
sub BEFORE_DOCTYPE_NAME_STATE                     () { 21 }
sub DOCTYPE_NAME_STATE                            () { 22 }
sub AFTER_DOCTYPE_NAME_STATE                      () { 23 }
sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE        () { 24 }
sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE         () { 27 }
sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE        () { 28 }
sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE         () { 31 }
sub BOGUS_DOCTYPE_STATE                           () { 32 }

## States with the highest numbers
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE            () { 33 }
sub SELF_CLOSING_START_TAG_STATE                  () { 34 }
sub CDATA_BLOCK_STATE                             () { 35 }
750 wakaba 1.57
## Token types, used as the value of a token's |type| member.
sub DOCTYPE_TOKEN     () { 1 }
sub COMMENT_TOKEN     () { 2 }
sub START_TAG_TOKEN   () { 3 }
sub END_TAG_TOKEN     () { 4 }
sub END_OF_FILE_TOKEN () { 5 }
sub CHARACTER_TOKEN   () { 6 }
757    
## Insertion mode group flags (|*_IMS|), one bit each.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS       () { 1 << 3 }
sub BODY_IMS       () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS      () { 1 << 6 }
sub ROW_IMS        () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS      () { 1 << 9 }
sub SELECT_IMS     () { 1 << 10 }
sub IN_FOREIGN_CONTENT_IM () { 1 << 11 }
  ## NOTE: "in foreign content" insertion mode is special; it is combined
  ## with the secondary insertion mode.  In this parser, they are stored
  ## together in the bit-or'ed form.

## NOTE: "initial" and "before html" insertion modes have no constants.

## Insertion modes (|*_IM|): group flags, with the two lowest bits
## telling the members of a group apart.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

sub IN_HEAD_IM            () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM   () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM         () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM        () { HEAD_IMS | 3 }
sub IN_BODY_IM            () { BODY_IMS }
sub IN_CELL_IM            () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM         () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM             () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM      () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM           () { TABLE_IMS }
sub AFTER_BODY_IM         () { BODY_AFTER_IMS }
sub IN_FRAMESET_IM        () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM     () { FRAME_IMS | 2 }
sub IN_SELECT_IM          () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
sub IN_COLUMN_GROUP_IM    () { 2 }
796    
797 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec
798    
## Resets the tokenizer: data state, PCDATA content model, no current
## token or attribute, empty |char| and |token| lists.  The first input
## character is read as part of the reset.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = DATA_STATE;
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## These members are kept in the object with an undefined value.
  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  @{$self}{qw(
    current_token current_attribute
    last_emitted_start_tag_name last_attribute_value_state
  )} = ();
  delete $self->{self_closing};

  $self->{char} = [];
  # $self->{next_char}
  !!!next-input-character;
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
814    
815     ## A token has:
816 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
817     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
818     ## ->{name} (DOCTYPE_TOKEN)
819     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
820     ## ->{public_identifier} (DOCTYPE_TOKEN)
821     ## ->{system_identifier} (DOCTYPE_TOKEN)
822 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
823 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
824 wakaba 1.66 ## ->{name}
825     ## ->{value}
826     ## ->{has_reference} == 1 or 0
827 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
828 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
829     ## |->{self_closing}| is used to save the value of |$self->{self_closing}|
830     ## while the token is pushed back to the stack.
831    
832     ## ISSUE: "When a DOCTYPE token is created, its
833     ## <i>self-closing flag</i> must be unset (its other state is that it
834     ## be set), and its attributes list must be empty.": Wrong subject?
835 wakaba 1.1
836     ## Emitted token MUST immediately be handled by the tree construction state.
837    
838     ## Before each step, UA MAY check to see if either one of the scripts in
839     ## "list of scripts that will execute as soon as possible" or the first
840     ## script in the "list of scripts that will execute asynchronously",
841     ## has completed loading. If one has, then it MUST be executed
842     ## and removed from the list.
843    
844 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
845     ## documents and not to user agents and conformance checkers,
846     ## contains some requirements that are not detected by the
847     ## parsing algorithm:
848     ## - Some requirements on character encoding declarations. ## TODO
849     ## - "Elements MUST NOT contain content that their content model disallows."
850     ## ... Some are parse error, some are not (will be reported by c.c.).
851     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
852     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
853     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
854    
855     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
856     ## be detected by the HTML5 parsing algorithm:
857     ## - Text,
858    
859 wakaba 1.1 sub _get_next_token ($) {
860     my $self = shift;
861 wakaba 1.125
862     if ($self->{self_closing}) {
863     !!!parse-error (type => 'nestc', token => $self->{current_token});
864     ## NOTE: The |self_closing| flag is only set by start tag token.
865     ## In addition, when a start tag token is emitted, it is always set to
866     ## |current_token|.
867     delete $self->{self_closing};
868     }
869    
870 wakaba 1.1 if (@{$self->{token}}) {
871 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
872 wakaba 1.1 return shift @{$self->{token}};
873     }
874    
875     A: {
876 wakaba 1.57 if ($self->{state} == DATA_STATE) {
877 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
878 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
879     not $self->{escape}) {
880 wakaba 1.77 !!!cp (1);
881 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
882 wakaba 1.1 !!!next-input-character;
883     redo A;
884     } else {
885 wakaba 1.77 !!!cp (2);
886 wakaba 1.1 #
887     }
888 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
889 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
890 wakaba 1.13 unless ($self->{escape}) {
891 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
892     $self->{prev_char}->[1] == 0x0021 and # !
893     $self->{prev_char}->[2] == 0x003C) { # <
894 wakaba 1.77 !!!cp (3);
895 wakaba 1.13 $self->{escape} = 1;
896 wakaba 1.77 } else {
897     !!!cp (4);
898 wakaba 1.13 }
899 wakaba 1.77 } else {
900     !!!cp (5);
901 wakaba 1.13 }
902     }
903    
904     #
905 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
906 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
907     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
908 wakaba 1.13 not $self->{escape})) {
909 wakaba 1.77 !!!cp (6);
910 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
911 wakaba 1.1 !!!next-input-character;
912     redo A;
913     } else {
914 wakaba 1.77 !!!cp (7);
915 wakaba 1.1 #
916     }
917 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
918 wakaba 1.13 if ($self->{escape} and
919 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
920 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
921     $self->{prev_char}->[1] == 0x002D) { # -
922 wakaba 1.77 !!!cp (8);
923 wakaba 1.13 delete $self->{escape};
924 wakaba 1.77 } else {
925     !!!cp (9);
926 wakaba 1.13 }
927 wakaba 1.77 } else {
928     !!!cp (10);
929 wakaba 1.13 }
930    
931     #
932 wakaba 1.76 } elsif ($self->{next_char} == -1) {
933 wakaba 1.77 !!!cp (11);
934 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
935     line => $self->{line}, column => $self->{column}});
936 wakaba 1.1 last A; ## TODO: ok?
937 wakaba 1.77 } else {
938     !!!cp (12);
939 wakaba 1.1 }
940     # Anything else
941 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
942 wakaba 1.112 data => chr $self->{next_char},
943 wakaba 1.120 line => $self->{line}, column => $self->{column},
944 wakaba 1.118 };
945 wakaba 1.1 ## Stay in the data state
946     !!!next-input-character;
947    
948     !!!emit ($token);
949    
950     redo A;
951 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
952 wakaba 1.1 ## (cannot happen in CDATA state)
953 wakaba 1.112
954 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
955 wakaba 1.1
956 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
957 wakaba 1.1
958 wakaba 1.57 $self->{state} = DATA_STATE;
959 wakaba 1.1 # next-input-character is already done
960    
961     unless (defined $token) {
962 wakaba 1.77 !!!cp (13);
963 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
964 wakaba 1.120 line => $l, column => $c,
965 wakaba 1.118 });
966 wakaba 1.1 } else {
967 wakaba 1.77 !!!cp (14);
968 wakaba 1.1 !!!emit ($token);
969     }
970    
971     redo A;
972 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
973 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
974 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
975 wakaba 1.77 !!!cp (15);
976 wakaba 1.1 !!!next-input-character;
977 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
978 wakaba 1.1 redo A;
979     } else {
980 wakaba 1.77 !!!cp (16);
981 wakaba 1.1 ## reconsume
982 wakaba 1.57 $self->{state} = DATA_STATE;
983 wakaba 1.1
984 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
985 wakaba 1.120 line => $self->{line_prev},
986     column => $self->{column_prev},
987 wakaba 1.118 });
988 wakaba 1.1
989     redo A;
990     }
991 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
992 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
993 wakaba 1.77 !!!cp (17);
994 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
995 wakaba 1.1 !!!next-input-character;
996     redo A;
997 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
998 wakaba 1.77 !!!cp (18);
999 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1000 wakaba 1.1 !!!next-input-character;
1001     redo A;
1002 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1003     $self->{next_char} <= 0x005A) { # A..Z
1004 wakaba 1.77 !!!cp (19);
1005 wakaba 1.1 $self->{current_token}
1006 wakaba 1.55 = {type => START_TAG_TOKEN,
1007 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
1008     line => $self->{line_prev},
1009     column => $self->{column_prev}};
1010 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1011 wakaba 1.1 !!!next-input-character;
1012     redo A;
1013 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1014     $self->{next_char} <= 0x007A) { # a..z
1015 wakaba 1.77 !!!cp (20);
1016 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
1017 wakaba 1.112 tag_name => chr ($self->{next_char}),
1018     line => $self->{line_prev},
1019     column => $self->{column_prev}};
1020 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1021 wakaba 1.1 !!!next-input-character;
1022     redo A;
1023 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1024 wakaba 1.77 !!!cp (21);
1025 wakaba 1.115 !!!parse-error (type => 'empty start tag',
1026     line => $self->{line_prev},
1027     column => $self->{column_prev});
1028 wakaba 1.57 $self->{state} = DATA_STATE;
1029 wakaba 1.1 !!!next-input-character;
1030    
1031 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
1032 wakaba 1.120 line => $self->{line_prev},
1033     column => $self->{column_prev},
1034 wakaba 1.118 });
1035 wakaba 1.1
1036     redo A;
1037 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
1038 wakaba 1.77 !!!cp (22);
1039 wakaba 1.115 !!!parse-error (type => 'pio',
1040     line => $self->{line_prev},
1041     column => $self->{column_prev});
1042 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1043 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1044 wakaba 1.120 line => $self->{line_prev},
1045     column => $self->{column_prev},
1046 wakaba 1.118 };
1047 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1048 wakaba 1.1 redo A;
1049     } else {
1050 wakaba 1.77 !!!cp (23);
1051 wakaba 1.136 !!!parse-error (type => 'bare stago',
1052     line => $self->{line_prev},
1053     column => $self->{column_prev});
1054 wakaba 1.57 $self->{state} = DATA_STATE;
1055 wakaba 1.1 ## reconsume
1056    
1057 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1058 wakaba 1.120 line => $self->{line_prev},
1059     column => $self->{column_prev},
1060 wakaba 1.118 });
1061 wakaba 1.1
1062     redo A;
1063     }
1064     } else {
1065 wakaba 1.40 die "$0: $self->{content_model} in tag open";
1066 wakaba 1.1 }
1067 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1068 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1069 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1070 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
1071 wakaba 1.112
1072 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
1073 wakaba 1.23 my @next_char;
1074     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
1075 wakaba 1.76 push @next_char, $self->{next_char};
1076 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
1077     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
1078 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
1079 wakaba 1.77 !!!cp (24);
1080 wakaba 1.23 !!!next-input-character;
1081     next TAGNAME;
1082     } else {
1083 wakaba 1.77 !!!cp (25);
1084 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
1085 wakaba 1.23 !!!back-next-input-character (@next_char);
1086 wakaba 1.57 $self->{state} = DATA_STATE;
1087 wakaba 1.23
1088 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1089 wakaba 1.120 line => $l, column => $c,
1090 wakaba 1.118 });
1091 wakaba 1.23
1092     redo A;
1093     }
1094     }
1095 wakaba 1.76 push @next_char, $self->{next_char};
1096 wakaba 1.23
1097 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
1098     $self->{next_char} == 0x000A or # LF
1099     $self->{next_char} == 0x000B or # VT
1100     $self->{next_char} == 0x000C or # FF
1101     $self->{next_char} == 0x0020 or # SP
1102     $self->{next_char} == 0x003E or # >
1103     $self->{next_char} == 0x002F or # /
1104     $self->{next_char} == -1) {
1105 wakaba 1.77 !!!cp (26);
1106 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
1107 wakaba 1.1 !!!back-next-input-character (@next_char);
1108 wakaba 1.57 $self->{state} = DATA_STATE;
1109 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1110 wakaba 1.120 line => $l, column => $c,
1111 wakaba 1.118 });
1112 wakaba 1.1 redo A;
1113 wakaba 1.23 } else {
1114 wakaba 1.77 !!!cp (27);
1115 wakaba 1.76 $self->{next_char} = shift @next_char;
1116 wakaba 1.23 !!!back-next-input-character (@next_char);
1117     # and consume...
1118 wakaba 1.1 }
1119 wakaba 1.23 } else {
1120     ## No start tag token has ever been emitted
1121 wakaba 1.77 !!!cp (28);
1122 wakaba 1.23 # next-input-character is already done
1123 wakaba 1.57 $self->{state} = DATA_STATE;
1124 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1125 wakaba 1.120 line => $l, column => $c,
1126 wakaba 1.118 });
1127 wakaba 1.1 redo A;
1128     }
1129     }
1130    
1131 wakaba 1.76 if (0x0041 <= $self->{next_char} and
1132     $self->{next_char} <= 0x005A) { # A..Z
1133 wakaba 1.77 !!!cp (29);
1134 wakaba 1.112 $self->{current_token}
1135     = {type => END_TAG_TOKEN,
1136     tag_name => chr ($self->{next_char} + 0x0020),
1137     line => $l, column => $c};
1138 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1139 wakaba 1.1 !!!next-input-character;
1140     redo A;
1141 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1142     $self->{next_char} <= 0x007A) { # a..z
1143 wakaba 1.77 !!!cp (30);
1144 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
1145 wakaba 1.112 tag_name => chr ($self->{next_char}),
1146     line => $l, column => $c};
1147 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1148 wakaba 1.1 !!!next-input-character;
1149     redo A;
1150 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1151 wakaba 1.77 !!!cp (31);
1152 wakaba 1.115 !!!parse-error (type => 'empty end tag',
1153     line => $self->{line_prev}, ## "<" in "</>"
1154     column => $self->{column_prev} - 1);
1155 wakaba 1.57 $self->{state} = DATA_STATE;
1156 wakaba 1.1 !!!next-input-character;
1157     redo A;
1158 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1159 wakaba 1.77 !!!cp (32);
1160 wakaba 1.3 !!!parse-error (type => 'bare etago');
1161 wakaba 1.57 $self->{state} = DATA_STATE;
1162 wakaba 1.1 # reconsume
1163    
1164 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1165 wakaba 1.120 line => $l, column => $c,
1166 wakaba 1.118 });
1167 wakaba 1.1
1168     redo A;
1169     } else {
1170 wakaba 1.77 !!!cp (33);
1171 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
1172 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1173 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1174 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
1175     column => $self->{column_prev} - 1,
1176 wakaba 1.118 };
1177 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1178 wakaba 1.1 redo A;
1179     }
1180 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
1181 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1182     $self->{next_char} == 0x000A or # LF
1183     $self->{next_char} == 0x000B or # VT
1184     $self->{next_char} == 0x000C or # FF
1185     $self->{next_char} == 0x0020) { # SP
1186 wakaba 1.77 !!!cp (34);
1187 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1188 wakaba 1.1 !!!next-input-character;
1189     redo A;
1190 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1191 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1192 wakaba 1.77 !!!cp (35);
1193 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1194 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1195 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1196 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1197     # ## NOTE: This should never be reached.
1198     # !!! cp (36);
1199     # !!! parse-error (type => 'end tag attribute');
1200     #} else {
1201 wakaba 1.77 !!!cp (37);
1202 wakaba 1.78 #}
1203 wakaba 1.1 } else {
1204     die "$0: $self->{current_token}->{type}: Unknown token type";
1205     }
1206 wakaba 1.57 $self->{state} = DATA_STATE;
1207 wakaba 1.1 !!!next-input-character;
1208    
1209     !!!emit ($self->{current_token}); # start tag or end tag
1210    
1211     redo A;
1212 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1213     $self->{next_char} <= 0x005A) { # A..Z
1214 wakaba 1.77 !!!cp (38);
1215 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1216 wakaba 1.1 # start tag or end tag
1217     ## Stay in this state
1218     !!!next-input-character;
1219     redo A;
1220 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1221 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1222 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1223 wakaba 1.77 !!!cp (39);
1224 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1225 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1226 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1227 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1228     # ## NOTE: This state should never be reached.
1229     # !!! cp (40);
1230     # !!! parse-error (type => 'end tag attribute');
1231     #} else {
1232 wakaba 1.77 !!!cp (41);
1233 wakaba 1.78 #}
1234 wakaba 1.1 } else {
1235     die "$0: $self->{current_token}->{type}: Unknown token type";
1236     }
1237 wakaba 1.57 $self->{state} = DATA_STATE;
1238 wakaba 1.1 # reconsume
1239    
1240     !!!emit ($self->{current_token}); # start tag or end tag
1241    
1242     redo A;
1243 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1244 wakaba 1.125 !!!cp (42);
1245     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1246 wakaba 1.1 !!!next-input-character;
1247     redo A;
1248     } else {
1249 wakaba 1.77 !!!cp (44);
1250 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1251 wakaba 1.1 # start tag or end tag
1252     ## Stay in the state
1253     !!!next-input-character;
1254     redo A;
1255     }
1256 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1257 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1258     $self->{next_char} == 0x000A or # LF
1259     $self->{next_char} == 0x000B or # VT
1260     $self->{next_char} == 0x000C or # FF
1261     $self->{next_char} == 0x0020) { # SP
1262 wakaba 1.77 !!!cp (45);
1263 wakaba 1.1 ## Stay in the state
1264     !!!next-input-character;
1265     redo A;
1266 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1267 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1268 wakaba 1.77 !!!cp (46);
1269 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1270 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1271 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1272 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1273 wakaba 1.77 !!!cp (47);
1274 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1275 wakaba 1.77 } else {
1276     !!!cp (48);
1277 wakaba 1.1 }
1278     } else {
1279     die "$0: $self->{current_token}->{type}: Unknown token type";
1280     }
1281 wakaba 1.57 $self->{state} = DATA_STATE;
1282 wakaba 1.1 !!!next-input-character;
1283    
1284     !!!emit ($self->{current_token}); # start tag or end tag
1285    
1286     redo A;
1287 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1288     $self->{next_char} <= 0x005A) { # A..Z
1289 wakaba 1.77 !!!cp (49);
1290 wakaba 1.119 $self->{current_attribute}
1291     = {name => chr ($self->{next_char} + 0x0020),
1292     value => '',
1293     line => $self->{line}, column => $self->{column}};
1294 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1295 wakaba 1.1 !!!next-input-character;
1296     redo A;
1297 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1298 wakaba 1.125 !!!cp (50);
1299     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1300 wakaba 1.1 !!!next-input-character;
1301     redo A;
1302 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1303 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1304 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1305 wakaba 1.77 !!!cp (52);
1306 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1307 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1308 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1309 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1310 wakaba 1.77 !!!cp (53);
1311 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1312 wakaba 1.77 } else {
1313     !!!cp (54);
1314 wakaba 1.1 }
1315     } else {
1316     die "$0: $self->{current_token}->{type}: Unknown token type";
1317     }
1318 wakaba 1.57 $self->{state} = DATA_STATE;
1319 wakaba 1.1 # reconsume
1320    
1321     !!!emit ($self->{current_token}); # start tag or end tag
1322    
1323     redo A;
1324     } else {
1325 wakaba 1.72 if ({
1326     0x0022 => 1, # "
1327     0x0027 => 1, # '
1328     0x003D => 1, # =
1329 wakaba 1.76 }->{$self->{next_char}}) {
1330 wakaba 1.77 !!!cp (55);
1331 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1332 wakaba 1.77 } else {
1333     !!!cp (56);
1334 wakaba 1.72 }
1335 wakaba 1.119 $self->{current_attribute}
1336     = {name => chr ($self->{next_char}),
1337     value => '',
1338     line => $self->{line}, column => $self->{column}};
1339 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1340 wakaba 1.1 !!!next-input-character;
1341     redo A;
1342     }
1343 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1344 wakaba 1.1 my $before_leave = sub {
1345     if (exists $self->{current_token}->{attributes} # start tag or end tag
1346     ->{$self->{current_attribute}->{name}}) { # MUST
1347 wakaba 1.77 !!!cp (57);
1348 wakaba 1.120 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1349 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1350     } else {
1351 wakaba 1.77 !!!cp (58);
1352 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1353     = $self->{current_attribute};
1354     }
1355     }; # $before_leave
1356    
1357 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1358     $self->{next_char} == 0x000A or # LF
1359     $self->{next_char} == 0x000B or # VT
1360     $self->{next_char} == 0x000C or # FF
1361     $self->{next_char} == 0x0020) { # SP
1362 wakaba 1.77 !!!cp (59);
1363 wakaba 1.1 $before_leave->();
1364 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1365 wakaba 1.1 !!!next-input-character;
1366     redo A;
1367 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1368 wakaba 1.77 !!!cp (60);
1369 wakaba 1.1 $before_leave->();
1370 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1371 wakaba 1.1 !!!next-input-character;
1372     redo A;
1373 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1374 wakaba 1.1 $before_leave->();
1375 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1376 wakaba 1.77 !!!cp (61);
1377 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1378 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1379 wakaba 1.77 !!!cp (62);
1380 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1381 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1382 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1383 wakaba 1.1 }
1384     } else {
1385     die "$0: $self->{current_token}->{type}: Unknown token type";
1386     }
1387 wakaba 1.57 $self->{state} = DATA_STATE;
1388 wakaba 1.1 !!!next-input-character;
1389    
1390     !!!emit ($self->{current_token}); # start tag or end tag
1391    
1392     redo A;
1393 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1394     $self->{next_char} <= 0x005A) { # A..Z
1395 wakaba 1.77 !!!cp (63);
1396 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1397 wakaba 1.1 ## Stay in the state
1398     !!!next-input-character;
1399     redo A;
1400 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1401 wakaba 1.125 !!!cp (64);
1402 wakaba 1.1 $before_leave->();
1403 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1404 wakaba 1.1 !!!next-input-character;
1405     redo A;
1406 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1407 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1408 wakaba 1.1 $before_leave->();
1409 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1410 wakaba 1.77 !!!cp (66);
1411 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1412 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1413 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1414 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1415 wakaba 1.77 !!!cp (67);
1416 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1417 wakaba 1.77 } else {
1418 wakaba 1.78 ## NOTE: This state should never be reached.
1419 wakaba 1.77 !!!cp (68);
1420 wakaba 1.1 }
1421     } else {
1422     die "$0: $self->{current_token}->{type}: Unknown token type";
1423     }
1424 wakaba 1.57 $self->{state} = DATA_STATE;
1425 wakaba 1.1 # reconsume
1426    
1427     !!!emit ($self->{current_token}); # start tag or end tag
1428    
1429     redo A;
1430     } else {
1431 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1432     $self->{next_char} == 0x0027) { # '
1433 wakaba 1.77 !!!cp (69);
1434 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1435 wakaba 1.77 } else {
1436     !!!cp (70);
1437 wakaba 1.72 }
1438 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1439 wakaba 1.1 ## Stay in the state
1440     !!!next-input-character;
1441     redo A;
1442     }
1443 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1444 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1445     $self->{next_char} == 0x000A or # LF
1446     $self->{next_char} == 0x000B or # VT
1447     $self->{next_char} == 0x000C or # FF
1448     $self->{next_char} == 0x0020) { # SP
1449 wakaba 1.77 !!!cp (71);
1450 wakaba 1.1 ## Stay in the state
1451     !!!next-input-character;
1452     redo A;
1453 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1454 wakaba 1.77 !!!cp (72);
1455 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1456 wakaba 1.1 !!!next-input-character;
1457     redo A;
1458 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1459 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1460 wakaba 1.77 !!!cp (73);
1461 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1462 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1463 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1464 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1465 wakaba 1.77 !!!cp (74);
1466 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1467 wakaba 1.77 } else {
1468 wakaba 1.78 ## NOTE: This state should never be reached.
1469 wakaba 1.77 !!!cp (75);
1470 wakaba 1.1 }
1471     } else {
1472     die "$0: $self->{current_token}->{type}: Unknown token type";
1473     }
1474 wakaba 1.57 $self->{state} = DATA_STATE;
1475 wakaba 1.1 !!!next-input-character;
1476    
1477     !!!emit ($self->{current_token}); # start tag or end tag
1478    
1479     redo A;
1480 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1481     $self->{next_char} <= 0x005A) { # A..Z
1482 wakaba 1.77 !!!cp (76);
1483 wakaba 1.119 $self->{current_attribute}
1484     = {name => chr ($self->{next_char} + 0x0020),
1485     value => '',
1486     line => $self->{line}, column => $self->{column}};
1487 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1488 wakaba 1.1 !!!next-input-character;
1489     redo A;
1490 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1491 wakaba 1.125 !!!cp (77);
1492     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1493 wakaba 1.1 !!!next-input-character;
1494     redo A;
1495 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1496 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1497 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1498 wakaba 1.77 !!!cp (79);
1499 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1500 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1501 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1502 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1503 wakaba 1.77 !!!cp (80);
1504 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1505 wakaba 1.77 } else {
1506 wakaba 1.78 ## NOTE: This state should never be reached.
1507 wakaba 1.77 !!!cp (81);
1508 wakaba 1.1 }
1509     } else {
1510     die "$0: $self->{current_token}->{type}: Unknown token type";
1511     }
1512 wakaba 1.57 $self->{state} = DATA_STATE;
1513 wakaba 1.1 # reconsume
1514    
1515     !!!emit ($self->{current_token}); # start tag or end tag
1516    
1517     redo A;
1518     } else {
1519 wakaba 1.77 !!!cp (82);
1520 wakaba 1.119 $self->{current_attribute}
1521     = {name => chr ($self->{next_char}),
1522     value => '',
1523     line => $self->{line}, column => $self->{column}};
1524 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1525 wakaba 1.1 !!!next-input-character;
1526     redo A;
1527     }
1528 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1529 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1530     $self->{next_char} == 0x000A or # LF
1531     $self->{next_char} == 0x000B or # VT
1532     $self->{next_char} == 0x000C or # FF
1533     $self->{next_char} == 0x0020) { # SP
1534 wakaba 1.77 !!!cp (83);
1535 wakaba 1.1 ## Stay in the state
1536     !!!next-input-character;
1537     redo A;
1538 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1539 wakaba 1.77 !!!cp (84);
1540 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1541 wakaba 1.1 !!!next-input-character;
1542     redo A;
1543 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1544 wakaba 1.77 !!!cp (85);
1545 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1546 wakaba 1.1 ## reconsume
1547     redo A;
1548 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1549 wakaba 1.77 !!!cp (86);
1550 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1551 wakaba 1.1 !!!next-input-character;
1552     redo A;
1553 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1554 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1555 wakaba 1.77 !!!cp (87);
1556 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1557 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1558 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1559 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1560 wakaba 1.77 !!!cp (88);
1561 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1562 wakaba 1.77 } else {
1563 wakaba 1.78 ## NOTE: This state should never be reached.
1564 wakaba 1.77 !!!cp (89);
1565 wakaba 1.1 }
1566     } else {
1567     die "$0: $self->{current_token}->{type}: Unknown token type";
1568     }
1569 wakaba 1.57 $self->{state} = DATA_STATE;
1570 wakaba 1.1 !!!next-input-character;
1571    
1572     !!!emit ($self->{current_token}); # start tag or end tag
1573    
1574     redo A;
1575 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1576 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1577 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1578 wakaba 1.77 !!!cp (90);
1579 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1580 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1581 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1582 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1583 wakaba 1.77 !!!cp (91);
1584 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1585 wakaba 1.77 } else {
1586 wakaba 1.78 ## NOTE: This state should never be reached.
1587 wakaba 1.77 !!!cp (92);
1588 wakaba 1.1 }
1589     } else {
1590     die "$0: $self->{current_token}->{type}: Unknown token type";
1591     }
1592 wakaba 1.57 $self->{state} = DATA_STATE;
1593 wakaba 1.1 ## reconsume
1594    
1595     !!!emit ($self->{current_token}); # start tag or end tag
1596    
1597     redo A;
1598     } else {
1599 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1600 wakaba 1.77 !!!cp (93);
1601 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1602 wakaba 1.77 } else {
1603     !!!cp (94);
1604 wakaba 1.72 }
1605 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1606 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1607 wakaba 1.1 !!!next-input-character;
1608     redo A;
1609     }
1610 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1611 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1612 wakaba 1.77 !!!cp (95);
1613 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1614 wakaba 1.1 !!!next-input-character;
1615     redo A;
1616 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1617 wakaba 1.77 !!!cp (96);
1618 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1619     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1620 wakaba 1.1 !!!next-input-character;
1621     redo A;
1622 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1623 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1624 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1625 wakaba 1.77 !!!cp (97);
1626 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1627 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1628 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1629 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1630 wakaba 1.77 !!!cp (98);
1631 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1632 wakaba 1.77 } else {
1633 wakaba 1.78 ## NOTE: This state should never be reached.
1634 wakaba 1.77 !!!cp (99);
1635 wakaba 1.1 }
1636     } else {
1637     die "$0: $self->{current_token}->{type}: Unknown token type";
1638     }
1639 wakaba 1.57 $self->{state} = DATA_STATE;
1640 wakaba 1.1 ## reconsume
1641    
1642     !!!emit ($self->{current_token}); # start tag or end tag
1643    
1644     redo A;
1645     } else {
1646 wakaba 1.77 !!!cp (100);
1647 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1648 wakaba 1.1 ## Stay in the state
1649     !!!next-input-character;
1650     redo A;
1651     }
1652 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1653 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1654 wakaba 1.77 !!!cp (101);
1655 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1656 wakaba 1.1 !!!next-input-character;
1657     redo A;
1658 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1659 wakaba 1.77 !!!cp (102);
1660 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1661     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1662 wakaba 1.1 !!!next-input-character;
1663     redo A;
1664 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1665 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1666 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1667 wakaba 1.77 !!!cp (103);
1668 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1669 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1670 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1671 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1672 wakaba 1.77 !!!cp (104);
1673 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1674 wakaba 1.77 } else {
1675 wakaba 1.78 ## NOTE: This state should never be reached.
1676 wakaba 1.77 !!!cp (105);
1677 wakaba 1.1 }
1678     } else {
1679     die "$0: $self->{current_token}->{type}: Unknown token type";
1680     }
1681 wakaba 1.57 $self->{state} = DATA_STATE;
1682 wakaba 1.1 ## reconsume
1683    
1684     !!!emit ($self->{current_token}); # start tag or end tag
1685    
1686     redo A;
1687     } else {
1688 wakaba 1.77 !!!cp (106);
1689 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1690 wakaba 1.1 ## Stay in the state
1691     !!!next-input-character;
1692     redo A;
1693     }
1694 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1695 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1696     $self->{next_char} == 0x000A or # LF
1697     $self->{next_char} == 0x000B or # VT
1698     $self->{next_char} == 0x000C or # FF
1699     $self->{next_char} == 0x0020) { # SP
1700 wakaba 1.77 !!!cp (107);
1701 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1702 wakaba 1.1 !!!next-input-character;
1703     redo A;
1704 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1705 wakaba 1.77 !!!cp (108);
1706 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1707     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1708 wakaba 1.1 !!!next-input-character;
1709     redo A;
1710 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1711 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1712 wakaba 1.77 !!!cp (109);
1713 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1714 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1715 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1716 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1717 wakaba 1.77 !!!cp (110);
1718 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1719 wakaba 1.77 } else {
1720 wakaba 1.78 ## NOTE: This state should never be reached.
1721 wakaba 1.77 !!!cp (111);
1722 wakaba 1.1 }
1723     } else {
1724     die "$0: $self->{current_token}->{type}: Unknown token type";
1725     }
1726 wakaba 1.57 $self->{state} = DATA_STATE;
1727 wakaba 1.1 !!!next-input-character;
1728    
1729     !!!emit ($self->{current_token}); # start tag or end tag
1730    
1731     redo A;
1732 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1733 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1734 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1735 wakaba 1.77 !!!cp (112);
1736 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1737 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1738 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1739 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1740 wakaba 1.77 !!!cp (113);
1741 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1742 wakaba 1.77 } else {
1743 wakaba 1.78 ## NOTE: This state should never be reached.
1744 wakaba 1.77 !!!cp (114);
1745 wakaba 1.1 }
1746     } else {
1747     die "$0: $self->{current_token}->{type}: Unknown token type";
1748     }
1749 wakaba 1.57 $self->{state} = DATA_STATE;
1750 wakaba 1.1 ## reconsume
1751    
1752     !!!emit ($self->{current_token}); # start tag or end tag
1753    
1754     redo A;
1755     } else {
1756 wakaba 1.72 if ({
1757     0x0022 => 1, # "
1758     0x0027 => 1, # '
1759     0x003D => 1, # =
1760 wakaba 1.76 }->{$self->{next_char}}) {
1761 wakaba 1.77 !!!cp (115);
1762 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1763 wakaba 1.77 } else {
1764     !!!cp (116);
1765 wakaba 1.72 }
1766 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1767 wakaba 1.1 ## Stay in the state
1768     !!!next-input-character;
1769     redo A;
1770     }
1771 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1772 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1773     (1,
1774     $self->{last_attribute_value_state}
1775     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1776     $self->{last_attribute_value_state}
1777     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1778     -1);
1779 wakaba 1.1
1780     unless (defined $token) {
1781 wakaba 1.77 !!!cp (117);
1782 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1783     } else {
1784 wakaba 1.77 !!!cp (118);
1785 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1786 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1787 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1788     }
1789    
1790     $self->{state} = $self->{last_attribute_value_state};
1791     # next-input-character is already done
1792     redo A;
1793 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1794 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1795     $self->{next_char} == 0x000A or # LF
1796     $self->{next_char} == 0x000B or # VT
1797     $self->{next_char} == 0x000C or # FF
1798     $self->{next_char} == 0x0020) { # SP
1799 wakaba 1.77 !!!cp (118);
1800 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1801     !!!next-input-character;
1802     redo A;
1803 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1804 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1805 wakaba 1.77 !!!cp (119);
1806 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1807     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1808     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1809     if ($self->{current_token}->{attributes}) {
1810 wakaba 1.77 !!!cp (120);
1811 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1812 wakaba 1.77 } else {
1813 wakaba 1.78 ## NOTE: This state should never be reached.
1814 wakaba 1.77 !!!cp (121);
1815 wakaba 1.72 }
1816     } else {
1817     die "$0: $self->{current_token}->{type}: Unknown token type";
1818     }
1819     $self->{state} = DATA_STATE;
1820     !!!next-input-character;
1821    
1822     !!!emit ($self->{current_token}); # start tag or end tag
1823    
1824     redo A;
1825 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1826 wakaba 1.125 !!!cp (122);
1827     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1828 wakaba 1.72 !!!next-input-character;
1829 wakaba 1.125 redo A;
1830 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1831     !!!parse-error (type => 'unclosed tag');
1832     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1833     !!!cp (122.3);
1834     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1835     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1836     if ($self->{current_token}->{attributes}) {
1837     !!!cp (122.1);
1838     !!!parse-error (type => 'end tag attribute');
1839     } else {
1840     ## NOTE: This state should never be reached.
1841     !!!cp (122.2);
1842     }
1843     } else {
1844     die "$0: $self->{current_token}->{type}: Unknown token type";
1845     }
1846     $self->{state} = DATA_STATE;
1847     ## Reconsume.
1848     !!!emit ($self->{current_token}); # start tag or end tag
1849     redo A;
1850 wakaba 1.125 } else {
1851     !!!cp ('124.1');
1852     !!!parse-error (type => 'no space between attributes');
1853     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1854     ## reconsume
1855     redo A;
1856     }
1857     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1858     if ($self->{next_char} == 0x003E) { # >
1859     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1860     !!!cp ('124.2');
1861     !!!parse-error (type => 'nestc', token => $self->{current_token});
1862     ## TODO: Different type than slash in start tag
1863     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1864     if ($self->{current_token}->{attributes}) {
1865     !!!cp ('124.4');
1866     !!!parse-error (type => 'end tag attribute');
1867     } else {
1868     !!!cp ('124.5');
1869     }
1870     ## TODO: Test |<title></title/>|
1871 wakaba 1.72 } else {
1872 wakaba 1.125 !!!cp ('124.3');
1873     $self->{self_closing} = 1;
1874 wakaba 1.72 }
1875 wakaba 1.125
1876     $self->{state} = DATA_STATE;
1877     !!!next-input-character;
1878    
1879     !!!emit ($self->{current_token}); # start tag or end tag
1880    
1881 wakaba 1.72 redo A;
1882 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1883     !!!parse-error (type => 'unclosed tag');
1884     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1885     !!!cp (124.7);
1886     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1887     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1888     if ($self->{current_token}->{attributes}) {
1889     !!!cp (124.5);
1890     !!!parse-error (type => 'end tag attribute');
1891     } else {
1892     ## NOTE: This state should never be reached.
1893     !!!cp (124.6);
1894     }
1895     } else {
1896     die "$0: $self->{current_token}->{type}: Unknown token type";
1897     }
1898     $self->{state} = DATA_STATE;
1899     ## Reconsume.
1900     !!!emit ($self->{current_token}); # start tag or end tag
1901     redo A;
1902 wakaba 1.72 } else {
1903 wakaba 1.125 !!!cp ('124.4');
1904     !!!parse-error (type => 'nestc');
1905     ## TODO: This error type is wrong.
1906 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1907 wakaba 1.125 ## Reconsume.
1908 wakaba 1.72 redo A;
1909     }
1910 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1911 wakaba 1.1 ## (only happen if PCDATA state)
1912    
1913 wakaba 1.112 ## NOTE: Set by the previous state
1914     #my $token = {type => COMMENT_TOKEN, data => ''};
1915 wakaba 1.1
1916     BC: {
1917 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1918 wakaba 1.77 !!!cp (124);
1919 wakaba 1.57 $self->{state} = DATA_STATE;
1920 wakaba 1.1 !!!next-input-character;
1921    
1922 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1923 wakaba 1.1
1924     redo A;
1925 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1926 wakaba 1.77 !!!cp (125);
1927 wakaba 1.57 $self->{state} = DATA_STATE;
1928 wakaba 1.1 ## reconsume
1929    
1930 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1931 wakaba 1.1
1932     redo A;
1933     } else {
1934 wakaba 1.77 !!!cp (126);
1935 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1936 wakaba 1.1 !!!next-input-character;
1937     redo BC;
1938     }
1939     } # BC
1940 wakaba 1.77
1941     die "$0: _get_next_token: unexpected case [BC]";
1942 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1943 wakaba 1.1 ## (only happen if PCDATA state)
1944    
1945 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1946 wakaba 1.112
1947 wakaba 1.1 my @next_char;
1948 wakaba 1.76 push @next_char, $self->{next_char};
1949 wakaba 1.1
1950 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1951 wakaba 1.1 !!!next-input-character;
1952 wakaba 1.76 push @next_char, $self->{next_char};
1953     if ($self->{next_char} == 0x002D) { # -
1954 wakaba 1.77 !!!cp (127);
1955 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1956 wakaba 1.120 line => $l, column => $c,
1957 wakaba 1.118 };
1958 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1959 wakaba 1.1 !!!next-input-character;
1960     redo A;
1961 wakaba 1.77 } else {
1962     !!!cp (128);
1963 wakaba 1.1 }
1964 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1965     $self->{next_char} == 0x0064) { # d
1966 wakaba 1.1 !!!next-input-character;
1967 wakaba 1.76 push @next_char, $self->{next_char};
1968     if ($self->{next_char} == 0x004F or # O
1969     $self->{next_char} == 0x006F) { # o
1970 wakaba 1.1 !!!next-input-character;
1971 wakaba 1.76 push @next_char, $self->{next_char};
1972     if ($self->{next_char} == 0x0043 or # C
1973     $self->{next_char} == 0x0063) { # c
1974 wakaba 1.1 !!!next-input-character;
1975 wakaba 1.76 push @next_char, $self->{next_char};
1976     if ($self->{next_char} == 0x0054 or # T
1977     $self->{next_char} == 0x0074) { # t
1978 wakaba 1.1 !!!next-input-character;
1979 wakaba 1.76 push @next_char, $self->{next_char};
1980     if ($self->{next_char} == 0x0059 or # Y
1981     $self->{next_char} == 0x0079) { # y
1982 wakaba 1.1 !!!next-input-character;
1983 wakaba 1.76 push @next_char, $self->{next_char};
1984     if ($self->{next_char} == 0x0050 or # P
1985     $self->{next_char} == 0x0070) { # p
1986 wakaba 1.1 !!!next-input-character;
1987 wakaba 1.76 push @next_char, $self->{next_char};
1988     if ($self->{next_char} == 0x0045 or # E
1989     $self->{next_char} == 0x0065) { # e
1990 wakaba 1.77 !!!cp (129);
1991     ## TODO: This nested letter-by-letter matching is clumsy; rewrite it.
1992 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1993 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1994     quirks => 1,
1995 wakaba 1.120 line => $l, column => $c,
1996 wakaba 1.118 };
1997 wakaba 1.1 !!!next-input-character;
1998     redo A;
1999 wakaba 1.77 } else {
2000     !!!cp (130);
2001 wakaba 1.1 }
2002 wakaba 1.77 } else {
2003     !!!cp (131);
2004 wakaba 1.1 }
2005 wakaba 1.77 } else {
2006     !!!cp (132);
2007 wakaba 1.1 }
2008 wakaba 1.77 } else {
2009     !!!cp (133);
2010 wakaba 1.1 }
2011 wakaba 1.77 } else {
2012     !!!cp (134);
2013 wakaba 1.1 }
2014 wakaba 1.77 } else {
2015     !!!cp (135);
2016 wakaba 1.1 }
2017 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2018     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
2019     $self->{next_char} == 0x005B) { # [
2020     !!!next-input-character;
2021     push @next_char, $self->{next_char};
2022     if ($self->{next_char} == 0x0043) { # C
2023     !!!next-input-character;
2024     push @next_char, $self->{next_char};
2025     if ($self->{next_char} == 0x0044) { # D
2026     !!!next-input-character;
2027     push @next_char, $self->{next_char};
2028     if ($self->{next_char} == 0x0041) { # A
2029     !!!next-input-character;
2030     push @next_char, $self->{next_char};
2031     if ($self->{next_char} == 0x0054) { # T
2032     !!!next-input-character;
2033     push @next_char, $self->{next_char};
2034     if ($self->{next_char} == 0x0041) { # A
2035     !!!next-input-character;
2036     push @next_char, $self->{next_char};
2037     if ($self->{next_char} == 0x005B) { # [
2038     !!!cp (135.1);
2039     $self->{state} = CDATA_BLOCK_STATE;
2040     !!!next-input-character;
2041     redo A;
2042     } else {
2043     !!!cp (135.2);
2044     }
2045     } else {
2046     !!!cp (135.3);
2047     }
2048     } else {
2049     !!!cp (135.4);
2050     }
2051     } else {
2052     !!!cp (135.5);
2053     }
2054     } else {
2055     !!!cp (135.6);
2056     }
2057     } else {
2058     !!!cp (135.7);
2059     }
2060 wakaba 1.77 } else {
2061     !!!cp (136);
2062 wakaba 1.1 }
2063    
2064 wakaba 1.30 !!!parse-error (type => 'bogus comment');
2065 wakaba 1.76 $self->{next_char} = shift @next_char;
2066 wakaba 1.1 !!!back-next-input-character (@next_char);
2067 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
2068 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2069 wakaba 1.120 line => $l, column => $c,
2070 wakaba 1.118 };
2071 wakaba 1.1 redo A;
2072    
2073     ## ISSUE: typos in spec: chacacters, is is a parse error
2074     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
2075 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
2076 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2077 wakaba 1.77 !!!cp (137);
2078 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
2079 wakaba 1.23 !!!next-input-character;
2080     redo A;
2081 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2082 wakaba 1.77 !!!cp (138);
2083 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2084 wakaba 1.57 $self->{state} = DATA_STATE;
2085 wakaba 1.23 !!!next-input-character;
2086    
2087     !!!emit ($self->{current_token}); # comment
2088    
2089     redo A;
2090 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2091 wakaba 1.77 !!!cp (139);
2092 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2093 wakaba 1.57 $self->{state} = DATA_STATE;
2094 wakaba 1.23 ## reconsume
2095    
2096     !!!emit ($self->{current_token}); # comment
2097    
2098     redo A;
2099     } else {
2100 wakaba 1.77 !!!cp (140);
2101 wakaba 1.23 $self->{current_token}->{data} # comment
2102 wakaba 1.76 .= chr ($self->{next_char});
2103 wakaba 1.57 $self->{state} = COMMENT_STATE;
2104 wakaba 1.23 !!!next-input-character;
2105     redo A;
2106     }
2107 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
2108 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2109 wakaba 1.77 !!!cp (141);
2110 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2111 wakaba 1.23 !!!next-input-character;
2112     redo A;
2113 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2114 wakaba 1.77 !!!cp (142);
2115 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2116 wakaba 1.57 $self->{state} = DATA_STATE;
2117 wakaba 1.23 !!!next-input-character;
2118    
2119     !!!emit ($self->{current_token}); # comment
2120    
2121     redo A;
2122 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2123 wakaba 1.77 !!!cp (143);
2124 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2125 wakaba 1.57 $self->{state} = DATA_STATE;
2126 wakaba 1.23 ## reconsume
2127    
2128     !!!emit ($self->{current_token}); # comment
2129    
2130     redo A;
2131     } else {
2132 wakaba 1.77 !!!cp (144);
2133 wakaba 1.23 $self->{current_token}->{data} # comment
2134 wakaba 1.76 .= '-' . chr ($self->{next_char});
2135 wakaba 1.57 $self->{state} = COMMENT_STATE;
2136 wakaba 1.23 !!!next-input-character;
2137     redo A;
2138     }
2139 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
2140 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2141 wakaba 1.77 !!!cp (145);
2142 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
2143 wakaba 1.1 !!!next-input-character;
2144     redo A;
2145 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2146 wakaba 1.77 !!!cp (146);
2147 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2148 wakaba 1.57 $self->{state} = DATA_STATE;
2149 wakaba 1.1 ## reconsume
2150    
2151     !!!emit ($self->{current_token}); # comment
2152    
2153     redo A;
2154     } else {
2155 wakaba 1.77 !!!cp (147);
2156 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2157 wakaba 1.1 ## Stay in the state
2158     !!!next-input-character;
2159     redo A;
2160     }
2161 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2162 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2163 wakaba 1.77 !!!cp (148);
2164 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2165 wakaba 1.1 !!!next-input-character;
2166     redo A;
2167 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2168 wakaba 1.77 !!!cp (149);
2169 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2170 wakaba 1.57 $self->{state} = DATA_STATE;
2171 wakaba 1.1 ## reconsume
2172    
2173     !!!emit ($self->{current_token}); # comment
2174    
2175     redo A;
2176     } else {
2177 wakaba 1.77 !!!cp (150);
2178 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2179 wakaba 1.57 $self->{state} = COMMENT_STATE;
2180 wakaba 1.1 !!!next-input-character;
2181     redo A;
2182     }
2183 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
2184 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2185 wakaba 1.77 !!!cp (151);
2186 wakaba 1.57 $self->{state} = DATA_STATE;
2187 wakaba 1.1 !!!next-input-character;
2188    
2189     !!!emit ($self->{current_token}); # comment
2190    
2191     redo A;
2192 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
2193 wakaba 1.77 !!!cp (152);
2194 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2195     line => $self->{line_prev},
2196     column => $self->{column_prev});
2197 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
2198     ## Stay in the state
2199     !!!next-input-character;
2200     redo A;
2201 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2202 wakaba 1.77 !!!cp (153);
2203 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2204 wakaba 1.57 $self->{state} = DATA_STATE;
2205 wakaba 1.1 ## reconsume
2206    
2207     !!!emit ($self->{current_token}); # comment
2208    
2209     redo A;
2210     } else {
2211 wakaba 1.77 !!!cp (154);
2212 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2213     line => $self->{line_prev},
2214     column => $self->{column_prev});
2215 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2216 wakaba 1.57 $self->{state} = COMMENT_STATE;
2217 wakaba 1.1 !!!next-input-character;
2218     redo A;
2219     }
2220 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
2221 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2222     $self->{next_char} == 0x000A or # LF
2223     $self->{next_char} == 0x000B or # VT
2224     $self->{next_char} == 0x000C or # FF
2225     $self->{next_char} == 0x0020) { # SP
2226 wakaba 1.77 !!!cp (155);
2227 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2228 wakaba 1.1 !!!next-input-character;
2229     redo A;
2230     } else {
2231 wakaba 1.77 !!!cp (156);
2232 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
2233 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2234 wakaba 1.1 ## reconsume
2235     redo A;
2236     }
2237 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2238 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2239     $self->{next_char} == 0x000A or # LF
2240     $self->{next_char} == 0x000B or # VT
2241     $self->{next_char} == 0x000C or # FF
2242     $self->{next_char} == 0x0020) { # SP
2243 wakaba 1.77 !!!cp (157);
2244 wakaba 1.1 ## Stay in the state
2245     !!!next-input-character;
2246     redo A;
2247 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2248 wakaba 1.77 !!!cp (158);
2249 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2250 wakaba 1.57 $self->{state} = DATA_STATE;
2251 wakaba 1.1 !!!next-input-character;
2252    
2253 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2254 wakaba 1.1
2255     redo A;
2256 wakaba 1.77 } elsif ($self->{next_char} == -1) {
2257     !!!cp (159);
2258 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2259 wakaba 1.57 $self->{state} = DATA_STATE;
2260 wakaba 1.1 ## reconsume
2261    
2262 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2263 wakaba 1.1
2264     redo A;
2265     } else {
2266 wakaba 1.77 !!!cp (160);
2267 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
2268     delete $self->{current_token}->{quirks};
2269 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
2270 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
2271 wakaba 1.1 !!!next-input-character;
2272     redo A;
2273     }
2274 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2275 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
2276 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2277     $self->{next_char} == 0x000A or # LF
2278     $self->{next_char} == 0x000B or # VT
2279     $self->{next_char} == 0x000C or # FF
2280     $self->{next_char} == 0x0020) { # SP
2281 wakaba 1.77 !!!cp (161);
2282 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2283 wakaba 1.1 !!!next-input-character;
2284     redo A;
2285 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2286 wakaba 1.77 !!!cp (162);
2287 wakaba 1.57 $self->{state} = DATA_STATE;
2288 wakaba 1.1 !!!next-input-character;
2289    
2290     !!!emit ($self->{current_token}); # DOCTYPE
2291    
2292     redo A;
2293 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2294 wakaba 1.77 !!!cp (163);
2295 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2296 wakaba 1.57 $self->{state} = DATA_STATE;
2297 wakaba 1.1 ## reconsume
2298    
2299 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2300 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2301 wakaba 1.1
2302     redo A;
2303     } else {
2304 wakaba 1.77 !!!cp (164);
2305 wakaba 1.1 $self->{current_token}->{name}
2306 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
2307 wakaba 1.1 ## Stay in the state
2308     !!!next-input-character;
2309     redo A;
2310     }
2311 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2312 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2313     $self->{next_char} == 0x000A or # LF
2314     $self->{next_char} == 0x000B or # VT
2315     $self->{next_char} == 0x000C or # FF
2316     $self->{next_char} == 0x0020) { # SP
2317 wakaba 1.77 !!!cp (165);
2318 wakaba 1.1 ## Stay in the state
2319     !!!next-input-character;
2320     redo A;
2321 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2322 wakaba 1.77 !!!cp (166);
2323 wakaba 1.57 $self->{state} = DATA_STATE;
2324 wakaba 1.1 !!!next-input-character;
2325    
2326     !!!emit ($self->{current_token}); # DOCTYPE
2327    
2328     redo A;
2329 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2330 wakaba 1.77 !!!cp (167);
2331 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2332 wakaba 1.57 $self->{state} = DATA_STATE;
2333 wakaba 1.1 ## reconsume
2334    
2335 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2336 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2337    
2338     redo A;
2339 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2340     $self->{next_char} == 0x0070) { # p
2341 wakaba 1.18 !!!next-input-character;
2342 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
2343     $self->{next_char} == 0x0075) { # u
2344 wakaba 1.18 !!!next-input-character;
2345 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
2346     $self->{next_char} == 0x0062) { # b
2347 wakaba 1.18 !!!next-input-character;
2348 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
2349     $self->{next_char} == 0x006C) { # l
2350 wakaba 1.18 !!!next-input-character;
2351 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
2352     $self->{next_char} == 0x0069) { # i
2353 wakaba 1.18 !!!next-input-character;
2354 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
2355     $self->{next_char} == 0x0063) { # c
2356 wakaba 1.77 !!!cp (168);
2357 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2358 wakaba 1.18 !!!next-input-character;
2359     redo A;
2360 wakaba 1.77 } else {
2361     !!!cp (169);
2362 wakaba 1.18 }
2363 wakaba 1.77 } else {
2364     !!!cp (170);
2365 wakaba 1.18 }
2366 wakaba 1.77 } else {
2367     !!!cp (171);
2368 wakaba 1.18 }
2369 wakaba 1.77 } else {
2370     !!!cp (172);
2371 wakaba 1.18 }
2372 wakaba 1.77 } else {
2373     !!!cp (173);
2374 wakaba 1.18 }
2375    
2376     #
2377 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2378     $self->{next_char} == 0x0073) { # s
2379 wakaba 1.18 !!!next-input-character;
2380 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
2381     $self->{next_char} == 0x0079) { # y
2382 wakaba 1.18 !!!next-input-character;
2383 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
2384     $self->{next_char} == 0x0073) { # s
2385 wakaba 1.18 !!!next-input-character;
2386 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
2387     $self->{next_char} == 0x0074) { # t
2388 wakaba 1.18 !!!next-input-character;
2389 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
2390     $self->{next_char} == 0x0065) { # e
2391 wakaba 1.18 !!!next-input-character;
2392 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
2393     $self->{next_char} == 0x006D) { # m
2394 wakaba 1.77 !!!cp (174);
2395 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2396 wakaba 1.18 !!!next-input-character;
2397     redo A;
2398 wakaba 1.77 } else {
2399     !!!cp (175);
2400 wakaba 1.18 }
2401 wakaba 1.77 } else {
2402     !!!cp (176);
2403 wakaba 1.18 }
2404 wakaba 1.77 } else {
2405     !!!cp (177);
2406 wakaba 1.18 }
2407 wakaba 1.77 } else {
2408     !!!cp (178);
2409 wakaba 1.18 }
2410 wakaba 1.77 } else {
2411     !!!cp (179);
2412 wakaba 1.18 }
2413    
2414     #
2415     } else {
2416 wakaba 1.77 !!!cp (180);
2417 wakaba 1.18 !!!next-input-character;
2418     #
2419     }
2420    
2421     !!!parse-error (type => 'string after DOCTYPE name');
2422 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2423 wakaba 1.73
2424 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2425 wakaba 1.18 # next-input-character is already done
2426     redo A;
2427 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2428 wakaba 1.18 if ({
2429     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2430     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2431 wakaba 1.76 }->{$self->{next_char}}) {
2432 wakaba 1.77 !!!cp (181);
2433 wakaba 1.18 ## Stay in the state
2434     !!!next-input-character;
2435     redo A;
2436 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2437 wakaba 1.77 !!!cp (182);
2438 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2439 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2440 wakaba 1.18 !!!next-input-character;
2441     redo A;
2442 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2443 wakaba 1.77 !!!cp (183);
2444 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2445 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2446 wakaba 1.18 !!!next-input-character;
2447     redo A;
2448 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2449 wakaba 1.77 !!!cp (184);
2450 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2451    
2452 wakaba 1.57 $self->{state} = DATA_STATE;
2453 wakaba 1.18 !!!next-input-character;
2454    
2455 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2456 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2457    
2458     redo A;
2459 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2460 wakaba 1.77 !!!cp (185);
2461 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2462    
2463 wakaba 1.57 $self->{state} = DATA_STATE;
2464 wakaba 1.18 ## reconsume
2465    
2466 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2467 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2468    
2469     redo A;
2470     } else {
2471 wakaba 1.77 !!!cp (186);
2472 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2473 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2474 wakaba 1.73
2475 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2476 wakaba 1.18 !!!next-input-character;
2477     redo A;
2478     }
2479 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2480 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2481 wakaba 1.77 !!!cp (187);
2482 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2483 wakaba 1.18 !!!next-input-character;
2484     redo A;
2485 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2486 wakaba 1.77 !!!cp (188);
2487 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2488    
2489     $self->{state} = DATA_STATE;
2490     !!!next-input-character;
2491    
2492 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2493 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2494    
2495     redo A;
2496 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2497 wakaba 1.77 !!!cp (189);
2498 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2499    
2500 wakaba 1.57 $self->{state} = DATA_STATE;
2501 wakaba 1.18 ## reconsume
2502    
2503 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2504 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2505    
2506     redo A;
2507     } else {
2508 wakaba 1.77 !!!cp (190);
2509 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2510 wakaba 1.76 .= chr $self->{next_char};
2511 wakaba 1.18 ## Stay in the state
2512     !!!next-input-character;
2513     redo A;
2514     }
2515 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2516 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2517 wakaba 1.77 !!!cp (191);
2518 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2519 wakaba 1.18 !!!next-input-character;
2520     redo A;
2521 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2522 wakaba 1.77 !!!cp (192);
2523 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2524    
2525     $self->{state} = DATA_STATE;
2526     !!!next-input-character;
2527    
2528 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2529 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2530    
2531     redo A;
2532 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2533 wakaba 1.77 !!!cp (193);
2534 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2535    
2536 wakaba 1.57 $self->{state} = DATA_STATE;
2537 wakaba 1.18 ## reconsume
2538    
2539 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2540 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2541    
2542     redo A;
2543     } else {
2544 wakaba 1.77 !!!cp (194);
2545 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2546 wakaba 1.76 .= chr $self->{next_char};
2547 wakaba 1.18 ## Stay in the state
2548     !!!next-input-character;
2549     redo A;
2550     }
2551 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2552 wakaba 1.18 if ({
2553     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2554     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2555 wakaba 1.76 }->{$self->{next_char}}) {
2556 wakaba 1.77 !!!cp (195);
2557 wakaba 1.18 ## Stay in the state
2558     !!!next-input-character;
2559     redo A;
2560 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2561 wakaba 1.77 !!!cp (196);
2562 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2563 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2564 wakaba 1.18 !!!next-input-character;
2565     redo A;
2566 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2567 wakaba 1.77 !!!cp (197);
2568 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2569 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2570 wakaba 1.18 !!!next-input-character;
2571     redo A;
2572 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2573 wakaba 1.77 !!!cp (198);
2574 wakaba 1.57 $self->{state} = DATA_STATE;
2575 wakaba 1.18 !!!next-input-character;
2576    
2577     !!!emit ($self->{current_token}); # DOCTYPE
2578    
2579     redo A;
2580 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2581 wakaba 1.77 !!!cp (199);
2582 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2583    
2584 wakaba 1.57 $self->{state} = DATA_STATE;
2585 wakaba 1.26 ## reconsume
2586 wakaba 1.18
2587 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2588 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2589    
2590     redo A;
2591     } else {
2592 wakaba 1.77 !!!cp (200);
2593 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2594 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2595 wakaba 1.73
2596 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2597 wakaba 1.18 !!!next-input-character;
2598     redo A;
2599     }
2600 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2601 wakaba 1.18 if ({
2602     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2603     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2604 wakaba 1.76 }->{$self->{next_char}}) {
2605 wakaba 1.77 !!!cp (201);
2606 wakaba 1.18 ## Stay in the state
2607     !!!next-input-character;
2608     redo A;
2609 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2610 wakaba 1.77 !!!cp (202);
2611 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2612 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2613 wakaba 1.18 !!!next-input-character;
2614     redo A;
2615 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2616 wakaba 1.77 !!!cp (203);
2617 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2618 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2619 wakaba 1.18 !!!next-input-character;
2620     redo A;
2621 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2622 wakaba 1.77 !!!cp (204);
2623 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2624 wakaba 1.57 $self->{state} = DATA_STATE;
2625 wakaba 1.18 !!!next-input-character;
2626    
2627 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2628 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2629    
2630     redo A;
2631 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2632 wakaba 1.77 !!!cp (205);
2633 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2634    
2635 wakaba 1.57 $self->{state} = DATA_STATE;
2636 wakaba 1.26 ## reconsume
2637 wakaba 1.18
2638 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2639 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2640    
2641     redo A;
2642     } else {
2643 wakaba 1.77 !!!cp (206);
2644 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2645 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2646 wakaba 1.73
2647 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2648 wakaba 1.18 !!!next-input-character;
2649     redo A;
2650     }
2651 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2652 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2653 wakaba 1.77 !!!cp (207);
2654 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2655 wakaba 1.18 !!!next-input-character;
2656     redo A;
2657 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2658 wakaba 1.77 !!!cp (208);
2659 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2660    
2661     $self->{state} = DATA_STATE;
2662     !!!next-input-character;
2663    
2664 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2665 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2666    
2667     redo A;
2668 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2669 wakaba 1.77 !!!cp (209);
2670 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2671    
2672 wakaba 1.57 $self->{state} = DATA_STATE;
2673 wakaba 1.18 ## reconsume
2674    
2675 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2676 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2677    
2678     redo A;
2679     } else {
2680 wakaba 1.77 !!!cp (210);
2681 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2682 wakaba 1.76 .= chr $self->{next_char};
2683 wakaba 1.18 ## Stay in the state
2684     !!!next-input-character;
2685     redo A;
2686     }
2687 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2688 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2689 wakaba 1.77 !!!cp (211);
2690 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2691 wakaba 1.18 !!!next-input-character;
2692     redo A;
2693 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2694 wakaba 1.77 !!!cp (212);
2695 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2696    
2697     $self->{state} = DATA_STATE;
2698     !!!next-input-character;
2699    
2700 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2701 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2702    
2703     redo A;
2704 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2705 wakaba 1.77 !!!cp (213);
2706 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2707    
2708 wakaba 1.57 $self->{state} = DATA_STATE;
2709 wakaba 1.18 ## reconsume
2710    
2711 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2712 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2713    
2714     redo A;
2715     } else {
2716 wakaba 1.77 !!!cp (214);
2717 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2718 wakaba 1.76 .= chr $self->{next_char};
2719 wakaba 1.18 ## Stay in the state
2720     !!!next-input-character;
2721     redo A;
2722     }
2723 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2724 wakaba 1.18 if ({
2725     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2726     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2727 wakaba 1.76 }->{$self->{next_char}}) {
2728 wakaba 1.77 !!!cp (215);
2729 wakaba 1.18 ## Stay in the state
2730     !!!next-input-character;
2731     redo A;
2732 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2733 wakaba 1.77 !!!cp (216);
2734 wakaba 1.57 $self->{state} = DATA_STATE;
2735 wakaba 1.18 !!!next-input-character;
2736    
2737     !!!emit ($self->{current_token}); # DOCTYPE
2738    
2739     redo A;
2740 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2741 wakaba 1.77 !!!cp (217);
2742 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2743    
2744 wakaba 1.57 $self->{state} = DATA_STATE;
2745 wakaba 1.26 ## reconsume
2746 wakaba 1.18
2747 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2748 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2749    
2750     redo A;
2751     } else {
2752 wakaba 1.77 !!!cp (218);
2753 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2754 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2755 wakaba 1.73
2756 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2757 wakaba 1.1 !!!next-input-character;
2758     redo A;
2759     }
2760 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2761 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2762 wakaba 1.77 !!!cp (219);
2763 wakaba 1.57 $self->{state} = DATA_STATE;
2764 wakaba 1.1 !!!next-input-character;
2765    
2766     !!!emit ($self->{current_token}); # DOCTYPE
2767    
2768     redo A;
2769 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2770 wakaba 1.77 !!!cp (220);
2771 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2772 wakaba 1.57 $self->{state} = DATA_STATE;
2773 wakaba 1.1 ## reconsume
2774    
2775     !!!emit ($self->{current_token}); # DOCTYPE
2776    
2777     redo A;
2778     } else {
2779 wakaba 1.77 !!!cp (221);
2780 wakaba 1.1 ## Stay in the state
2781     !!!next-input-character;
2782     redo A;
2783     }
2784 wakaba 1.127 } elsif ($self->{state} == CDATA_BLOCK_STATE) {
2785     my $s = '';
2786    
2787     my ($l, $c) = ($self->{line}, $self->{column});
2788    
2789     CS: while ($self->{next_char} != -1) {
2790     if ($self->{next_char} == 0x005D) { # ]
2791     !!!next-input-character;
2792     if ($self->{next_char} == 0x005D) { # ]
2793     !!!next-input-character;
2794     MDC: {
2795     if ($self->{next_char} == 0x003E) { # >
2796     !!!cp (221.1);
2797     !!!next-input-character;
2798     last CS;
2799     } elsif ($self->{next_char} == 0x005D) { # ]
2800     !!!cp (221.2);
2801     $s .= ']';
2802     !!!next-input-character;
2803     redo MDC;
2804     } else {
2805     !!!cp (221.3);
2806     $s .= ']]';
2807     #
2808     }
2809     } # MDC
2810     } else {
2811     !!!cp (221.4);
2812     $s .= ']';
2813     #
2814     }
2815     } else {
2816     !!!cp (221.5);
2817     #
2818     }
2819     $s .= chr $self->{next_char};
2820     !!!next-input-character;
2821     } # CS
2822    
2823     $self->{state} = DATA_STATE;
2824     ## next-input-character done or EOF, which is reconsumed.
2825    
2826     if (length $s) {
2827     !!!cp (221.6);
2828     !!!emit ({type => CHARACTER_TOKEN, data => $s,
2829     line => $l, column => $c});
2830     } else {
2831     !!!cp (221.7);
2832     }
2833    
2834     redo A;
2835    
2836     ## ISSUE: "text tokens" in spec.
2837     ## TODO: Streaming support
2838 wakaba 1.1 } else {
2839     die "$0: $self->{state}: Unknown state";
2840     }
2841     } # A
2842    
2843     die "$0: _get_next_token: unexpected case";
2844     } # _get_next_token
2845    
## Try to consume a character reference starting at the current input
## character.  The data returned for unresolved names is prefixed with
## |&|, so the |&| itself has presumably been consumed by the caller
## already.
##
## Arguments:
##   $in_attr    - true if the reference is being consumed as part of
##                 an attribute value.  It only affects a named
##                 reference that lacks the closing |;| and is followed
##                 by further name characters: such text is then
##                 returned literally.
##   $additional - code point of one extra character which, like white
##                 space, |<|, |&| and EOF, means "not a reference" when
##                 it is the current input character.
##
## Returns a CHARACTER_TOKEN hash reference (carrying |has_reference|
## when a reference was actually resolved), or |undef| when no token is
## produced.  Parse errors are reported at the position saved in
## |line_prev| / |column_prev| on entry.
sub _tokenize_attempt_to_consume_an_entity ($$$) {
  my ($self, $in_attr, $additional) = @_;

  my ($ref_line, $ref_column) = ($self->{line_prev}, $self->{column_prev});

  ## Characters that can never start a reference.
  my %not_a_reference = (
    0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
    0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
    $additional => 1,
  );
  if ($not_a_reference{$self->{next_char}}) {
    !!!cp (1001);
    ## Don't consume
    ## No error
    return undef;
  }

  ## Numeric character reference
  if ($self->{next_char} == 0x0023) { # #
    !!!next-input-character;

    if ($self->{next_char} == 0x0058 or # X
        $self->{next_char} == 0x0078) { # x
      ## Hexadecimal.  |$code| stays undefined until a digit is seen.
      my $code;
      HEX: {
        ## On the first pass this is the |x| / |X|, which has to be
        ## pushed back if no digit follows.
        my $prev_char = $self->{next_char};
        !!!next-input-character;

        my $digit;
        if (0x0030 <= $self->{next_char} and
            $self->{next_char} <= 0x0039) { # 0..9
          !!!cp (1002);
          $digit = $self->{next_char} - 0x0030;
        } elsif (0x0061 <= $self->{next_char} and
                 $self->{next_char} <= 0x0066) { # a..f
          !!!cp (1003);
          $digit = $self->{next_char} - 0x0061 + 10;
        } elsif (0x0041 <= $self->{next_char} and
                 $self->{next_char} <= 0x0046) { # A..F
          !!!cp (1004);
          $digit = $self->{next_char} - 0x0041 + 10;
        }
        if (defined $digit) {
          $code = ($code || 0) * 0x10 + $digit;
          redo HEX;
        }

        if (not defined $code) { # no hexadecimal digit
          !!!cp (1005);
          !!!parse-error (type => 'bare hcro', line => $ref_line, column => $ref_column);
          !!!back-next-input-character ($prev_char, $self->{next_char});
          $self->{next_char} = 0x0023; # #
          return undef;
        } elsif ($self->{next_char} == 0x003B) { # ;
          !!!cp (1006);
          !!!next-input-character;
        } else {
          !!!cp (1007);
          !!!parse-error (type => 'no refc', line => $ref_line, column => $ref_column);
        }

        ## Replace code points that must not be produced by a reference.
        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
          !!!cp (1008);
          !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $ref_line, column => $ref_column);
          $code = 0xFFFD;
        } elsif ($code > 0x10FFFF) {
          !!!cp (1009);
          !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $ref_line, column => $ref_column);
          $code = 0xFFFD;
        } elsif ($code == 0x000D) {
          !!!cp (1010);
          !!!parse-error (type => 'CR character reference', line => $ref_line, column => $ref_column);
          $code = 0x000A;
        } elsif (0x80 <= $code and $code <= 0x9F) {
          !!!cp (1011);
          !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $ref_line, column => $ref_column);
          $code = $c1_entity_char->{$code};
        }

        return {type => CHARACTER_TOKEN, data => chr $code,
                has_reference => 1,
                line => $ref_line, column => $ref_column};
      } # HEX
    }

    if (0x0030 <= $self->{next_char} and
        $self->{next_char} <= 0x0039) { # 0..9
      ## Decimal
      my $code = $self->{next_char} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_char} and
             $self->{next_char} <= 0x0039) { # 0..9
        !!!cp (1012);
        $code = $code * 10 + ($self->{next_char} - 0x0030);

        !!!next-input-character;
      }

      if ($self->{next_char} == 0x003B) { # ;
        !!!cp (1013);
        !!!next-input-character;
      } else {
        !!!cp (1014);
        !!!parse-error (type => 'no refc', line => $ref_line, column => $ref_column);
      }

      ## Replace code points that must not be produced by a reference.
      if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
        !!!cp (1015);
        !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $ref_line, column => $ref_column);
        $code = 0xFFFD;
      } elsif ($code > 0x10FFFF) {
        !!!cp (1016);
        !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $ref_line, column => $ref_column);
        $code = 0xFFFD;
      } elsif ($code == 0x000D) {
        !!!cp (1017);
        !!!parse-error (type => 'CR character reference', line => $ref_line, column => $ref_column);
        $code = 0x000A;
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!cp (1018);
        !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $ref_line, column => $ref_column);
        $code = $c1_entity_char->{$code};
      }

      return {type => CHARACTER_TOKEN, data => chr $code,
              has_reference => 1,
              line => $ref_line, column => $ref_column};
    }

    ## |&#| followed by neither |x| / |X| nor a decimal digit
    !!!cp (1019);
    !!!parse-error (type => 'bare nero', line => $ref_line, column => $ref_column);
    !!!back-next-input-character ($self->{next_char});
    $self->{next_char} = 0x0023; # #
    return undef;
  }

  ## Named character reference
  if ((0x0041 <= $self->{next_char} and
       $self->{next_char} <= 0x005A) or # A..Z
      (0x0061 <= $self->{next_char} and
       $self->{next_char} <= 0x007A)) { # a..z
    my $entity_name = chr $self->{next_char};
    !!!next-input-character;

    ## |$value| is the replacement text of the longest name matched so
    ## far followed by any characters consumed after it.  |$match| is 1
    ## for a match terminated by |;|, -1 for a match without |;|, less
    ## than -1 when further characters were consumed after such a
    ## match, and 0 when nothing has matched.
    my $value = $entity_name;
    my $match = 0;
    require Whatpm::_NamedEntityList;
    our $EntityChar;

    NAME: while (length $entity_name < 30 and
                 ## NOTE: Some number greater than the maximum length
                 ## of entity name
                 ((0x0041 <= $self->{next_char} and
                   $self->{next_char} <= 0x005A) or # A..Z
                  (0x0061 <= $self->{next_char} and
                   $self->{next_char} <= 0x007A) or # a..z
                  (0x0030 <= $self->{next_char} and
                   $self->{next_char} <= 0x0039) or # 0..9
                  $self->{next_char} == 0x003B)) { # ;
      $entity_name .= chr $self->{next_char};
      my $replacement = $EntityChar->{$entity_name};
      if (not defined $replacement) {
        !!!cp (1022);
        $value .= chr $self->{next_char};
        $match *= 2;
      } elsif ($self->{next_char} == 0x003B) { # ;
        !!!cp (1020);
        $value = $replacement;
        $match = 1;
      } else {
        !!!cp (1021);
        $value = $replacement;
        $match = -1;
      }
      !!!next-input-character;
      ## Only a |;|-terminated match sets |$match| to 1.
      last NAME if $match == 1;
    } # NAME

    if ($match > 0) {
      !!!cp (1023);
      return {type => CHARACTER_TOKEN, data => $value,
              has_reference => 1,
              line => $ref_line, column => $ref_column};
    }

    if ($match < 0) {
      !!!parse-error (type => 'no refc', line => $ref_line, column => $ref_column);
      if ($in_attr and $match < -1) {
        !!!cp (1024);
        return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
                line => $ref_line, column => $ref_column};
      }

      !!!cp (1025);
      return {type => CHARACTER_TOKEN, data => $value,
              has_reference => 1,
              line => $ref_line, column => $ref_column};
    }

    !!!cp (1026);
    !!!parse-error (type => 'bare ero', line => $ref_line, column => $ref_column);
    ## NOTE: "No characters are consumed" in the spec.
    return {type => CHARACTER_TOKEN, data => '&'.$value,
            line => $ref_line, column => $ref_column};
  }

  !!!cp (1027);
  ## no characters are consumed
  !!!parse-error (type => 'bare ero', line => $ref_line, column => $ref_column);
  return undef;
} # _tokenize_attempt_to_consume_an_entity
3052    
## Prepare the document object for the tree construction stage:
## strict error checking is switched off and the document is flagged
## as an HTML document.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## NOTE: $self->{document} MUST be specified before this method is called
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
3061    
## Counterpart of |_initialize_tree_constructor|: switch the
## document's strict error checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;

  $self->{document}->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
3067    
3068     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
3069    
3070 wakaba 1.3 { # tree construction stage
3071     my $token;
3072    
## Entry point of the tree construction stage: fetch the first token,
## reset the per-parse state, then run the insertion-mode handlers in
## order ("initial", "before html", then "before head" and later).
sub _construct_tree ($) {
  my $self = shift;

  ## When an interactive UA makes $self->{document} available to the
  ## user, and when it begins accepting user input, are not defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Reset the state kept on the parser object.
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial (); # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element ();
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main ();
} # _construct_tree
3101    
3102     sub _tree_construction_initial ($) {
3103     my $self = shift;
3104 wakaba 1.84
3105     ## NOTE: "initial" insertion mode
3106    
3107 wakaba 1.18 INITIAL: {
3108 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3109 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
3110     ## error, switch to a conformance checking mode for another
3111     ## language.
3112     my $doctype_name = $token->{name};
3113     $doctype_name = '' unless defined $doctype_name;
3114     $doctype_name =~ tr/a-z/A-Z/;
3115     if (not defined $token->{name} or # <!DOCTYPE>
3116     defined $token->{public_identifier} or
3117     defined $token->{system_identifier}) {
3118 wakaba 1.79 !!!cp ('t1');
3119 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
3120 wakaba 1.18 } elsif ($doctype_name ne 'HTML') {
3121 wakaba 1.79 !!!cp ('t2');
3122 wakaba 1.18 ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
3123 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
3124 wakaba 1.79 } else {
3125     !!!cp ('t3');
3126 wakaba 1.18 }
3127    
3128     my $doctype = $self->{document}->create_document_type_definition
3129     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
3130 wakaba 1.122 ## NOTE: Default value for both |public_id| and |system_id| attributes
3131     ## are empty strings, so that we don't set any value in missing cases.
3132 wakaba 1.18 $doctype->public_id ($token->{public_identifier})
3133     if defined $token->{public_identifier};
3134     $doctype->system_id ($token->{system_identifier})
3135     if defined $token->{system_identifier};
3136     ## NOTE: Other DocumentType attributes are null or empty lists.
3137     ## ISSUE: internalSubset = null??
3138     $self->{document}->append_child ($doctype);
3139    
3140 wakaba 1.75 if ($token->{quirks} or $doctype_name ne 'HTML') {
3141 wakaba 1.79 !!!cp ('t4');
3142 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3143     } elsif (defined $token->{public_identifier}) {
3144     my $pubid = $token->{public_identifier};
3145     $pubid =~ tr/a-z/A-z/;
3146     if ({
3147     "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
3148     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
3149     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
3150     "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
3151     "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
3152     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
3153     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
3154     "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
3155     "-//IETF//DTD HTML 2.0//EN" => 1,
3156     "-//IETF//DTD HTML 2.1E//EN" => 1,
3157     "-//IETF//DTD HTML 3.0//EN" => 1,
3158     "-//IETF//DTD HTML 3.0//EN//" => 1,
3159     "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
3160     "-//IETF//DTD HTML 3.2//EN" => 1,
3161     "-//IETF//DTD HTML 3//EN" => 1,
3162     "-//IETF//DTD HTML LEVEL 0//EN" => 1,
3163     "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
3164     "-//IETF//DTD HTML LEVEL 1//EN" => 1,
3165     "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
3166     "-//IETF//DTD HTML LEVEL 2//EN" => 1,
3167     "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
3168     "-//IETF//DTD HTML LEVEL 3//EN" => 1,
3169     "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
3170     "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
3171     "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
3172     "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
3173     "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
3174     "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
3175     "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
3176     "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
3177     "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
3178     "-//IETF//DTD HTML STRICT//EN" => 1,
3179     "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
3180     "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
3181     "-//IETF//DTD HTML//EN" => 1,
3182     "-//IETF//DTD HTML//EN//2.0" => 1,
3183     "-//IETF//DTD HTML//EN//3.0" => 1,
3184     "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
3185     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
3186     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
3187     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
3188     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
3189     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
3190     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
3191     "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
3192     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
3193     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
3194     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
3195 wakaba 1.72 "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
3196     "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
3197     "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
3198 wakaba 1.18 "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
3199     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
3200     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
3201     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
3202     "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
3203     "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
3204     "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
3205     "-//W3C//DTD HTML 3.2//EN" => 1,
3206     "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
3207     "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
3208     "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
3209     "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
3210     "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
3211     "-//W3C//DTD W3 HTML//EN" => 1,
3212     "-//W3O//DTD W3 HTML 3.0//EN" => 1,
3213     "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
3214     "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
3215     "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
3216     "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
3217     "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
3218     "HTML" => 1,
3219     }->{$pubid}) {
3220 wakaba 1.79 !!!cp ('t5');
3221 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3222     } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
3223     $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
3224     if (defined $token->{system_identifier}) {
3225 wakaba 1.79 !!!cp ('t6');
3226 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3227     } else {
3228 wakaba 1.79 !!!cp ('t7');
3229 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
3230 wakaba 1.3 }
3231 wakaba 1.80 } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
3232     $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
3233 wakaba 1.79 !!!cp ('t8');
3234 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
3235 wakaba 1.79 } else {
3236     !!!cp ('t9');
3237 wakaba 1.18 }
3238 wakaba 1.79 } else {
3239     !!!cp ('t10');
3240 wakaba 1.18 }
3241     if (defined $token->{system_identifier}) {
3242     my $sysid = $token->{system_identifier};
3243     $sysid =~ tr/A-Z/a-z/;
3244     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
3245 wakaba 1.80 ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
3246 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3247 wakaba 1.79 !!!cp ('t11');
3248     } else {
3249     !!!cp ('t12');
3250 wakaba 1.18 }
3251 wakaba 1.79 } else {
3252     !!!cp ('t13');
3253 wakaba 1.18 }
3254    
3255 wakaba 1.84 ## Go to the "before html" insertion mode.
3256 wakaba 1.18 !!!next-token;
3257     return;
3258     } elsif ({
3259 wakaba 1.55 START_TAG_TOKEN, 1,
3260     END_TAG_TOKEN, 1,
3261     END_OF_FILE_TOKEN, 1,
3262 wakaba 1.18 }->{$token->{type}}) {
3263 wakaba 1.79 !!!cp ('t14');
3264 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
3265 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3266 wakaba 1.84 ## Go to the "before html" insertion mode.
3267 wakaba 1.18 ## reprocess
3268 wakaba 1.125 !!!ack-later;
3269 wakaba 1.18 return;
3270 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
3271 wakaba 1.18 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
3272     ## Ignore the token
3273 wakaba 1.26
3274 wakaba 1.18 unless (length $token->{data}) {
3275 wakaba 1.79 !!!cp ('t15');
3276 wakaba 1.84 ## Stay in the insertion mode.
3277 wakaba 1.18 !!!next-token;
3278     redo INITIAL;
3279 wakaba 1.79 } else {
3280     !!!cp ('t16');
3281 wakaba 1.3 }
3282 wakaba 1.79 } else {
3283     !!!cp ('t17');
3284 wakaba 1.3 }
3285 wakaba 1.18
3286 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
3287 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3288 wakaba 1.84 ## Go to the "before html" insertion mode.
3289 wakaba 1.18 ## reprocess
3290     return;
3291 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3292 wakaba 1.79 !!!cp ('t18');
3293 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
3294     $self->{document}->append_child ($comment);
3295    
3296 wakaba 1.84 ## Stay in the insertion mode.
3297 wakaba 1.18 !!!next-token;
3298     redo INITIAL;
3299     } else {
3300 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
3301 wakaba 1.18 }
3302     } # INITIAL
3303 wakaba 1.79
3304     die "$0: _tree_construction_initial: This should be never reached";
3305 wakaba 1.3 } # _tree_construction_initial
3306    
## Tree construction for the "before html" insertion mode.
##
## Consumes DOCTYPE, comment and leading white space tokens, then creates
## the root |html| element (from an explicit <html> start tag, or
## implicitly for any other token), appends it to the document, pushes it
## onto |$self->{open_elements}| and returns so that the caller can go to
## the "before head" insertion mode.
##
## The |application_cache_selection| callback is invoked exactly once for
## the created root element: with the value of the |manifest| attribute
## when the <html> start tag carries a true |manifest| entry, and with
## |undef| otherwise.  (Previously a non-white-space character token
## triggered the callback twice: once in the character branch and once
## more on the shared implicit-root path.)
##
## NOTE(review): |$token| and |$el_category| are not declared in this sub;
## they are presumably file-scoped lexicals, and the |!!!| lines are
## presumably expanded by a preprocessor - confirm against the rest of the
## file.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is not invoked
      ## here; the implicit root element code below invokes it once.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Any token that falls through to here implies the root element:
    ## create an attribute-less |html| element and reprocess the token.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3401    
## Reset the insertion mode from the current stack of open elements.
##
## Walks |$self->{open_elements}| from the last entry towards the first
## and sets |$self->{insertion_mode}| from the first entry that determines
## a mode.  When the walk reaches the first entry of the stack,
## |$self->{inner_html_node}| is used as the node instead; the sub dies if
## that is not defined.
##
## Each entry is an array reference: |->[0]| is the element node and
## |->[1]| is a bit field that is tested against the |*_EL| constants.
##
## Once a mode has been chosen the sub returns unconditionally.  (The
## former |$x = $mode and return if ...| idiom only returned when the
## assigned mode value was true, so a false-valued mode would have fallen
## through into the later steps.)
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = $self->{insertion_mode} | IN_FOREIGN_CONTENT_IM;
      ## ISSUE: What is set as the secondary insertion mode?
    } elsif ($node->[1] & TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        ## No mode is chosen here; processing continues with step 15.
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      ## Elements not listed here yield |undef|, i.e. no mode is chosen.
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] & HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3489    
3490     sub _tree_construction_main ($) {
3491     my $self = shift;
3492    
3493 wakaba 1.1 my $active_formatting_elements = [];
3494    
## Reconstruct the active formatting elements.
##
## Argument: a code reference that is called with each newly cloned
## element node in order to insert it into the tree.
##
## Entries of |$active_formatting_elements| and |$self->{open_elements}|
## are array references whose |->[0]| is a node (or the string '#marker'
## in the formatting list) and whose |->[1]| is copied verbatim to clones.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert_node = shift;

  ## Step 1: An empty list needs no work.
  @$active_formatting_elements or return;

  ## Step 3 (prepared before the step 2 checks): begin at the last entry.
  my $pos = -1;
  my $cur = $active_formatting_elements->[$pos];

  ## Step 2: Nothing to do if the last entry is a marker or is still in
  ## the stack of open elements.
  if ($cur->[0] eq '#marker') {
    return;
  }
  for my $open (@{$self->{open_elements}}) {
    next unless $cur->[0] eq $open->[0];
    !!!cp ('t32');
    return;
  }

  REWIND: {
    ## Step 4: Stop rewinding at the first entry of the list.
    last REWIND if $active_formatting_elements->[0]->[0] eq $cur->[0];

    ## Step 5
    $pos -= 1;
    $cur = $active_formatting_elements->[$pos];

    ## Step 6: Keep rewinding while the entry is neither a marker nor an
    ## open element.
    if ($cur->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open = 0;
      for my $open (@{$self->{open_elements}}) {
        if ($cur->[0] eq $open->[0]) {
          !!!cp ('t33');
          $is_open = 1;
          last;
        }
      }
      unless ($is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo REWIND;
      }
      !!!cp ('t34');
      #
    }

    ## Step 7: Advance to the entry after the marker / open element.
    $pos += 1;
    $cur = $active_formatting_elements->[$pos];
  } # REWIND

  RECREATE: {
    ## Step 8: Shallow clone of the entry's element.
    my $copy = [$cur->[0]->clone_node (0), $cur->[1]];

    ## Step 9
    $insert_node->($copy->[0]);
    push @{$self->{open_elements}}, $copy;

    ## Step 10: The list entry is replaced by the entry just pushed.
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: Finished once the last entry of the list has been
    ## replaced; otherwise continue with the next entry.
    if ($copy->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t37');
    } else {
      !!!cp ('t36');
      ## Step 7'
      $pos += 1;
      $cur = $active_formatting_elements->[$pos];

      redo RECREATE;
    }
  } # RECREATE
}; # $reconstruct_active_formatting_elements
3574    
## Clear the list of active formatting elements up to the last marker.
##
## Searches |$active_formatting_elements| from the end for an entry whose
## |->[0]| is '#marker' and truncates the list at that index, which removes
## the marker and every entry after it.  The list is left unchanged when
## it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3586    
3587 wakaba 1.96 my $insert;
3588    
## Parse an element whose content is CDATA or RCDATA.
##
## Argument: the content model flag to use while the content is read;
## only |CDATA_CONTENT_MODEL| and |RCDATA_CONTENT_MODEL| are accepted by
## the error reporting at the end (anything else dies there).
##
## Creates an element for the current start tag token, inserts it through
## |$insert|, collects the following character tokens into one text node
## child, and restores the PCDATA content model.  A parse error is
## reported unless the token that ended the text is the matching end tag.
my $parse_rcdata = sub ($) {
  my ($content_model_flag) = @_;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($el);

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4: Accumulate the data of the consecutive character tokens.
  my $buffer = '';
  !!!nack ('t40.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    $buffer .= $token->{data};
    !!!next-token;
  }

  ## Step 5: A text node is only created for non-empty content.
  unless ($buffer eq '') {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($buffer);
    $el->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  my $closed_by_end_tag = ($token->{type} == END_TAG_TOKEN and
                           $token->{tag_name} eq $start_tag_name);
  if ($closed_by_end_tag) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
  } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
  } else {
    die "$0: $content_model_flag in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3643 wakaba 1.1
## Handle a |script| start tag.
##
## Creates the |script| element from the current token, reads the
## following character tokens under the CDATA content model into the
## element's text, restores the PCDATA content model and reports a parse
## error unless the text was ended by a |script| end tag.  The element is
## inserted through |$insert| only when |$self->{inner_html_node}| is not
## defined.
my $script_start_tag = sub () {
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Gather the data of the consecutive character tokens.
  my $script_text = '';
  !!!nack ('t45.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) {
    !!!cp ('t45');
    $script_text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  unless ($script_text eq '') {
    !!!cp ('t46');
    $script_el->manakai_append_text ($script_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  my $closed_by_end_tag = ($token->{type} == END_TAG_TOKEN and
                           $token->{tag_name} eq 'script');
  if ($closed_by_end_tag) {
    !!!cp ('t47');
    ## Ignore the token
  } else {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
3695    
3696 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3697     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3698     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3699    
## Process an end tag of a formatting element (the adoption agency
## algorithm, AAA).
##
## Argument: the end tag token.  Its |tag_name| selects the formatting
## element and it is the token attached to every parse error reported
## here.
##
## Entries of |$active_formatting_elements| and |$self->{open_elements}|
## are array references: |->[0]| is the node (or '#marker' in the
## formatting list) and |->[1]| is a bit field tested against the |*_EL|
## constants.
##
## Differences from the earlier form of this code:
## - When the formatting element is not in the stack of open elements,
##   that very entry is removed from the list of active formatting
##   elements (formerly the last entry was popped, whichever it was).
## - In step 13 the clone is inserted after the furthest block (formerly
##   it replaced whatever entry followed the furthest block).
## - All "unmatched end tag" errors use the tag name of the end tag token
##   passed in.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1: Find the formatting element - the last entry after the
    ## last marker whose local name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      ## The formatting element is not in the stack of open elements:
      ## drop exactly that entry from the list.
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                      token => $end_tag_token);
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1; # $formatting_element
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      value => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2: Find the furthest block.  The loop runs from the end of
    ## the stack down to the formatting element, so the last assignment
    ## wins, i.e. the qualifying entry nearest to the formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3: Without a furthest block, pop the stack up to and
    ## including the formatting element and drop it from the list.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): when |$formatting_element_i_in_active| is 0 the index
    ## below is -1, which addresses the last entry of the list - confirm
    ## that this is the intended bookmark.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: A node that is not in the list of active formatting
      ## elements is removed from the stack and the walk continues.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5: A node with children is replaced by a shallow clone in
      ## both the list and the stack.
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8: Insert the last node; it is foster parented when the
    ## common ancestor has the |TABLE_ROWS_EL| bit set.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10: Move all children of the furthest block to the clone.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: Remove the formatting element from the list and insert
    ## the clone after the bookmark's preceding entry.  |$i| is adjusted
    ## when the removed entry precedes it.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: Remove the formatting element from the stack and insert
    ## the clone immediately after the furthest block.  The length
    ## argument is 0 so that no existing stack entry is overwritten.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3932    
## Default insertion callback: append the given node to the node of the
## last entry of |$self->{open_elements}|.  Also installed as |$insert|.
$insert = my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
3936    
## Insertion callback that foster parents the given node.
##
## When the last entry of |$self->{open_elements}| does not have the
## |TABLE_ROWS_EL| bit set, the node is simply appended to it.  Otherwise
## the last stack entry with the |TABLE_EL| bit is located: if its node
## has a parent whose |node_type| is 1, the child is inserted into that
## parent before the table node; if not, it is appended to the node of the
## stack entry just before the table.  Without any such entry the node of
## the first stack entry is used.  The last entry of |$open_tables| is
## then flagged as tainted.
my $insert_to_foster = sub {
  my $child = shift;

  unless ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $self->{open_elements}->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  for my $idx (reverse 0..$#{$self->{open_elements}}) {
    my $entry = $self->{open_elements}->[$idx];
    next unless $entry->[1] & TABLE_EL;

    my $parent = $entry->[0]->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      !!!cp ('t70');
      $foster_parent_element = $parent;
      $next_sibling = $entry->[0];
    } else {
      !!!cp ('t71');
      $foster_parent_element = $self->{open_elements}->[$idx - 1]->[0];
    }
    last;
  }
  unless (defined $foster_parent_element) {
    $foster_parent_element = $self->{open_elements}->[0]->[0];
  }
  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
3968    
3969 wakaba 1.126 B: while (1) {
3970 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3971 wakaba 1.79 !!!cp ('t73');
3972 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3973 wakaba 1.52 ## Ignore the token
3974     ## Stay in the phase
3975     !!!next-token;
3976 wakaba 1.126 next B;
3977 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3978 wakaba 1.52 $token->{tag_name} eq 'html') {
3979 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3980 wakaba 1.79 !!!cp ('t79');
3981 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3982 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3983     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3984 wakaba 1.79 !!!cp ('t80');
3985 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3986 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3987 wakaba 1.79 } else {
3988     !!!cp ('t81');
3989 wakaba 1.52 }
3990    
3991 wakaba 1.84 !!!cp ('t82');
3992 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3993 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3994     for my $attr_name (keys %{$token->{attributes}}) {
3995     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3996 wakaba 1.79 !!!cp ('t84');
3997 wakaba 1.52 $top_el->set_attribute_ns
3998     (undef, [undef, $attr_name],
3999     $token->{attributes}->{$attr_name}->{value});
4000     }
4001     }
4002 wakaba 1.125 !!!nack ('t84.1');
4003 wakaba 1.52 !!!next-token;
4004 wakaba 1.126 next B;
4005 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
4006 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
4007 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
4008 wakaba 1.79 !!!cp ('t85');
4009 wakaba 1.52 $self->{document}->append_child ($comment);
4010 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
4011 wakaba 1.79 !!!cp ('t86');
4012 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
4013     } else {
4014 wakaba 1.79 !!!cp ('t87');
4015 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4016     }
4017     !!!next-token;
4018 wakaba 1.126 next B;
4019     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
4020     if ($token->{type} == CHARACTER_TOKEN) {
4021     !!!cp ('t87.1');
4022     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4023     !!!next-token;
4024     next B;
4025     } elsif ($token->{type} == START_TAG_TOKEN) {
4026 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
4027     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
4028 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
4029     ($token->{tag_name} eq 'svg' and
4030     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
4031     ## NOTE: "using the rules for secondary insertion mode"then"continue"
4032     !!!cp ('t87.2');
4033     #
4034     } elsif ({
4035 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4036     center => 1, code => 1, dd => 1, div => 1, dl => 1, em => 1,
4037     embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1, ## No h4!
4038     h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, img => 1,
4039     li => 1, menu => 1, meta => 1, nobr => 1, p => 1, pre => 1,
4040     ruby => 1, s => 1, small => 1, span => 1, strong => 1,
4041     sub => 1, sup => 1, table => 1, tt => 1, u => 1, ul => 1,
4042     var => 1,
4043 wakaba 1.126 }->{$token->{tag_name}}) {
4044     !!!cp ('t87.2');
4045     !!!parse-error (type => 'not closed',
4046     value => $self->{open_elements}->[-1]->[0]
4047     ->manakai_local_name,
4048     token => $token);
4049    
4050     pop @{$self->{open_elements}}
4051     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4052    
4053 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4054 wakaba 1.126 ## Reprocess.
4055     next B;
4056     } else {
4057 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
4058     my $tag_name = $token->{tag_name};
4059     if ($nsuri eq $SVG_NS) {
4060     $tag_name = {
4061     altglyph => 'altGlyph',
4062     altglyphdef => 'altGlyphDef',
4063     altglyphitem => 'altGlyphItem',
4064     animatecolor => 'animateColor',
4065     animatemotion => 'animateMotion',
4066     animatetransform => 'animateTransform',
4067     clippath => 'clipPath',
4068     feblend => 'feBlend',
4069     fecolormatrix => 'feColorMatrix',
4070     fecomponenttransfer => 'feComponentTransfer',
4071     fecomposite => 'feComposite',
4072     feconvolvematrix => 'feConvolveMatrix',
4073     fediffuselighting => 'feDiffuseLighting',
4074     fedisplacementmap => 'feDisplacementMap',
4075     fedistantlight => 'feDistantLight',
4076     feflood => 'feFlood',
4077     fefunca => 'feFuncA',
4078     fefuncb => 'feFuncB',
4079     fefuncg => 'feFuncG',
4080     fefuncr => 'feFuncR',
4081     fegaussianblur => 'feGaussianBlur',
4082     feimage => 'feImage',
4083     femerge => 'feMerge',
4084     femergenode => 'feMergeNode',
4085     femorphology => 'feMorphology',
4086     feoffset => 'feOffset',
4087     fepointlight => 'fePointLight',
4088     fespecularlighting => 'feSpecularLighting',
4089     fespotlight => 'feSpotLight',
4090     fetile => 'feTile',
4091     feturbulence => 'feTurbulence',
4092     foreignobject => 'foreignObject',
4093     glyphref => 'glyphRef',
4094     lineargradient => 'linearGradient',
4095     radialgradient => 'radialGradient',
4096     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
4097     textpath => 'textPath',
4098     }->{$tag_name} || $tag_name;
4099     }
4100    
4101     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
4102    
4103     ## "adjust foreign attributes" - done in insert-element-f
4104 wakaba 1.126
4105 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
4106 wakaba 1.126
4107     if ($self->{self_closing}) {
4108     pop @{$self->{open_elements}};
4109     !!!ack ('t87.3');
4110     } else {
4111     !!!cp ('t87.4');
4112     }
4113    
4114     !!!next-token;
4115     next B;
4116     }
4117     } elsif ($token->{type} == END_TAG_TOKEN) {
4118     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4119     !!!cp ('t87.5');
4120     #
4121     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4122     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4123     !!!cp ('t87.6');
4124     #
4125     ## TODO: ...
4126     } else {
4127     die "$0: $token->{type}: Unknown token type";
4128     }
4129     }
4130    
4131     if ($self->{insertion_mode} & HEAD_IMS) {
4132 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4133 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4134 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4135     !!!cp ('t88.2');
4136     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4137     } else {
4138     !!!cp ('t88.1');
4139     ## Ignore the token.
4140     !!!next-token;
4141 wakaba 1.126 next B;
4142 wakaba 1.99 }
4143 wakaba 1.52 unless (length $token->{data}) {
4144 wakaba 1.79 !!!cp ('t88');
4145 wakaba 1.52 !!!next-token;
4146 wakaba 1.126 next B;
4147 wakaba 1.1 }
4148     }
4149 wakaba 1.52
4150 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4151 wakaba 1.79 !!!cp ('t89');
4152 wakaba 1.52 ## As if <head>
4153 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4154 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4155 wakaba 1.123 push @{$self->{open_elements}},
4156     [$self->{head_element}, $el_category->{head}];
4157 wakaba 1.52
4158     ## Reprocess in the "in head" insertion mode...
4159     pop @{$self->{open_elements}};
4160    
4161     ## Reprocess in the "after head" insertion mode...
4162 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4163 wakaba 1.79 !!!cp ('t90');
4164 wakaba 1.52 ## As if </noscript>
4165     pop @{$self->{open_elements}};
4166 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
4167 wakaba 1.1
4168 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
4169     ## As if </head>
4170     pop @{$self->{open_elements}};
4171    
4172     ## Reprocess in the "after head" insertion mode...
4173 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4174 wakaba 1.79 !!!cp ('t91');
4175 wakaba 1.52 pop @{$self->{open_elements}};
4176    
4177     ## Reprocess in the "after head" insertion mode...
4178 wakaba 1.79 } else {
4179     !!!cp ('t92');
4180 wakaba 1.1 }
4181 wakaba 1.52
4182 wakaba 1.123 ## "after head" insertion mode
4183     ## As if <body>
4184     !!!insert-element ('body',, $token);
4185     $self->{insertion_mode} = IN_BODY_IM;
4186     ## reprocess
4187 wakaba 1.126 next B;
4188 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
4189     if ($token->{tag_name} eq 'head') {
4190     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4191     !!!cp ('t93');
4192 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4193 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
4194     ($self->{head_element});
4195     push @{$self->{open_elements}},
4196     [$self->{head_element}, $el_category->{head}];
4197     $self->{insertion_mode} = IN_HEAD_IM;
4198 wakaba 1.125 !!!nack ('t93.1');
4199 wakaba 1.123 !!!next-token;
4200 wakaba 1.126 next B;
4201 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4202 wakaba 1.139 !!!cp ('t93.2');
4203     !!!parse-error (type => 'after head:head', token => $token); ## TODO: error type
4204     ## Ignore the token
4205     !!!nack ('t93.3');
4206     !!!next-token;
4207     next B;
4208 wakaba 1.125 } else {
4209     !!!cp ('t95');
4210     !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
4211     ## Ignore the token
4212     !!!nack ('t95.1');
4213     !!!next-token;
4214 wakaba 1.126 next B;
4215 wakaba 1.125 }
4216     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4217 wakaba 1.126 !!!cp ('t96');
4218     ## As if <head>
4219     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4220     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4221     push @{$self->{open_elements}},
4222     [$self->{head_element}, $el_category->{head}];
4223 wakaba 1.52
4224 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
4225     ## Reprocess in the "in head" insertion mode...
4226     } else {
4227     !!!cp ('t97');
4228     }
4229 wakaba 1.52
4230 wakaba 1.49 if ($token->{tag_name} eq 'base') {
4231 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4232 wakaba 1.79 !!!cp ('t98');
4233 wakaba 1.49 ## As if </noscript>
4234     pop @{$self->{open_elements}};
4235 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
4236 wakaba 1.49
4237 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4238 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4239 wakaba 1.79 } else {
4240     !!!cp ('t99');
4241 wakaba 1.49 }
4242    
4243     ## NOTE: There is a "as if in head" code clone.
4244 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4245 wakaba 1.79 !!!cp ('t100');
4246 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4247 wakaba 1.123 push @{$self->{open_elements}},
4248     [$self->{head_element}, $el_category->{head}];
4249 wakaba 1.79 } else {
4250     !!!cp ('t101');
4251 wakaba 1.49 }
4252 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4253 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4254 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4255 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4256 wakaba 1.125 !!!nack ('t101.1');
4257 wakaba 1.49 !!!next-token;
4258 wakaba 1.126 next B;
4259 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
4260 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4261 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4262 wakaba 1.79 !!!cp ('t102');
4263 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4264 wakaba 1.123 push @{$self->{open_elements}},
4265     [$self->{head_element}, $el_category->{head}];
4266 wakaba 1.79 } else {
4267     !!!cp ('t103');
4268 wakaba 1.25 }
4269 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4270 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4271 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4272 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4273 wakaba 1.125 !!!ack ('t103.1');
4274 wakaba 1.1 !!!next-token;
4275 wakaba 1.126 next B;
4276 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4277     ## NOTE: There is a "as if in head" code clone.
4278 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4279 wakaba 1.79 !!!cp ('t104');
4280 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4281 wakaba 1.123 push @{$self->{open_elements}},
4282     [$self->{head_element}, $el_category->{head}];
4283 wakaba 1.79 } else {
4284     !!!cp ('t105');
4285 wakaba 1.34 }
4286 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4287 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4288 wakaba 1.34
4289     unless ($self->{confident}) {
4290 wakaba 1.134 if ($token->{attributes}->{charset}) {
4291 wakaba 1.79 !!!cp ('t106');
4292 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4293     ## in the {change_encoding} callback.
4294 wakaba 1.63 $self->{change_encoding}
4295 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
4296     $token);
4297 wakaba 1.66
4298     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4299     ->set_user_data (manakai_has_reference =>
4300     $token->{attributes}->{charset}
4301     ->{has_reference});
4302 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4303     if ($token->{attributes}->{content}->{value}
4304 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4305     [\x09-\x0D\x20]*=
4306 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4307     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
4308 wakaba 1.79 !!!cp ('t107');
4309 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4310     ## in the {change_encoding} callback.
4311 wakaba 1.63 $self->{change_encoding}
4312 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4313     $token);
4314 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4315     ->set_user_data (manakai_has_reference =>
4316     $token->{attributes}->{content}
4317     ->{has_reference});
4318 wakaba 1.79 } else {
4319     !!!cp ('t108');
4320 wakaba 1.63 }
4321 wakaba 1.34 }
4322 wakaba 1.66 } else {
4323     if ($token->{attributes}->{charset}) {
4324 wakaba 1.79 !!!cp ('t109');
4325 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4326     ->set_user_data (manakai_has_reference =>
4327     $token->{attributes}->{charset}
4328     ->{has_reference});
4329     }
4330 wakaba 1.68 if ($token->{attributes}->{content}) {
4331 wakaba 1.79 !!!cp ('t110');
4332 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4333     ->set_user_data (manakai_has_reference =>
4334     $token->{attributes}->{content}
4335     ->{has_reference});
4336     }
4337 wakaba 1.34 }
4338    
4339 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4340 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4341 wakaba 1.125 !!!ack ('t110.1');
4342 wakaba 1.34 !!!next-token;
4343 wakaba 1.126 next B;
4344 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
4345 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4346 wakaba 1.79 !!!cp ('t111');
4347 wakaba 1.49 ## As if </noscript>
4348     pop @{$self->{open_elements}};
4349 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
4350 wakaba 1.49
4351 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4352 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4353 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4354 wakaba 1.79 !!!cp ('t112');
4355 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4356 wakaba 1.123 push @{$self->{open_elements}},
4357     [$self->{head_element}, $el_category->{head}];
4358 wakaba 1.79 } else {
4359     !!!cp ('t113');
4360 wakaba 1.25 }
4361 wakaba 1.49
4362     ## NOTE: There is a "as if in head" code clone.
4363 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4364     : $self->{open_elements}->[-1]->[0];
4365 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4366 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4367 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4368 wakaba 1.126 next B;
4369 wakaba 1.25 } elsif ($token->{tag_name} eq 'style') {
4370     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4371 wakaba 1.54 ## insertion mode IN_HEAD_IM)
4372 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4373 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4374 wakaba 1.79 !!!cp ('t114');
4375 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4376 wakaba 1.123 push @{$self->{open_elements}},
4377     [$self->{head_element}, $el_category->{head}];
4378 wakaba 1.79 } else {
4379     !!!cp ('t115');
4380 wakaba 1.25 }
4381 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4382 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4383 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4384 wakaba 1.126 next B;
4385 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
4386 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
4387 wakaba 1.79 !!!cp ('t116');
4388 wakaba 1.25 ## NOTE: and scripting is disabled
4389 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4390 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4391 wakaba 1.125 !!!nack ('t116.1');
4392 wakaba 1.1 !!!next-token;
4393 wakaba 1.126 next B;
4394 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4395 wakaba 1.79 !!!cp ('t117');
4396 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
4397 wakaba 1.1 ## Ignore the token
4398 wakaba 1.125 !!!nack ('t117.1');
4399 wakaba 1.41 !!!next-token;
4400 wakaba 1.126 next B;
4401 wakaba 1.1 } else {
4402 wakaba 1.79 !!!cp ('t118');
4403 wakaba 1.25 #
4404 wakaba 1.1 }
4405 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4406 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4407 wakaba 1.79 !!!cp ('t119');
4408 wakaba 1.49 ## As if </noscript>
4409     pop @{$self->{open_elements}};
4410 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
4411 wakaba 1.49
4412 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4413 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4414 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4415 wakaba 1.79 !!!cp ('t120');
4416 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4417 wakaba 1.123 push @{$self->{open_elements}},
4418     [$self->{head_element}, $el_category->{head}];
4419 wakaba 1.79 } else {
4420     !!!cp ('t121');
4421 wakaba 1.25 }
4422 wakaba 1.49
4423 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4424 wakaba 1.100 $script_start_tag->();
4425     pop @{$self->{open_elements}} # <head>
4426 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4427 wakaba 1.126 next B;
4428 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4429 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4430 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4431 wakaba 1.79 !!!cp ('t122');
4432 wakaba 1.49 ## As if </noscript>
4433     pop @{$self->{open_elements}};
4434 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
4435 wakaba 1.49
4436     ## Reprocess in the "in head" insertion mode...
4437     ## As if </head>
4438     pop @{$self->{open_elements}};
4439    
4440     ## Reprocess in the "after head" insertion mode...
4441 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4442 wakaba 1.79 !!!cp ('t124');
4443 wakaba 1.49 pop @{$self->{open_elements}};
4444    
4445     ## Reprocess in the "after head" insertion mode...
4446 wakaba 1.79 } else {
4447     !!!cp ('t125');
4448 wakaba 1.49 }
4449    
4450     ## "after head" insertion mode
4451 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4452 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4453 wakaba 1.79 !!!cp ('t126');
4454 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4455     } elsif ($token->{tag_name} eq 'frameset') {
4456 wakaba 1.79 !!!cp ('t127');
4457 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4458     } else {
4459     die "$0: tag name: $self->{tag_name}";
4460     }
4461 wakaba 1.125 !!!nack ('t127.1');
4462 wakaba 1.1 !!!next-token;
4463 wakaba 1.126 next B;
4464 wakaba 1.1 } else {
4465 wakaba 1.79 !!!cp ('t128');
4466 wakaba 1.1 #
4467     }
4468 wakaba 1.49
4469 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4470 wakaba 1.79 !!!cp ('t129');
4471 wakaba 1.49 ## As if </noscript>
4472     pop @{$self->{open_elements}};
4473 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4474 wakaba 1.49
4475     ## Reprocess in the "in head" insertion mode...
4476     ## As if </head>
4477 wakaba 1.25 pop @{$self->{open_elements}};
4478 wakaba 1.49
4479     ## Reprocess in the "after head" insertion mode...
4480 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4481 wakaba 1.79 !!!cp ('t130');
4482 wakaba 1.49 ## As if </head>
4483 wakaba 1.25 pop @{$self->{open_elements}};
4484 wakaba 1.49
4485     ## Reprocess in the "after head" insertion mode...
4486 wakaba 1.79 } else {
4487     !!!cp ('t131');
4488 wakaba 1.49 }
4489    
4490     ## "after head" insertion mode
4491     ## As if <body>
4492 wakaba 1.116 !!!insert-element ('body',, $token);
4493 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4494 wakaba 1.49 ## reprocess
4495 wakaba 1.125 !!!ack-later;
4496 wakaba 1.126 next B;
4497 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4498 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4499 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4500 wakaba 1.79 !!!cp ('t132');
4501 wakaba 1.50 ## As if <head>
4502 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4503 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4504 wakaba 1.123 push @{$self->{open_elements}},
4505     [$self->{head_element}, $el_category->{head}];
4506 wakaba 1.50
4507     ## Reprocess in the "in head" insertion mode...
4508     pop @{$self->{open_elements}};
4509 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4510 wakaba 1.50 !!!next-token;
4511 wakaba 1.126 next B;
4512 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4513 wakaba 1.79 !!!cp ('t133');
4514 wakaba 1.49 ## As if </noscript>
4515     pop @{$self->{open_elements}};
4516 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
4517 wakaba 1.49
4518     ## Reprocess in the "in head" insertion mode...
4519 wakaba 1.50 pop @{$self->{open_elements}};
4520 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4521 wakaba 1.50 !!!next-token;
4522 wakaba 1.126 next B;
4523 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4524 wakaba 1.79 !!!cp ('t134');
4525 wakaba 1.49 pop @{$self->{open_elements}};
4526 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4527 wakaba 1.49 !!!next-token;
4528 wakaba 1.126 next B;
4529 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4530     !!!cp ('t134.1');
4531     !!!parse-error (type => 'unmatched end tag:head', token => $token);
4532     ## Ignore the token
4533     !!!next-token;
4534     next B;
4535 wakaba 1.49 } else {
4536 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4537 wakaba 1.49 }
4538     } elsif ($token->{tag_name} eq 'noscript') {
4539 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4540 wakaba 1.79 !!!cp ('t136');
4541 wakaba 1.49 pop @{$self->{open_elements}};
4542 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4543 wakaba 1.49 !!!next-token;
4544 wakaba 1.126 next B;
4545 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4546     $self->{insertion_mode} == AFTER_HEAD_IM) {
4547 wakaba 1.79 !!!cp ('t137');
4548 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
4549 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4550     !!!next-token;
4551 wakaba 1.126 next B;
4552 wakaba 1.49 } else {
4553 wakaba 1.79 !!!cp ('t138');
4554 wakaba 1.49 #
4555     }
4556     } elsif ({
4557 wakaba 1.31 body => 1, html => 1,
4558     }->{$token->{tag_name}}) {
4559 wakaba 1.139 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4560     $self->{insertion_mode} == IN_HEAD_IM or
4561     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4562 wakaba 1.79 !!!cp ('t140');
4563 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4564 wakaba 1.49 ## Ignore the token
4565     !!!next-token;
4566 wakaba 1.126 next B;
4567 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4568     !!!cp ('t140.1');
4569     !!!parse-error (type => 'unmatched end tag:' . $token->{tag_name}, token => $token);
4570     ## Ignore the token
4571     !!!next-token;
4572     next B;
4573 wakaba 1.79 } else {
4574 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4575 wakaba 1.49 }
4576 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
4577     !!!cp ('t142');
4578     !!!parse-error (type => 'unmatched end tag:p', token => $token);
4579     ## Ignore the token
4580     !!!next-token;
4581     next B;
4582     } elsif ($token->{tag_name} eq 'br') {
4583 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4584 wakaba 1.139 !!!cp ('t142.2');
4585     ## (before head) as if <head>, (in head) as if </head>
4586 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4587 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4588 wakaba 1.139 $self->{insertion_mode} = AFTER_HEAD_IM;
4589    
4590     ## Reprocess in the "after head" insertion mode...
4591     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4592     !!!cp ('t143.2');
4593     ## As if </head>
4594     pop @{$self->{open_elements}};
4595     $self->{insertion_mode} = AFTER_HEAD_IM;
4596    
4597     ## Reprocess in the "after head" insertion mode...
4598     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4599     !!!cp ('t143.3');
4600     ## ISSUE: Two parse errors for <head><noscript></br>
4601     !!!parse-error (type => 'unmatched end tag:br', token => $token);
4602     ## As if </noscript>
4603     pop @{$self->{open_elements}};
4604     $self->{insertion_mode} = IN_HEAD_IM;
4605 wakaba 1.50
4606     ## Reprocess in the "in head" insertion mode...
4607 wakaba 1.139 ## As if </head>
4608     pop @{$self->{open_elements}};
4609     $self->{insertion_mode} = AFTER_HEAD_IM;
4610    
4611     ## Reprocess in the "after head" insertion mode...
4612     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4613     !!!cp ('t143.4');
4614     #
4615 wakaba 1.79 } else {
4616 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4617 wakaba 1.50 }
4618    
4619 wakaba 1.139 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4620     !!!parse-error (type => 'unmatched end tag:br', token => $token);
4621     ## Ignore the token
4622     !!!next-token;
4623     next B;
4624 wakaba 1.25 } else {
4625 wakaba 1.139 !!!cp ('t145');
4626     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4627     ## Ignore the token
4628     !!!next-token;
4629     next B;
4630 wakaba 1.49 }
4631    
4632 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4633 wakaba 1.79 !!!cp ('t146');
4634 wakaba 1.49 ## As if </noscript>
4635     pop @{$self->{open_elements}};
4636 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4637 wakaba 1.49
4638     ## Reprocess in the "in head" insertion mode...
4639     ## As if </head>
4640     pop @{$self->{open_elements}};
4641    
4642     ## Reprocess in the "after head" insertion mode...
4643 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4644 wakaba 1.79 !!!cp ('t147');
4645 wakaba 1.49 ## As if </head>
4646     pop @{$self->{open_elements}};
4647    
4648     ## Reprocess in the "after head" insertion mode...
4649 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4650 wakaba 1.82 ## ISSUE: This case cannot be reached?
4651 wakaba 1.79 !!!cp ('t148');
4652 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4653 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4654     !!!next-token;
4655 wakaba 1.126 next B;
4656 wakaba 1.79 } else {
4657     !!!cp ('t149');
4658 wakaba 1.1 }
4659    
4660 wakaba 1.49 ## "after head" insertion mode
4661     ## As if <body>
4662 wakaba 1.116 !!!insert-element ('body',, $token);
4663 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4664 wakaba 1.52 ## reprocess
4665 wakaba 1.126 next B;
4666 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4667     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4668     !!!cp ('t149.1');
4669    
4670     ## NOTE: As if <head>
4671 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4672 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4673     ($self->{head_element});
4674 wakaba 1.123 #push @{$self->{open_elements}},
4675     # [$self->{head_element}, $el_category->{head}];
4676 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4677     ## NOTE: Reprocess.
4678    
4679     ## NOTE: As if </head>
4680     #pop @{$self->{open_elements}};
4681     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4682     ## NOTE: Reprocess.
4683    
4684     #
4685     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4686     !!!cp ('t149.2');
4687    
4688     ## NOTE: As if </head>
4689     pop @{$self->{open_elements}};
4690     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4691     ## NOTE: Reprocess.
4692    
4693     #
4694     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4695     !!!cp ('t149.3');
4696    
4697 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4698 wakaba 1.104
4699     ## As if </noscript>
4700     pop @{$self->{open_elements}};
4701     #$self->{insertion_mode} = IN_HEAD_IM;
4702     ## NOTE: Reprocess.
4703    
4704     ## NOTE: As if </head>
4705     pop @{$self->{open_elements}};
4706     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4707     ## NOTE: Reprocess.
4708    
4709     #
4710     } else {
4711     !!!cp ('t149.4');
4712     #
4713     }
4714    
4715     ## NOTE: As if <body>
4716 wakaba 1.116 !!!insert-element ('body',, $token);
4717 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4718     ## NOTE: Reprocess.
4719 wakaba 1.126 next B;
4720 wakaba 1.104 } else {
4721     die "$0: $token->{type}: Unknown token type";
4722     }
4723 wakaba 1.52
4724     ## ISSUE: An issue in the spec.
4725 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4726 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4727 wakaba 1.79 !!!cp ('t150');
4728 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4729     $reconstruct_active_formatting_elements->($insert_to_current);
4730    
4731     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4732    
4733     !!!next-token;
4734 wakaba 1.126 next B;
4735 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4736 wakaba 1.52 if ({
4737     caption => 1, col => 1, colgroup => 1, tbody => 1,
4738     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4739     }->{$token->{tag_name}}) {
4740 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4741 wakaba 1.52 ## have an element in table scope
4742 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4743 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4744 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4745 wakaba 1.79 !!!cp ('t151');
4746 wakaba 1.108
4747     ## Close the cell
4748 wakaba 1.125 !!!back-token; # <x>
4749 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4750     tag_name => $node->[0]->manakai_local_name,
4751 wakaba 1.114 line => $token->{line},
4752     column => $token->{column}};
4753 wakaba 1.126 next B;
4754 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4755 wakaba 1.79 !!!cp ('t152');
4756 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4757     last;
4758 wakaba 1.52 }
4759 wakaba 1.108 }
4760    
4761     !!!cp ('t153');
4762     !!!parse-error (type => 'start tag not allowed',
4763 wakaba 1.113 value => $token->{tag_name}, token => $token);
4764 wakaba 1.108 ## Ignore the token
4765 wakaba 1.125 !!!nack ('t153.1');
4766 wakaba 1.108 !!!next-token;
4767 wakaba 1.126 next B;
4768 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4769 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4770 wakaba 1.52
4771 wakaba 1.108 ## NOTE: As if </caption>.
4772 wakaba 1.52 ## have a table element in table scope
4773     my $i;
4774 wakaba 1.108 INSCOPE: {
4775     for (reverse 0..$#{$self->{open_elements}}) {
4776     my $node = $self->{open_elements}->[$_];
4777 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4778 wakaba 1.108 !!!cp ('t155');
4779     $i = $_;
4780     last INSCOPE;
4781 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4782 wakaba 1.108 !!!cp ('t156');
4783     last;
4784     }
4785 wakaba 1.52 }
4786 wakaba 1.108
4787     !!!cp ('t157');
4788     !!!parse-error (type => 'start tag not allowed',
4789 wakaba 1.113 value => $token->{tag_name}, token => $token);
4790 wakaba 1.108 ## Ignore the token
4791 wakaba 1.125 !!!nack ('t157.1');
4792 wakaba 1.108 !!!next-token;
4793 wakaba 1.126 next B;
4794 wakaba 1.52 } # INSCOPE
4795    
4796     ## generate implied end tags
4797 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4798     & END_TAG_OPTIONAL_EL) {
4799 wakaba 1.79 !!!cp ('t158');
4800 wakaba 1.86 pop @{$self->{open_elements}};
4801 wakaba 1.52 }
4802    
4803 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4804 wakaba 1.79 !!!cp ('t159');
4805 wakaba 1.122 !!!parse-error (type => 'not closed',
4806     value => $self->{open_elements}->[-1]->[0]
4807     ->manakai_local_name,
4808     token => $token);
4809 wakaba 1.79 } else {
4810     !!!cp ('t160');
4811 wakaba 1.52 }
4812    
4813     splice @{$self->{open_elements}}, $i;
4814    
4815     $clear_up_to_marker->();
4816    
4817 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4818 wakaba 1.52
4819     ## reprocess
4820 wakaba 1.125 !!!ack-later;
4821 wakaba 1.126 next B;
4822 wakaba 1.52 } else {
4823 wakaba 1.79 !!!cp ('t161');
4824 wakaba 1.52 #
4825     }
4826     } else {
4827 wakaba 1.79 !!!cp ('t162');
4828 wakaba 1.52 #
4829     }
4830 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4831 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4832 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4833 wakaba 1.43 ## have an element in table scope
4834 wakaba 1.52 my $i;
4835 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4836     my $node = $self->{open_elements}->[$_];
4837 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4838 wakaba 1.79 !!!cp ('t163');
4839 wakaba 1.52 $i = $_;
4840 wakaba 1.43 last INSCOPE;
4841 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4842 wakaba 1.79 !!!cp ('t164');
4843 wakaba 1.43 last INSCOPE;
4844     }
4845     } # INSCOPE
4846 wakaba 1.52 unless (defined $i) {
4847 wakaba 1.79 !!!cp ('t165');
4848 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4849 wakaba 1.43 ## Ignore the token
4850     !!!next-token;
4851 wakaba 1.126 next B;
4852 wakaba 1.43 }
4853    
4854 wakaba 1.52 ## generate implied end tags
4855 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4856     & END_TAG_OPTIONAL_EL) {
4857 wakaba 1.79 !!!cp ('t166');
4858 wakaba 1.86 pop @{$self->{open_elements}};
4859 wakaba 1.52 }
4860 wakaba 1.86
4861 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
4862     ne $token->{tag_name}) {
4863 wakaba 1.79 !!!cp ('t167');
4864 wakaba 1.122 !!!parse-error (type => 'not closed',
4865     value => $self->{open_elements}->[-1]->[0]
4866     ->manakai_local_name,
4867     token => $token);
4868 wakaba 1.79 } else {
4869     !!!cp ('t168');
4870 wakaba 1.52 }
4871    
4872     splice @{$self->{open_elements}}, $i;
4873    
4874     $clear_up_to_marker->();
4875    
4876 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4877 wakaba 1.52
4878     !!!next-token;
4879 wakaba 1.126 next B;
4880 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4881 wakaba 1.79 !!!cp ('t169');
4882 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4883 wakaba 1.52 ## Ignore the token
4884     !!!next-token;
4885 wakaba 1.126 next B;
4886 wakaba 1.52 } else {
4887 wakaba 1.79 !!!cp ('t170');
4888 wakaba 1.52 #
4889     }
4890     } elsif ($token->{tag_name} eq 'caption') {
4891 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4892 wakaba 1.43 ## have a table element in table scope
4893     my $i;
4894 wakaba 1.108 INSCOPE: {
4895     for (reverse 0..$#{$self->{open_elements}}) {
4896     my $node = $self->{open_elements}->[$_];
4897 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4898 wakaba 1.108 !!!cp ('t171');
4899     $i = $_;
4900     last INSCOPE;
4901 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4902 wakaba 1.108 !!!cp ('t172');
4903     last;
4904     }
4905 wakaba 1.43 }
4906 wakaba 1.108
4907     !!!cp ('t173');
4908     !!!parse-error (type => 'unmatched end tag',
4909 wakaba 1.113 value => $token->{tag_name}, token => $token);
4910 wakaba 1.108 ## Ignore the token
4911     !!!next-token;
4912 wakaba 1.126 next B;
4913 wakaba 1.43 } # INSCOPE
4914    
4915     ## generate implied end tags
4916 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4917     & END_TAG_OPTIONAL_EL) {
4918 wakaba 1.79 !!!cp ('t174');
4919 wakaba 1.86 pop @{$self->{open_elements}};
4920 wakaba 1.43 }
4921 wakaba 1.52
4922 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4923 wakaba 1.79 !!!cp ('t175');
4924 wakaba 1.122 !!!parse-error (type => 'not closed',
4925     value => $self->{open_elements}->[-1]->[0]
4926     ->manakai_local_name,
4927     token => $token);
4928 wakaba 1.79 } else {
4929     !!!cp ('t176');
4930 wakaba 1.52 }
4931    
4932     splice @{$self->{open_elements}}, $i;
4933    
4934     $clear_up_to_marker->();
4935    
4936 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4937 wakaba 1.52
4938     !!!next-token;
4939 wakaba 1.126 next B;
4940 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4941 wakaba 1.79 !!!cp ('t177');
4942 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4943 wakaba 1.52 ## Ignore the token
4944     !!!next-token;
4945 wakaba 1.126 next B;
4946 wakaba 1.52 } else {
4947 wakaba 1.79 !!!cp ('t178');
4948 wakaba 1.52 #
4949     }
4950     } elsif ({
4951     table => 1, tbody => 1, tfoot => 1,
4952     thead => 1, tr => 1,
4953     }->{$token->{tag_name}} and
4954 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4955 wakaba 1.52 ## have an element in table scope
4956     my $i;
4957     my $tn;
4958 wakaba 1.108 INSCOPE: {
4959     for (reverse 0..$#{$self->{open_elements}}) {
4960     my $node = $self->{open_elements}->[$_];
4961 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4962 wakaba 1.108 !!!cp ('t179');
4963     $i = $_;
4964    
4965     ## Close the cell
4966 wakaba 1.125 !!!back-token; # </x>
4967 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4968     line => $token->{line},
4969     column => $token->{column}};
4970 wakaba 1.126 next B;
4971 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
4972 wakaba 1.108 !!!cp ('t180');
4973 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
4974 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
4975     ## in scope in the stack of open elements by definition.
4976 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4977 wakaba 1.108 ## ISSUE: Can this be reached?
4978     !!!cp ('t181');
4979     last;
4980     }
4981 wakaba 1.52 }
4982 wakaba 1.108
4983 wakaba 1.79 !!!cp ('t182');
4984 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4985 wakaba 1.113 value => $token->{tag_name}, token => $token);
4986 wakaba 1.52 ## Ignore the token
4987     !!!next-token;
4988 wakaba 1.126 next B;
4989 wakaba 1.108 } # INSCOPE
4990 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4991 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4992 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4993 wakaba 1.52
4994     ## As if </caption>
4995     ## have a table element in table scope
4996     my $i;
4997     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4998     my $node = $self->{open_elements}->[$_];
4999 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5000 wakaba 1.79 !!!cp ('t184');
5001 wakaba 1.52 $i = $_;
5002     last INSCOPE;
5003 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5004 wakaba 1.79 !!!cp ('t185');
5005 wakaba 1.52 last INSCOPE;
5006     }
5007     } # INSCOPE
5008     unless (defined $i) {
5009 wakaba 1.79 !!!cp ('t186');
5010 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
5011 wakaba 1.52 ## Ignore the token
5012     !!!next-token;
5013 wakaba 1.126 next B;
5014 wakaba 1.52 }
5015    
5016     ## generate implied end tags
5017 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5018 wakaba 1.79 !!!cp ('t187');
5019 wakaba 1.86 pop @{$self->{open_elements}};
5020 wakaba 1.52 }
5021    
5022 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5023 wakaba 1.79 !!!cp ('t188');
5024 wakaba 1.122 !!!parse-error (type => 'not closed',
5025     value => $self->{open_elements}->[-1]->[0]
5026     ->manakai_local_name,
5027     token => $token);
5028 wakaba 1.79 } else {
5029     !!!cp ('t189');
5030 wakaba 1.52 }
5031    
5032     splice @{$self->{open_elements}}, $i;
5033    
5034     $clear_up_to_marker->();
5035    
5036 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5037 wakaba 1.52
5038     ## reprocess
5039 wakaba 1.126 next B;
5040 wakaba 1.52 } elsif ({
5041     body => 1, col => 1, colgroup => 1, html => 1,
5042     }->{$token->{tag_name}}) {
5043 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
5044 wakaba 1.79 !!!cp ('t190');
5045 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5046 wakaba 1.52 ## Ignore the token
5047     !!!next-token;
5048 wakaba 1.126 next B;
5049 wakaba 1.52 } else {
5050 wakaba 1.79 !!!cp ('t191');
5051 wakaba 1.52 #
5052     }
5053     } elsif ({
5054     tbody => 1, tfoot => 1,
5055     thead => 1, tr => 1,
5056     }->{$token->{tag_name}} and
5057 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5058 wakaba 1.79 !!!cp ('t192');
5059 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5060 wakaba 1.52 ## Ignore the token
5061     !!!next-token;
5062 wakaba 1.126 next B;
5063 wakaba 1.52 } else {
5064 wakaba 1.79 !!!cp ('t193');
5065 wakaba 1.52 #
5066     }
5067 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5068     for my $entry (@{$self->{open_elements}}) {
5069 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
5070 wakaba 1.104 !!!cp ('t75');
5071 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5072 wakaba 1.104 last;
5073     }
5074     }
5075    
5076     ## Stop parsing.
5077     last B;
5078 wakaba 1.52 } else {
5079     die "$0: $token->{type}: Unknown token type";
5080     }
5081    
5082     $insert = $insert_to_current;
5083     #
5084 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
5085 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5086 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
5087     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5088     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5089 wakaba 1.52
5090 wakaba 1.95 unless (length $token->{data}) {
5091     !!!cp ('t194');
5092     !!!next-token;
5093 wakaba 1.126 next B;
5094 wakaba 1.95 } else {
5095     !!!cp ('t195');
5096     }
5097     }
5098 wakaba 1.52
5099 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
5100 wakaba 1.52
5101     ## As if in body, but insert into foster parent element
5102     ## ISSUE: Spec says that "whenever a node would be inserted
5103     ## into the current node" while characters might not
5104     ## result in a new Text node.
5105     $reconstruct_active_formatting_elements->($insert_to_foster);
5106    
5107 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
5108 wakaba 1.52 # MUST
5109     my $foster_parent_element;
5110     my $next_sibling;
5111     my $prev_sibling;
5112     OE: for (reverse 0..$#{$self->{open_elements}}) {
5113 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
5114 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5115     if (defined $parent and $parent->node_type == 1) {
5116 wakaba 1.79 !!!cp ('t196');
5117 wakaba 1.52 $foster_parent_element = $parent;
5118     $next_sibling = $self->{open_elements}->[$_]->[0];
5119     $prev_sibling = $next_sibling->previous_sibling;
5120     } else {
5121 wakaba 1.79 !!!cp ('t197');
5122 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5123     $prev_sibling = $foster_parent_element->last_child;
5124     }
5125     last OE;
5126     }
5127     } # OE
5128     $foster_parent_element = $self->{open_elements}->[0]->[0] and
5129     $prev_sibling = $foster_parent_element->last_child
5130     unless defined $foster_parent_element;
5131     if (defined $prev_sibling and
5132     $prev_sibling->node_type == 3) {
5133 wakaba 1.79 !!!cp ('t198');
5134 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
5135     } else {
5136 wakaba 1.79 !!!cp ('t199');
5137 wakaba 1.52 $foster_parent_element->insert_before
5138     ($self->{document}->create_text_node ($token->{data}),
5139     $next_sibling);
5140     }
5141 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
5142     } else {
5143     !!!cp ('t200');
5144     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5145     }
5146 wakaba 1.52
5147 wakaba 1.95 !!!next-token;
5148 wakaba 1.126 next B;
5149 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5150 wakaba 1.52 if ({
5151 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
5152 wakaba 1.52 th => 1, td => 1,
5153     }->{$token->{tag_name}}) {
5154 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
5155 wakaba 1.52 ## Clear back to table context
5156 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5157     & TABLE_SCOPING_EL)) {
5158 wakaba 1.79 !!!cp ('t201');
5159 wakaba 1.52 pop @{$self->{open_elements}};
5160 wakaba 1.43 }
5161    
5162 wakaba 1.116 !!!insert-element ('tbody',, $token);
5163 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5164 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5165     }
5166    
5167 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5168 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
5169 wakaba 1.79 !!!cp ('t202');
5170 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
5171 wakaba 1.52 }
5172 wakaba 1.43
5173 wakaba 1.52 ## Clear back to table body context
5174 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5175     & TABLE_ROWS_SCOPING_EL)) {
5176 wakaba 1.79 !!!cp ('t203');
5177 wakaba 1.83 ## ISSUE: Can this case be reached?
5178 wakaba 1.52 pop @{$self->{open_elements}};
5179     }
5180 wakaba 1.43
5181 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5182 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5183 wakaba 1.79 !!!cp ('t204');
5184 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5185 wakaba 1.125 !!!nack ('t204');
5186 wakaba 1.52 !!!next-token;
5187 wakaba 1.126 next B;
5188 wakaba 1.52 } else {
5189 wakaba 1.79 !!!cp ('t205');
5190 wakaba 1.116 !!!insert-element ('tr',, $token);
5191 wakaba 1.52 ## reprocess in the "in row" insertion mode
5192     }
5193 wakaba 1.79 } else {
5194     !!!cp ('t206');
5195 wakaba 1.52 }
5196    
5197     ## Clear back to table row context
5198 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5199     & TABLE_ROW_SCOPING_EL)) {
5200 wakaba 1.79 !!!cp ('t207');
5201 wakaba 1.52 pop @{$self->{open_elements}};
5202 wakaba 1.43 }
5203 wakaba 1.52
5204 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5205 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
5206 wakaba 1.52
5207     push @$active_formatting_elements, ['#marker', ''];
5208    
5209 wakaba 1.125 !!!nack ('t207.1');
5210 wakaba 1.52 !!!next-token;
5211 wakaba 1.126 next B;
5212 wakaba 1.52 } elsif ({
5213     caption => 1, col => 1, colgroup => 1,
5214     tbody => 1, tfoot => 1, thead => 1,
5215 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5216 wakaba 1.52 }->{$token->{tag_name}}) {
5217 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5218 wakaba 1.52 ## As if </tr>
5219 wakaba 1.43 ## have an element in table scope
5220     my $i;
5221     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5222     my $node = $self->{open_elements}->[$_];
5223 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5224 wakaba 1.79 !!!cp ('t208');
5225 wakaba 1.43 $i = $_;
5226     last INSCOPE;
5227 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5228 wakaba 1.79 !!!cp ('t209');
5229 wakaba 1.43 last INSCOPE;
5230     }
5231     } # INSCOPE
5232 wakaba 1.79 unless (defined $i) {
5233 wakaba 1.125 !!!cp ('t210');
5234 wakaba 1.83 ## TODO: This type is wrong.
5235 wakaba 1.125 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
5236 wakaba 1.52 ## Ignore the token
5237 wakaba 1.125 !!!nack ('t210.1');
5238 wakaba 1.52 !!!next-token;
5239 wakaba 1.126 next B;
5240 wakaba 1.43 }
5241    
5242 wakaba 1.52 ## Clear back to table row context
5243 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5244     & TABLE_ROW_SCOPING_EL)) {
5245 wakaba 1.79 !!!cp ('t211');
5246 wakaba 1.83 ## ISSUE: Can this case be reached?
5247 wakaba 1.52 pop @{$self->{open_elements}};
5248 wakaba 1.1 }
5249 wakaba 1.43
5250 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5251 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5252 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5253 wakaba 1.79 !!!cp ('t212');
5254 wakaba 1.52 ## reprocess
5255 wakaba 1.125 !!!ack-later;
5256 wakaba 1.126 next B;
5257 wakaba 1.52 } else {
5258 wakaba 1.79 !!!cp ('t213');
5259 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5260     }
5261 wakaba 1.1 }
5262 wakaba 1.52
5263 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5264 wakaba 1.52 ## have an element in table scope
5265 wakaba 1.43 my $i;
5266     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5267     my $node = $self->{open_elements}->[$_];
5268 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5269 wakaba 1.79 !!!cp ('t214');
5270 wakaba 1.43 $i = $_;
5271     last INSCOPE;
5272 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5273 wakaba 1.79 !!!cp ('t215');
5274 wakaba 1.43 last INSCOPE;
5275     }
5276     } # INSCOPE
5277 wakaba 1.52 unless (defined $i) {
5278 wakaba 1.79 !!!cp ('t216');
5279 wakaba 1.82 ## TODO: This error type is wrong.
5280 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5281 wakaba 1.52 ## Ignore the token
5282 wakaba 1.125 !!!nack ('t216.1');
5283 wakaba 1.52 !!!next-token;
5284 wakaba 1.126 next B;
5285 wakaba 1.43 }
5286 wakaba 1.52
5287     ## Clear back to table body context
5288 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5289     & TABLE_ROWS_SCOPING_EL)) {
5290 wakaba 1.79 !!!cp ('t217');
5291 wakaba 1.83 ## ISSUE: Can this state be reached?
5292 wakaba 1.52 pop @{$self->{open_elements}};
5293 wakaba 1.43 }
5294    
5295 wakaba 1.52 ## As if <{current node}>
5296     ## have an element in table scope
5297     ## true by definition
5298 wakaba 1.43
5299 wakaba 1.52 ## Clear back to table body context
5300     ## nop by definition
5301 wakaba 1.43
5302 wakaba 1.52 pop @{$self->{open_elements}};
5303 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5304 wakaba 1.52 ## reprocess in "in table" insertion mode...
5305 wakaba 1.79 } else {
5306     !!!cp ('t218');
5307 wakaba 1.52 }
5308    
5309     if ($token->{tag_name} eq 'col') {
5310     ## Clear back to table context
5311 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5312     & TABLE_SCOPING_EL)) {
5313 wakaba 1.79 !!!cp ('t219');
5314 wakaba 1.83 ## ISSUE: Can this state be reached?
5315 wakaba 1.52 pop @{$self->{open_elements}};
5316     }
5317 wakaba 1.43
5318 wakaba 1.116 !!!insert-element ('colgroup',, $token);
5319 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5320 wakaba 1.52 ## reprocess
5321 wakaba 1.125 !!!ack-later;
5322 wakaba 1.126 next B;
5323 wakaba 1.52 } elsif ({
5324     caption => 1,
5325     colgroup => 1,
5326     tbody => 1, tfoot => 1, thead => 1,
5327     }->{$token->{tag_name}}) {
5328     ## Clear back to table context
5329 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5330     & TABLE_SCOPING_EL)) {
5331 wakaba 1.79 !!!cp ('t220');
5332 wakaba 1.83 ## ISSUE: Can this state be reached?
5333 wakaba 1.52 pop @{$self->{open_elements}};
5334 wakaba 1.1 }
5335 wakaba 1.52
5336     push @$active_formatting_elements, ['#marker', '']
5337     if $token->{tag_name} eq 'caption';
5338    
5339 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5340 wakaba 1.52 $self->{insertion_mode} = {
5341 wakaba 1.54 caption => IN_CAPTION_IM,
5342     colgroup => IN_COLUMN_GROUP_IM,
5343     tbody => IN_TABLE_BODY_IM,
5344     tfoot => IN_TABLE_BODY_IM,
5345     thead => IN_TABLE_BODY_IM,
5346 wakaba 1.52 }->{$token->{tag_name}};
5347 wakaba 1.1 !!!next-token;
5348 wakaba 1.125 !!!nack ('t220.1');
5349 wakaba 1.126 next B;
5350 wakaba 1.52 } else {
5351     die "$0: in table: <>: $token->{tag_name}";
5352 wakaba 1.1 }
5353 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5354 wakaba 1.122 !!!parse-error (type => 'not closed',
5355     value => $self->{open_elements}->[-1]->[0]
5356     ->manakai_local_name,
5357     token => $token);
5358 wakaba 1.1
5359 wakaba 1.52 ## As if </table>
5360 wakaba 1.1 ## have a table element in table scope
5361     my $i;
5362 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5363     my $node = $self->{open_elements}->[$_];
5364 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5365 wakaba 1.79 !!!cp ('t221');
5366 wakaba 1.1 $i = $_;
5367     last INSCOPE;
5368 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5369 wakaba 1.79 !!!cp ('t222');
5370 wakaba 1.1 last INSCOPE;
5371     }
5372     } # INSCOPE
5373     unless (defined $i) {
5374 wakaba 1.79 !!!cp ('t223');
5375 wakaba 1.83 ## TODO: The following is wrong, maybe.
5376 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
5377 wakaba 1.52 ## Ignore tokens </table><table>
5378 wakaba 1.125 !!!nack ('t223.1');
5379 wakaba 1.1 !!!next-token;
5380 wakaba 1.126 next B;
5381 wakaba 1.1 }
5382    
5383 wakaba 1.106 ## TODO: The following steps have been removed from the latest spec.
5384 wakaba 1.1 ## generate implied end tags
5385 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5386 wakaba 1.79 !!!cp ('t224');
5387 wakaba 1.86 pop @{$self->{open_elements}};
5388 wakaba 1.1 }
5389    
5390 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5391 wakaba 1.79 !!!cp ('t225');
5392 wakaba 1.122 ## NOTE: |<table><tr><table>|
5393     !!!parse-error (type => 'not closed',
5394     value => $self->{open_elements}->[-1]->[0]
5395     ->manakai_local_name,
5396     token => $token);
5397 wakaba 1.79 } else {
5398     !!!cp ('t226');
5399 wakaba 1.1 }
5400    
5401 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5402 wakaba 1.95 pop @{$open_tables};
5403 wakaba 1.1
5404 wakaba 1.52 $self->_reset_insertion_mode;
5405 wakaba 1.1
5406 wakaba 1.125 ## reprocess
5407     !!!ack-later;
5408 wakaba 1.126 next B;
5409 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
5410     if (not $open_tables->[-1]->[1]) { # tainted
5411     !!!cp ('t227.8');
5412     ## NOTE: This is a "as if in head" code clone.
5413     $parse_rcdata->(CDATA_CONTENT_MODEL);
5414 wakaba 1.126 next B;
5415 wakaba 1.100 } else {
5416     !!!cp ('t227.7');
5417     #
5418     }
5419     } elsif ($token->{tag_name} eq 'script') {
5420     if (not $open_tables->[-1]->[1]) { # tainted
5421     !!!cp ('t227.6');
5422     ## NOTE: This is a "as if in head" code clone.
5423     $script_start_tag->();
5424 wakaba 1.126 next B;
5425 wakaba 1.100 } else {
5426     !!!cp ('t227.5');
5427     #
5428     }
5429 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5430     if (not $open_tables->[-1]->[1]) { # tainted
5431     if ($token->{attributes}->{type}) { ## TODO: case
5432     my $type = lc $token->{attributes}->{type}->{value};
5433     if ($type eq 'hidden') {
5434     !!!cp ('t227.3');
5435 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5436 wakaba 1.98
5437 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5438 wakaba 1.98
5439     ## TODO: form element pointer
5440    
5441     pop @{$self->{open_elements}};
5442    
5443     !!!next-token;
5444 wakaba 1.125 !!!ack ('t227.2.1');
5445 wakaba 1.126 next B;
5446 wakaba 1.98 } else {
5447     !!!cp ('t227.2');
5448     #
5449     }
5450     } else {
5451     !!!cp ('t227.1');
5452     #
5453     }
5454     } else {
5455     !!!cp ('t227.4');
5456     #
5457     }
5458 wakaba 1.58 } else {
5459 wakaba 1.79 !!!cp ('t227');
5460 wakaba 1.58 #
5461     }
5462 wakaba 1.98
5463 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5464 wakaba 1.98
5465     $insert = $insert_to_foster;
5466     #
5467 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5468 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5469 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5470 wakaba 1.52 ## have an element in table scope
5471     my $i;
5472     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5473     my $node = $self->{open_elements}->[$_];
5474 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5475 wakaba 1.79 !!!cp ('t228');
5476 wakaba 1.52 $i = $_;
5477     last INSCOPE;
5478 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5479 wakaba 1.79 !!!cp ('t229');
5480 wakaba 1.52 last INSCOPE;
5481     }
5482     } # INSCOPE
5483     unless (defined $i) {
5484 wakaba 1.79 !!!cp ('t230');
5485 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5486 wakaba 1.52 ## Ignore the token
5487 wakaba 1.125 !!!nack ('t230.1');
5488 wakaba 1.42 !!!next-token;
5489 wakaba 1.126 next B;
5490 wakaba 1.79 } else {
5491     !!!cp ('t232');
5492 wakaba 1.42 }
5493    
5494 wakaba 1.52 ## Clear back to table row context
5495 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5496     & TABLE_ROW_SCOPING_EL)) {
5497 wakaba 1.79 !!!cp ('t231');
5498 wakaba 1.83 ## ISSUE: Can this state be reached?
5499 wakaba 1.52 pop @{$self->{open_elements}};
5500     }
5501 wakaba 1.42
5502 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5503 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5504 wakaba 1.52 !!!next-token;
5505 wakaba 1.125 !!!nack ('t231.1');
5506 wakaba 1.126 next B;
5507 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5508 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5509 wakaba 1.52 ## As if </tr>
5510     ## have an element in table scope
5511     my $i;
5512     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5513     my $node = $self->{open_elements}->[$_];
5514 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5515 wakaba 1.79 !!!cp ('t233');
5516 wakaba 1.52 $i = $_;
5517     last INSCOPE;
5518 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5519 wakaba 1.79 !!!cp ('t234');
5520 wakaba 1.52 last INSCOPE;
5521 wakaba 1.42 }
5522 wakaba 1.52 } # INSCOPE
5523     unless (defined $i) {
5524 wakaba 1.79 !!!cp ('t235');
5525 wakaba 1.83 ## TODO: The following is wrong.
5526 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
5527 wakaba 1.52 ## Ignore the token
5528 wakaba 1.125 !!!nack ('t236.1');
5529 wakaba 1.52 !!!next-token;
5530 wakaba 1.126 next B;
5531 wakaba 1.42 }
5532 wakaba 1.52
5533     ## Clear back to table row context
5534 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5535     & TABLE_ROW_SCOPING_EL)) {
5536 wakaba 1.79 !!!cp ('t236');
5537 wakaba 1.83 ## ISSUE: Can this state be reached?
5538 wakaba 1.46 pop @{$self->{open_elements}};
5539 wakaba 1.1 }
5540 wakaba 1.46
5541 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5542 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5543 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5544 wakaba 1.1 }
5545    
5546 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5547 wakaba 1.52 ## have an element in table scope
5548     my $i;
5549     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5550     my $node = $self->{open_elements}->[$_];
5551 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5552 wakaba 1.79 !!!cp ('t237');
5553 wakaba 1.52 $i = $_;
5554     last INSCOPE;
5555 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5556 wakaba 1.79 !!!cp ('t238');
5557 wakaba 1.52 last INSCOPE;
5558     }
5559     } # INSCOPE
5560     unless (defined $i) {
5561 wakaba 1.79 !!!cp ('t239');
5562 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5563 wakaba 1.52 ## Ignore the token
5564 wakaba 1.125 !!!nack ('t239.1');
5565 wakaba 1.52 !!!next-token;
5566 wakaba 1.126 next B;
5567 wakaba 1.47 }
5568    
5569     ## Clear back to table body context
5570 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5571     & TABLE_ROWS_SCOPING_EL)) {
5572 wakaba 1.79 !!!cp ('t240');
5573 wakaba 1.47 pop @{$self->{open_elements}};
5574     }
5575    
5576 wakaba 1.52 ## As if <{current node}>
5577     ## have an element in table scope
5578     ## true by definition
5579    
5580     ## Clear back to table body context
5581     ## nop by definition
5582    
5583     pop @{$self->{open_elements}};
5584 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5585 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5586     }
5587    
5588 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5589     ## When you edit the code fragment below, please ensure that
5590     ## the code for <table> in the "in table" insertion mode
5591     ## is synced with it.
5592    
5593 wakaba 1.52 ## have a table element in table scope
5594     my $i;
5595     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5596     my $node = $self->{open_elements}->[$_];
5597 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5598 wakaba 1.79 !!!cp ('t241');
5599 wakaba 1.52 $i = $_;
5600     last INSCOPE;
5601 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5602 wakaba 1.79 !!!cp ('t242');
5603 wakaba 1.52 last INSCOPE;
5604 wakaba 1.47 }
5605 wakaba 1.52 } # INSCOPE
5606     unless (defined $i) {
5607 wakaba 1.79 !!!cp ('t243');
5608 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5609 wakaba 1.52 ## Ignore the token
5610 wakaba 1.125 !!!nack ('t243.1');
5611 wakaba 1.52 !!!next-token;
5612 wakaba 1.126 next B;
5613 wakaba 1.3 }
5614 wakaba 1.52
5615     splice @{$self->{open_elements}}, $i;
5616 wakaba 1.95 pop @{$open_tables};
5617 wakaba 1.1
5618 wakaba 1.52 $self->_reset_insertion_mode;
5619 wakaba 1.47
5620     !!!next-token;
5621 wakaba 1.126 next B;
5622 wakaba 1.47 } elsif ({
5623 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5624 wakaba 1.52 }->{$token->{tag_name}} and
5625 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5626 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5627 wakaba 1.52 ## have an element in table scope
5628     my $i;
5629     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5630     my $node = $self->{open_elements}->[$_];
5631 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5632 wakaba 1.79 !!!cp ('t247');
5633 wakaba 1.52 $i = $_;
5634     last INSCOPE;
5635 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5636 wakaba 1.79 !!!cp ('t248');
5637 wakaba 1.52 last INSCOPE;
5638     }
5639     } # INSCOPE
5640     unless (defined $i) {
5641 wakaba 1.79 !!!cp ('t249');
5642 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5643 wakaba 1.52 ## Ignore the token
5644 wakaba 1.125 !!!nack ('t249.1');
5645 wakaba 1.52 !!!next-token;
5646 wakaba 1.126 next B;
5647 wakaba 1.52 }
5648    
5649 wakaba 1.48 ## As if </tr>
5650     ## have an element in table scope
5651     my $i;
5652     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5653     my $node = $self->{open_elements}->[$_];
5654 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5655 wakaba 1.79 !!!cp ('t250');
5656 wakaba 1.48 $i = $_;
5657     last INSCOPE;
5658 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5659 wakaba 1.79 !!!cp ('t251');
5660 wakaba 1.48 last INSCOPE;
5661     }
5662     } # INSCOPE
5663 wakaba 1.52 unless (defined $i) {
5664 wakaba 1.79 !!!cp ('t252');
5665 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
5666 wakaba 1.52 ## Ignore the token
5667 wakaba 1.125 !!!nack ('t252.1');
5668 wakaba 1.52 !!!next-token;
5669 wakaba 1.126 next B;
5670 wakaba 1.52 }
5671 wakaba 1.48
5672     ## Clear back to table row context
5673 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5674     & TABLE_ROW_SCOPING_EL)) {
5675 wakaba 1.79 !!!cp ('t253');
5676 wakaba 1.83 ## ISSUE: Can this case be reached?
5677 wakaba 1.48 pop @{$self->{open_elements}};
5678     }
5679    
5680     pop @{$self->{open_elements}}; # tr
5681 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5682 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5683     }
5684    
5685     ## have an element in table scope
5686     my $i;
5687     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5688     my $node = $self->{open_elements}->[$_];
5689 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5690 wakaba 1.79 !!!cp ('t254');
5691 wakaba 1.52 $i = $_;
5692     last INSCOPE;
5693 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5694 wakaba 1.79 !!!cp ('t255');
5695 wakaba 1.52 last INSCOPE;
5696     }
5697     } # INSCOPE
5698     unless (defined $i) {
5699 wakaba 1.79 !!!cp ('t256');
5700 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5701 wakaba 1.52 ## Ignore the token
5702 wakaba 1.125 !!!nack ('t256.1');
5703 wakaba 1.52 !!!next-token;
5704 wakaba 1.126 next B;
5705 wakaba 1.52 }
5706    
5707     ## Clear back to table body context
5708 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5709     & TABLE_ROWS_SCOPING_EL)) {
5710 wakaba 1.79 !!!cp ('t257');
5711 wakaba 1.83 ## ISSUE: Can this case be reached?
5712 wakaba 1.52 pop @{$self->{open_elements}};
5713     }
5714    
5715     pop @{$self->{open_elements}};
5716 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5717 wakaba 1.125 !!!nack ('t257.1');
5718 wakaba 1.52 !!!next-token;
5719 wakaba 1.126 next B;
5720 wakaba 1.52 } elsif ({
5721     body => 1, caption => 1, col => 1, colgroup => 1,
5722     html => 1, td => 1, th => 1,
5723 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5724     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5725 wakaba 1.52 }->{$token->{tag_name}}) {
5726 wakaba 1.125 !!!cp ('t258');
5727     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5728     ## Ignore the token
5729     !!!nack ('t258.1');
5730     !!!next-token;
5731 wakaba 1.126 next B;
5732 wakaba 1.58 } else {
5733 wakaba 1.79 !!!cp ('t259');
5734 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
5735 wakaba 1.52
5736 wakaba 1.58 $insert = $insert_to_foster;
5737     #
5738     }
5739 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5740 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5741 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5742 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5743 wakaba 1.104 !!!cp ('t259.1');
5744 wakaba 1.105 #
5745 wakaba 1.104 } else {
5746     !!!cp ('t259.2');
5747 wakaba 1.105 #
5748 wakaba 1.104 }
5749    
5750     ## Stop parsing
5751     last B;
5752 wakaba 1.58 } else {
5753     die "$0: $token->{type}: Unknown token type";
5754     }
5755 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5756 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5757 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5758     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5759     unless (length $token->{data}) {
5760 wakaba 1.79 !!!cp ('t260');
5761 wakaba 1.52 !!!next-token;
5762 wakaba 1.126 next B;
5763 wakaba 1.52 }
5764     }
5765    
5766 wakaba 1.79 !!!cp ('t261');
5767 wakaba 1.52 #
5768 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5769 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5770 wakaba 1.79 !!!cp ('t262');
5771 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5772 wakaba 1.52 pop @{$self->{open_elements}};
5773 wakaba 1.125 !!!ack ('t262.1');
5774 wakaba 1.52 !!!next-token;
5775 wakaba 1.126 next B;
5776 wakaba 1.52 } else {
5777 wakaba 1.79 !!!cp ('t263');
5778 wakaba 1.52 #
5779     }
5780 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5781 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5782 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5783 wakaba 1.79 !!!cp ('t264');
5784 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5785 wakaba 1.52 ## Ignore the token
5786     !!!next-token;
5787 wakaba 1.126 next B;
5788 wakaba 1.52 } else {
5789 wakaba 1.79 !!!cp ('t265');
5790 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5791 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5792 wakaba 1.52 !!!next-token;
5793 wakaba 1.126 next B;
5794 wakaba 1.52 }
5795     } elsif ($token->{tag_name} eq 'col') {
5796 wakaba 1.79 !!!cp ('t266');
5797 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5798 wakaba 1.52 ## Ignore the token
5799     !!!next-token;
5800 wakaba 1.126 next B;
5801 wakaba 1.52 } else {
5802 wakaba 1.79 !!!cp ('t267');
5803 wakaba 1.52 #
5804     }
5805 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5806 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5807 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5808     !!!cp ('t270.2');
5809     ## Stop parsing.
5810     last B;
5811     } else {
5812     ## NOTE: As if </colgroup>.
5813     !!!cp ('t270.1');
5814     pop @{$self->{open_elements}}; # colgroup
5815     $self->{insertion_mode} = IN_TABLE_IM;
5816     ## Reprocess.
5817 wakaba 1.126 next B;
5818 wakaba 1.104 }
5819     } else {
5820     die "$0: $token->{type}: Unknown token type";
5821     }
5822 wakaba 1.52
5823     ## As if </colgroup>
5824 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5825 wakaba 1.79 !!!cp ('t269');
5826 wakaba 1.104 ## TODO: Wrong error type?
5827 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5828 wakaba 1.52 ## Ignore the token
5829 wakaba 1.125 !!!nack ('t269.1');
5830 wakaba 1.52 !!!next-token;
5831 wakaba 1.126 next B;
5832 wakaba 1.52 } else {
5833 wakaba 1.79 !!!cp ('t270');
5834 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5835 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5836 wakaba 1.125 !!!ack-later;
5837 wakaba 1.52 ## reprocess
5838 wakaba 1.126 next B;
5839 wakaba 1.52 }
5840 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5841 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5842 wakaba 1.79 !!!cp ('t271');
5843 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5844     !!!next-token;
5845 wakaba 1.126 next B;
5846 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5847 wakaba 1.123 if ($token->{tag_name} eq 'option') {
5848     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5849     !!!cp ('t272');
5850     ## As if </option>
5851     pop @{$self->{open_elements}};
5852     } else {
5853     !!!cp ('t273');
5854     }
5855 wakaba 1.52
5856 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5857 wakaba 1.125 !!!nack ('t273.1');
5858 wakaba 1.123 !!!next-token;
5859 wakaba 1.126 next B;
5860 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
5861     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5862     !!!cp ('t274');
5863     ## As if </option>
5864     pop @{$self->{open_elements}};
5865     } else {
5866     !!!cp ('t275');
5867     }
5868 wakaba 1.52
5869 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5870     !!!cp ('t276');
5871     ## As if </optgroup>
5872     pop @{$self->{open_elements}};
5873     } else {
5874     !!!cp ('t277');
5875     }
5876 wakaba 1.52
5877 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5878 wakaba 1.125 !!!nack ('t277.1');
5879 wakaba 1.123 !!!next-token;
5880 wakaba 1.126 next B;
5881 wakaba 1.101 } elsif ($token->{tag_name} eq 'select' or
5882     $token->{tag_name} eq 'input' or
5883     ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5884     {
5885     caption => 1, table => 1,
5886     tbody => 1, tfoot => 1, thead => 1,
5887     tr => 1, td => 1, th => 1,
5888     }->{$token->{tag_name}})) {
5889     ## TODO: The type below is not good - <select> is replaced by </select>
5890 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5891 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5892     ## as if there were </select> (otherwise).
5893 wakaba 1.123 ## have an element in table scope
5894     my $i;
5895     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5896     my $node = $self->{open_elements}->[$_];
5897     if ($node->[1] & SELECT_EL) {
5898     !!!cp ('t278');
5899     $i = $_;
5900     last INSCOPE;
5901     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5902     !!!cp ('t279');
5903     last INSCOPE;
5904     }
5905     } # INSCOPE
5906     unless (defined $i) {
5907     !!!cp ('t280');
5908     !!!parse-error (type => 'unmatched end tag:select', token => $token);
5909     ## Ignore the token
5910 wakaba 1.125 !!!nack ('t280.1');
5911 wakaba 1.123 !!!next-token;
5912 wakaba 1.126 next B;
5913 wakaba 1.123 }
5914 wakaba 1.52
5915 wakaba 1.123 !!!cp ('t281');
5916     splice @{$self->{open_elements}}, $i;
5917 wakaba 1.52
5918 wakaba 1.123 $self->_reset_insertion_mode;
5919 wakaba 1.47
5920 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5921 wakaba 1.125 !!!nack ('t281.2');
5922 wakaba 1.101 !!!next-token;
5923 wakaba 1.126 next B;
5924 wakaba 1.101 } else {
5925     !!!cp ('t281.1');
5926 wakaba 1.125 !!!ack-later;
5927 wakaba 1.101 ## Reprocess the token.
5928 wakaba 1.126 next B;
5929 wakaba 1.101 }
5930 wakaba 1.58 } else {
5931 wakaba 1.79 !!!cp ('t282');
5932 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5933 wakaba 1.58 ## Ignore the token
5934 wakaba 1.125 !!!nack ('t282.1');
5935 wakaba 1.58 !!!next-token;
5936 wakaba 1.126 next B;
5937 wakaba 1.58 }
5938     } elsif ($token->{type} == END_TAG_TOKEN) {
5939 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
5940     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
5941     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
5942     !!!cp ('t283');
5943     ## As if </option>
5944     splice @{$self->{open_elements}}, -2;
5945     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5946     !!!cp ('t284');
5947     pop @{$self->{open_elements}};
5948     } else {
5949     !!!cp ('t285');
5950     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5951     ## Ignore the token
5952     }
5953 wakaba 1.125 !!!nack ('t285.1');
5954 wakaba 1.123 !!!next-token;
5955 wakaba 1.126 next B;
5956 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
5957     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5958     !!!cp ('t286');
5959     pop @{$self->{open_elements}};
5960     } else {
5961     !!!cp ('t287');
5962     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5963     ## Ignore the token
5964     }
5965 wakaba 1.125 !!!nack ('t287.1');
5966 wakaba 1.123 !!!next-token;
5967 wakaba 1.126 next B;
5968 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
5969     ## have an element in table scope
5970     my $i;
5971     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5972     my $node = $self->{open_elements}->[$_];
5973     if ($node->[1] & SELECT_EL) {
5974     !!!cp ('t288');
5975     $i = $_;
5976     last INSCOPE;
5977     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5978     !!!cp ('t289');
5979     last INSCOPE;
5980     }
5981     } # INSCOPE
5982     unless (defined $i) {
5983     !!!cp ('t290');
5984     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5985     ## Ignore the token
5986 wakaba 1.125 !!!nack ('t290.1');
5987 wakaba 1.123 !!!next-token;
5988 wakaba 1.126 next B;
5989 wakaba 1.123 }
5990 wakaba 1.52
5991 wakaba 1.123 !!!cp ('t291');
5992     splice @{$self->{open_elements}}, $i;
5993 wakaba 1.52
5994 wakaba 1.123 $self->_reset_insertion_mode;
5995 wakaba 1.52
5996 wakaba 1.125 !!!nack ('t291.1');
5997 wakaba 1.123 !!!next-token;
5998 wakaba 1.126 next B;
5999 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6000     {
6001     caption => 1, table => 1, tbody => 1,
6002     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6003     }->{$token->{tag_name}}) {
6004 wakaba 1.83 ## TODO: Is the following error wrong?
6005 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6006 wakaba 1.52
6007 wakaba 1.123 ## have an element in table scope
6008     my $i;
6009     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6010     my $node = $self->{open_elements}->[$_];
6011     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6012     !!!cp ('t292');
6013     $i = $_;
6014     last INSCOPE;
6015     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6016     !!!cp ('t293');
6017     last INSCOPE;
6018     }
6019     } # INSCOPE
6020     unless (defined $i) {
6021     !!!cp ('t294');
6022     ## Ignore the token
6023 wakaba 1.125 !!!nack ('t294.1');
6024 wakaba 1.123 !!!next-token;
6025 wakaba 1.126 next B;
6026 wakaba 1.123 }
6027 wakaba 1.52
6028 wakaba 1.123 ## As if </select>
6029     ## have an element in table scope
6030     undef $i;
6031     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6032     my $node = $self->{open_elements}->[$_];
6033     if ($node->[1] & SELECT_EL) {
6034     !!!cp ('t295');
6035     $i = $_;
6036     last INSCOPE;
6037     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6038 wakaba 1.83 ## ISSUE: Can this state be reached?
6039 wakaba 1.123 !!!cp ('t296');
6040     last INSCOPE;
6041     }
6042     } # INSCOPE
6043     unless (defined $i) {
6044     !!!cp ('t297');
6045 wakaba 1.83 ## TODO: Is the following error type correct?
6046 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:select', token => $token);
6047     ## Ignore the </select> token
6048 wakaba 1.125 !!!nack ('t297.1');
6049 wakaba 1.123 !!!next-token; ## TODO: ok?
6050 wakaba 1.126 next B;
6051 wakaba 1.123 }
6052 wakaba 1.52
6053 wakaba 1.123 !!!cp ('t298');
6054     splice @{$self->{open_elements}}, $i;
6055 wakaba 1.52
6056 wakaba 1.123 $self->_reset_insertion_mode;
6057 wakaba 1.52
6058 wakaba 1.125 !!!ack-later;
6059 wakaba 1.123 ## reprocess
6060 wakaba 1.126 next B;
6061 wakaba 1.58 } else {
6062 wakaba 1.79 !!!cp ('t299');
6063 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
6064 wakaba 1.52 ## Ignore the token
6065 wakaba 1.125 !!!nack ('t299.3');
6066 wakaba 1.52 !!!next-token;
6067 wakaba 1.126 next B;
6068 wakaba 1.58 }
6069 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6070 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6071 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6072     !!!cp ('t299.1');
6073 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6074 wakaba 1.104 } else {
6075     !!!cp ('t299.2');
6076     }
6077    
6078     ## Stop parsing.
6079     last B;
6080 wakaba 1.58 } else {
6081     die "$0: $token->{type}: Unknown token type";
6082     }
6083 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6084 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6085 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6086     my $data = $1;
6087     ## As if in body
6088     $reconstruct_active_formatting_elements->($insert_to_current);
6089    
6090     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6091    
6092     unless (length $token->{data}) {
6093 wakaba 1.79 !!!cp ('t300');
6094 wakaba 1.52 !!!next-token;
6095 wakaba 1.126 next B;
6096 wakaba 1.52 }
6097     }
6098    
6099 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6100 wakaba 1.79 !!!cp ('t301');
6101 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
6102 wakaba 1.52
6103 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6104 wakaba 1.79 } else {
6105     !!!cp ('t302');
6106 wakaba 1.52 }
6107    
6108     ## "after body" insertion mode
6109 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
6110 wakaba 1.52
6111 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6112 wakaba 1.52 ## reprocess
6113 wakaba 1.126 next B;
6114 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6115 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6116 wakaba 1.79 !!!cp ('t303');
6117 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
6118 wakaba 1.52
6119 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6120 wakaba 1.79 } else {
6121     !!!cp ('t304');
6122 wakaba 1.52 }
6123    
6124     ## "after body" insertion mode
6125 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
6126 wakaba 1.52
6127 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6128 wakaba 1.125 !!!ack-later;
6129 wakaba 1.52 ## reprocess
6130 wakaba 1.126 next B;
6131 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6132 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6133 wakaba 1.79 !!!cp ('t305');
6134 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
6135 wakaba 1.52
6136 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6137 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6138 wakaba 1.79 } else {
6139     !!!cp ('t306');
6140 wakaba 1.52 }
6141    
6142     ## "after body" insertion mode
6143     if ($token->{tag_name} eq 'html') {
6144     if (defined $self->{inner_html_node}) {
6145 wakaba 1.79 !!!cp ('t307');
6146 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
6147 wakaba 1.52 ## Ignore the token
6148     !!!next-token;
6149 wakaba 1.126 next B;
6150 wakaba 1.52 } else {
6151 wakaba 1.79 !!!cp ('t308');
6152 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
6153 wakaba 1.52 !!!next-token;
6154 wakaba 1.126 next B;
6155 wakaba 1.52 }
6156     } else {
6157 wakaba 1.79 !!!cp ('t309');
6158 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
6159 wakaba 1.52
6160 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6161 wakaba 1.52 ## reprocess
6162 wakaba 1.126 next B;
6163 wakaba 1.52 }
6164 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6165     !!!cp ('t309.2');
6166     ## Stop parsing
6167     last B;
6168 wakaba 1.52 } else {
6169     die "$0: $token->{type}: Unknown token type";
6170     }
6171 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
6172 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6173 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6174     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6175    
6176     unless (length $token->{data}) {
6177 wakaba 1.79 !!!cp ('t310');
6178 wakaba 1.52 !!!next-token;
6179 wakaba 1.126 next B;
6180 wakaba 1.52 }
6181     }
6182    
6183     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6184 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6185 wakaba 1.79 !!!cp ('t311');
6186 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
6187 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6188 wakaba 1.79 !!!cp ('t312');
6189 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
6190 wakaba 1.52 } else { # "after html frameset"
6191 wakaba 1.79 !!!cp ('t313');
6192 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
6193 wakaba 1.52
6194 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6195 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
6196 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
6197 wakaba 1.52 }
6198    
6199     ## Ignore the token.
6200     if (length $token->{data}) {
6201 wakaba 1.79 !!!cp ('t314');
6202 wakaba 1.52 ## reprocess the rest of characters
6203     } else {
6204 wakaba 1.79 !!!cp ('t315');
6205 wakaba 1.52 !!!next-token;
6206     }
6207 wakaba 1.126 next B;
6208 wakaba 1.52 }
6209    
6210     die qq[$0: Character "$token->{data}"];
6211 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6212 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
6213 wakaba 1.79 !!!cp ('t316');
6214 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
6215 wakaba 1.1
6216 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6217 wakaba 1.84 ## Process in the "after frameset" insertion mode.
6218 wakaba 1.79 } else {
6219     !!!cp ('t317');
6220     }
6221 wakaba 1.1
6222 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6223 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6224 wakaba 1.79 !!!cp ('t318');
6225 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6226 wakaba 1.125 !!!nack ('t318.1');
6227 wakaba 1.52 !!!next-token;
6228 wakaba 1.126 next B;
6229 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
6230 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6231 wakaba 1.79 !!!cp ('t319');
6232 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6233 wakaba 1.52 pop @{$self->{open_elements}};
6234 wakaba 1.125 !!!ack ('t319.1');
6235 wakaba 1.52 !!!next-token;
6236 wakaba 1.126 next B;
6237 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
6238 wakaba 1.79 !!!cp ('t320');
6239 wakaba 1.52 ## NOTE: As if in body.
6240 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6241 wakaba 1.126 next B;
6242 wakaba 1.52 } else {
6243 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6244 wakaba 1.79 !!!cp ('t321');
6245 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
6246 wakaba 1.52 } else {
6247 wakaba 1.79 !!!cp ('t322');
6248 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
6249 wakaba 1.52 }
6250     ## Ignore the token
6251 wakaba 1.125 !!!nack ('t322.1');
6252 wakaba 1.52 !!!next-token;
6253 wakaba 1.126 next B;
6254 wakaba 1.52 }
6255 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6256 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
6257 wakaba 1.79 !!!cp ('t323');
6258 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
6259 wakaba 1.1
6260 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6261 wakaba 1.84 ## Process in the "after frameset" insertion mode.
6262 wakaba 1.79 } else {
6263     !!!cp ('t324');
6264 wakaba 1.52 }
6265 wakaba 1.1
6266 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6267 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6268 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6269 wakaba 1.52 @{$self->{open_elements}} == 1) {
6270 wakaba 1.79 !!!cp ('t325');
6271 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6272 wakaba 1.52 ## Ignore the token
6273     !!!next-token;
6274     } else {
6275 wakaba 1.79 !!!cp ('t326');
6276 wakaba 1.52 pop @{$self->{open_elements}};
6277     !!!next-token;
6278     }
6279 wakaba 1.47
6280 wakaba 1.52 if (not defined $self->{inner_html_node} and
6281 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6282 wakaba 1.79 !!!cp ('t327');
6283 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6284 wakaba 1.79 } else {
6285     !!!cp ('t328');
6286 wakaba 1.52 }
6287 wakaba 1.126 next B;
6288 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
6289 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6290 wakaba 1.79 !!!cp ('t329');
6291 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6292 wakaba 1.52 !!!next-token;
6293 wakaba 1.126 next B;
6294 wakaba 1.52 } else {
6295 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6296 wakaba 1.79 !!!cp ('t330');
6297 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
6298 wakaba 1.52 } else {
6299 wakaba 1.79 !!!cp ('t331');
6300 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
6301 wakaba 1.52 }
6302     ## Ignore the token
6303     !!!next-token;
6304 wakaba 1.126 next B;
6305 wakaba 1.52 }
6306 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6307 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6308 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6309     !!!cp ('t331.1');
6310 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6311 wakaba 1.104 } else {
6312     !!!cp ('t331.2');
6313     }
6314    
6315     ## Stop parsing
6316     last B;
6317 wakaba 1.52 } else {
6318     die "$0: $token->{type}: Unknown token type";
6319     }
6320 wakaba 1.47
6321 wakaba 1.52 ## ISSUE: An issue in spec here
6322     } else {
6323     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6324     }
6325 wakaba 1.47
6326 wakaba 1.52 ## "in body" insertion mode
6327 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
6328 wakaba 1.52 if ($token->{tag_name} eq 'script') {
6329 wakaba 1.79 !!!cp ('t332');
6330 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6331 wakaba 1.100 $script_start_tag->();
6332 wakaba 1.126 next B;
6333 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
6334 wakaba 1.79 !!!cp ('t333');
6335 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6336 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6337 wakaba 1.126 next B;
6338 wakaba 1.52 } elsif ({
6339     base => 1, link => 1,
6340     }->{$token->{tag_name}}) {
6341 wakaba 1.79 !!!cp ('t334');
6342 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6343 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6344 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6345 wakaba 1.125 !!!ack ('t334.1');
6346 wakaba 1.52 !!!next-token;
6347 wakaba 1.126 next B;
6348 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
6349     ## NOTE: This is an "as if in head" code clone, only "-t" differs
6350 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6351 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6352 wakaba 1.46
6353 wakaba 1.52 unless ($self->{confident}) {
6354 wakaba 1.134 if ($token->{attributes}->{charset}) {
6355 wakaba 1.79 !!!cp ('t335');
6356 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6357     ## in the {change_encoding} callback.
6358 wakaba 1.63 $self->{change_encoding}
6359 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
6360 wakaba 1.66
6361     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6362     ->set_user_data (manakai_has_reference =>
6363     $token->{attributes}->{charset}
6364     ->{has_reference});
6365 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
6366     if ($token->{attributes}->{content}->{value}
6367 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6368     [\x09-\x0D\x20]*=
6369 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6370     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
6371 wakaba 1.79 !!!cp ('t336');
6372 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6373     ## in the {change_encoding} callback.
6374 wakaba 1.63 $self->{change_encoding}
6375 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6376 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6377     ->set_user_data (manakai_has_reference =>
6378     $token->{attributes}->{content}
6379     ->{has_reference});
6380 wakaba 1.63 }
6381 wakaba 1.52 }
6382 wakaba 1.66 } else {
6383     if ($token->{attributes}->{charset}) {
6384 wakaba 1.79 !!!cp ('t337');
6385 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6386     ->set_user_data (manakai_has_reference =>
6387     $token->{attributes}->{charset}
6388     ->{has_reference});
6389     }
6390 wakaba 1.68 if ($token->{attributes}->{content}) {
6391 wakaba 1.79 !!!cp ('t338');
6392 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6393     ->set_user_data (manakai_has_reference =>
6394     $token->{attributes}->{content}
6395     ->{has_reference});
6396     }
6397 wakaba 1.52 }
6398 wakaba 1.1
6399 wakaba 1.125 !!!ack ('t338.1');
6400 wakaba 1.52 !!!next-token;
6401 wakaba 1.126 next B;
6402 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
6403 wakaba 1.79 !!!cp ('t341');
6404 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6405 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6406 wakaba 1.126 next B;
6407 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
6408 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
6409 wakaba 1.46
6410 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
6411 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6412 wakaba 1.79 !!!cp ('t342');
6413 wakaba 1.52 ## Ignore the token
6414     } else {
6415     my $body_el = $self->{open_elements}->[1]->[0];
6416     for my $attr_name (keys %{$token->{attributes}}) {
6417     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6418 wakaba 1.79 !!!cp ('t343');
6419 wakaba 1.52 $body_el->set_attribute_ns
6420     (undef, [undef, $attr_name],
6421     $token->{attributes}->{$attr_name}->{value});
6422     }
6423     }
6424     }
6425 wakaba 1.125 !!!nack ('t343.1');
6426 wakaba 1.52 !!!next-token;
6427 wakaba 1.126 next B;
6428 wakaba 1.52 } elsif ({
6429     address => 1, blockquote => 1, center => 1, dir => 1,
6430 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6431     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6432 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6433     pre => 1, listing => 1,
6434 wakaba 1.109 form => 1,
6435     table => 1,
6436     hr => 1,
6437 wakaba 1.52 }->{$token->{tag_name}}) {
6438 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6439     !!!cp ('t350');
6440 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6441 wakaba 1.109 ## Ignore the token
6442 wakaba 1.125 !!!nack ('t350.1');
6443 wakaba 1.109 !!!next-token;
6444 wakaba 1.126 next B;
6445 wakaba 1.109 }
6446    
6447 wakaba 1.52 ## has a p element in scope
6448     INSCOPE: for (reverse @{$self->{open_elements}}) {
6449 wakaba 1.123 if ($_->[1] & P_EL) {
6450 wakaba 1.79 !!!cp ('t344');
6451 wakaba 1.125 !!!back-token; # <form>
6452 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6453     line => $token->{line}, column => $token->{column}};
6454 wakaba 1.126 next B;
6455 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6456 wakaba 1.79 !!!cp ('t345');
6457 wakaba 1.52 last INSCOPE;
6458     }
6459     } # INSCOPE
6460    
6461 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6462 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6463 wakaba 1.125 !!!nack ('t346.1');
6464 wakaba 1.52 !!!next-token;
6465 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6466 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6467     unless (length $token->{data}) {
6468 wakaba 1.79 !!!cp ('t346');
6469 wakaba 1.1 !!!next-token;
6470 wakaba 1.79 } else {
6471     !!!cp ('t349');
6472 wakaba 1.52 }
6473 wakaba 1.79 } else {
6474     !!!cp ('t348');
6475 wakaba 1.52 }
6476 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6477     !!!cp ('t347.1');
6478     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6479    
6480 wakaba 1.125 !!!nack ('t347.2');
6481 wakaba 1.109 !!!next-token;
6482     } elsif ($token->{tag_name} eq 'table') {
6483     !!!cp ('t382');
6484     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6485    
6486     $self->{insertion_mode} = IN_TABLE_IM;
6487    
6488 wakaba 1.125 !!!nack ('t382.1');
6489 wakaba 1.109 !!!next-token;
6490     } elsif ($token->{tag_name} eq 'hr') {
6491     !!!cp ('t386');
6492     pop @{$self->{open_elements}};
6493    
6494 wakaba 1.125 !!!nack ('t386.1');
6495 wakaba 1.109 !!!next-token;
6496 wakaba 1.52 } else {
6497 wakaba 1.125 !!!nack ('t347.1');
6498 wakaba 1.52 !!!next-token;
6499     }
6500 wakaba 1.126 next B;
6501 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6502 wakaba 1.52 ## has a p element in scope
6503     INSCOPE: for (reverse @{$self->{open_elements}}) {
6504 wakaba 1.123 if ($_->[1] & P_EL) {
6505 wakaba 1.79 !!!cp ('t353');
6506 wakaba 1.125 !!!back-token; # <x>
6507 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6508     line => $token->{line}, column => $token->{column}};
6509 wakaba 1.126 next B;
6510 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6511 wakaba 1.79 !!!cp ('t354');
6512 wakaba 1.52 last INSCOPE;
6513     }
6514     } # INSCOPE
6515    
6516     ## Step 1
6517     my $i = -1;
6518     my $node = $self->{open_elements}->[$i];
6519 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6520     dt => {dt => 1, dd => 1},
6521     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6522 wakaba 1.52 LI: {
6523     ## Step 2
6524 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6525 wakaba 1.52 if ($i != -1) {
6526 wakaba 1.79 !!!cp ('t355');
6527 wakaba 1.122 !!!parse-error (type => 'not closed',
6528     value => $self->{open_elements}->[-1]->[0]
6529     ->manakai_local_name,
6530     token => $token);
6531 wakaba 1.79 } else {
6532     !!!cp ('t356');
6533 wakaba 1.52 }
6534     splice @{$self->{open_elements}}, $i;
6535     last LI;
6536 wakaba 1.79 } else {
6537     !!!cp ('t357');
6538 wakaba 1.52 }
6539    
6540     ## Step 3
6541 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6542 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6543 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6544     $node->[1] & SCOPING_EL) and
6545     not ($node->[1] & ADDRESS_EL) and
6546     not ($node->[1] & DIV_EL)) {
6547 wakaba 1.79 !!!cp ('t358');
6548 wakaba 1.52 last LI;
6549     }
6550    
6551 wakaba 1.79 !!!cp ('t359');
6552 wakaba 1.52 ## Step 4
6553     $i--;
6554     $node = $self->{open_elements}->[$i];
6555     redo LI;
6556     } # LI
6557    
6558 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6559 wakaba 1.125 !!!nack ('t359.1');
6560 wakaba 1.52 !!!next-token;
6561 wakaba 1.126 next B;
6562 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6563     ## has a p element in scope
6564     INSCOPE: for (reverse @{$self->{open_elements}}) {
6565 wakaba 1.123 if ($_->[1] & P_EL) {
6566 wakaba 1.79 !!!cp ('t367');
6567 wakaba 1.125 !!!back-token; # <plaintext>
6568 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6569     line => $token->{line}, column => $token->{column}};
6570 wakaba 1.126 next B;
6571 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6572 wakaba 1.79 !!!cp ('t368');
6573 wakaba 1.52 last INSCOPE;
6574 wakaba 1.46 }
6575 wakaba 1.52 } # INSCOPE
6576    
6577 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6578 wakaba 1.52
6579     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6580    
6581 wakaba 1.125 !!!nack ('t368.1');
6582 wakaba 1.52 !!!next-token;
6583 wakaba 1.126 next B;
6584 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6585     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6586     my $node = $active_formatting_elements->[$i];
6587 wakaba 1.123 if ($node->[1] & A_EL) {
6588 wakaba 1.79 !!!cp ('t371');
6589 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6590 wakaba 1.52
6591 wakaba 1.125 !!!back-token; # <a>
6592 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6593     line => $token->{line}, column => $token->{column}};
6594 wakaba 1.113 $formatting_end_tag->($token);
6595 wakaba 1.52
6596     AFE2: for (reverse 0..$#$active_formatting_elements) {
6597     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
6598 wakaba 1.79 !!!cp ('t372');
6599 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
6600     last AFE2;
6601 wakaba 1.1 }
6602 wakaba 1.52 } # AFE2
6603     OE: for (reverse 0..$#{$self->{open_elements}}) {
6604     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
6605 wakaba 1.79 !!!cp ('t373');
6606 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
6607     last OE;
6608 wakaba 1.1 }
6609 wakaba 1.52 } # OE
6610     last AFE;
6611     } elsif ($node->[0] eq '#marker') {
6612 wakaba 1.79 !!!cp ('t374');
6613 wakaba 1.52 last AFE;
6614     }
6615     } # AFE
6616    
6617     $reconstruct_active_formatting_elements->($insert_to_current);
6618 wakaba 1.1
6619 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6620 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6621 wakaba 1.1
6622 wakaba 1.125 !!!nack ('t374.1');
6623 wakaba 1.52 !!!next-token;
6624 wakaba 1.126 next B;
6625 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6626     $reconstruct_active_formatting_elements->($insert_to_current);
6627 wakaba 1.1
6628 wakaba 1.52 ## has a |nobr| element in scope
6629     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6630     my $node = $self->{open_elements}->[$_];
6631 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6632 wakaba 1.79 !!!cp ('t376');
6633 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6634 wakaba 1.125 !!!back-token; # <nobr>
6635 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6636     line => $token->{line}, column => $token->{column}};
6637 wakaba 1.126 next B;
6638 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6639 wakaba 1.79 !!!cp ('t377');
6640 wakaba 1.52 last INSCOPE;
6641     }
6642     } # INSCOPE
6643    
6644 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6645 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6646    
6647 wakaba 1.125 !!!nack ('t377.1');
6648 wakaba 1.52 !!!next-token;
6649 wakaba 1.126 next B;
6650 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6651     ## has a button element in scope
6652     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6653     my $node = $self->{open_elements}->[$_];
6654 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6655 wakaba 1.79 !!!cp ('t378');
6656 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6657 wakaba 1.125 !!!back-token; # <button>
6658 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6659     line => $token->{line}, column => $token->{column}};
6660 wakaba 1.126 next B;
6661 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6662 wakaba 1.79 !!!cp ('t379');
6663 wakaba 1.52 last INSCOPE;
6664     }
6665     } # INSCOPE
6666    
6667     $reconstruct_active_formatting_elements->($insert_to_current);
6668    
6669 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6670 wakaba 1.85
6671     ## TODO: associate with $self->{form_element} if defined
6672    
6673 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6674 wakaba 1.1
6675 wakaba 1.125 !!!nack ('t379.1');
6676 wakaba 1.52 !!!next-token;
6677 wakaba 1.126 next B;
6678 wakaba 1.103 } elsif ({
6679 wakaba 1.109 xmp => 1,
6680     iframe => 1,
6681     noembed => 1,
6682     noframes => 1,
6683     noscript => 0, ## TODO: 1 if scripting is enabled
6684 wakaba 1.103 }->{$token->{tag_name}}) {
6685 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6686     !!!cp ('t381');
6687     $reconstruct_active_formatting_elements->($insert_to_current);
6688     } else {
6689     !!!cp ('t399');
6690     }
6691     ## NOTE: There is an "as if in body" code clone.
6692 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6693 wakaba 1.126 next B;
6694 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6695 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6696 wakaba 1.52
6697     if (defined $self->{form_element}) {
6698 wakaba 1.79 !!!cp ('t389');
6699 wakaba 1.52 ## Ignore the token
6700 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
6701 wakaba 1.52 !!!next-token;
6702 wakaba 1.126 next B;
6703 wakaba 1.52 } else {
6704     my $at = $token->{attributes};
6705     my $form_attrs;
6706     $form_attrs->{action} = $at->{action} if $at->{action};
6707     my $prompt_attr = $at->{prompt};
6708     $at->{name} = {name => 'name', value => 'isindex'};
6709     delete $at->{action};
6710     delete $at->{prompt};
6711     my @tokens = (
6712 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6713 wakaba 1.114 attributes => $form_attrs,
6714     line => $token->{line}, column => $token->{column}},
6715     {type => START_TAG_TOKEN, tag_name => 'hr',
6716     line => $token->{line}, column => $token->{column}},
6717     {type => START_TAG_TOKEN, tag_name => 'p',
6718     line => $token->{line}, column => $token->{column}},
6719     {type => START_TAG_TOKEN, tag_name => 'label',
6720     line => $token->{line}, column => $token->{column}},
6721 wakaba 1.52 );
6722     if ($prompt_attr) {
6723 wakaba 1.79 !!!cp ('t390');
6724 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6725 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6726     };
6727 wakaba 1.1 } else {
6728 wakaba 1.79 !!!cp ('t391');
6729 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6730 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6731 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6732     }; # SHOULD
6733 wakaba 1.52 ## TODO: make this configurable
6734 wakaba 1.1 }
6735 wakaba 1.52 push @tokens,
6736 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6737     line => $token->{line}, column => $token->{column}},
6738 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6739 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6740     line => $token->{line}, column => $token->{column}},
6741     {type => END_TAG_TOKEN, tag_name => 'p',
6742     line => $token->{line}, column => $token->{column}},
6743     {type => START_TAG_TOKEN, tag_name => 'hr',
6744     line => $token->{line}, column => $token->{column}},
6745     {type => END_TAG_TOKEN, tag_name => 'form',
6746     line => $token->{line}, column => $token->{column}};
6747 wakaba 1.125 !!!nack ('t391.1'); ## NOTE: Not acknowledged.
6748 wakaba 1.52 !!!back-token (@tokens);
6749 wakaba 1.125 !!!next-token;
6750 wakaba 1.126 next B;
6751 wakaba 1.52 }
6752     } elsif ($token->{tag_name} eq 'textarea') {
6753     my $tag_name = $token->{tag_name};
6754     my $el;
6755 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
6756 wakaba 1.52
6757     ## TODO: $self->{form_element} if defined
6758     $self->{content_model} = RCDATA_CONTENT_MODEL;
6759     delete $self->{escape}; # MUST
6760    
6761     $insert->($el);
6762    
6763     my $text = '';
6764 wakaba 1.125 !!!nack ('t392.1');
6765 wakaba 1.52 !!!next-token;
6766 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6767 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6768 wakaba 1.51 unless (length $token->{data}) {
6769 wakaba 1.79 !!!cp ('t392');
6770 wakaba 1.51 !!!next-token;
6771 wakaba 1.79 } else {
6772     !!!cp ('t393');
6773 wakaba 1.51 }
6774 wakaba 1.79 } else {
6775     !!!cp ('t394');
6776 wakaba 1.51 }
6777 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6778 wakaba 1.79 !!!cp ('t395');
6779 wakaba 1.52 $text .= $token->{data};
6780     !!!next-token;
6781     }
6782     if (length $text) {
6783 wakaba 1.79 !!!cp ('t396');
6784 wakaba 1.52 $el->manakai_append_text ($text);
6785     }
6786    
6787     $self->{content_model} = PCDATA_CONTENT_MODEL;
6788 wakaba 1.51
6789 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6790 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6791 wakaba 1.79 !!!cp ('t397');
6792 wakaba 1.52 ## Ignore the token
6793     } else {
6794 wakaba 1.79 !!!cp ('t398');
6795 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
6796 wakaba 1.51 }
6797 wakaba 1.52 !!!next-token;
6798 wakaba 1.126 next B;
6799     } elsif ($token->{tag_name} eq 'math' or
6800     $token->{tag_name} eq 'svg') {
6801     $reconstruct_active_formatting_elements->($insert_to_current);
6802 wakaba 1.131
6803     ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
6804    
6805     ## "adjust foreign attributes" - done in insert-element-f
6806 wakaba 1.126
6807 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
6808 wakaba 1.126
6809     if ($self->{self_closing}) {
6810     pop @{$self->{open_elements}};
6811     !!!ack ('t398.1');
6812     } else {
6813     !!!cp ('t398.2');
6814     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
6815     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
6816     ## mode, "in body" (not "in foreign content") secondary insertion
6817     ## mode, maybe.
6818     }
6819    
6820     !!!next-token;
6821     next B;
6822 wakaba 1.52 } elsif ({
6823     caption => 1, col => 1, colgroup => 1, frame => 1,
6824     frameset => 1, head => 1, option => 1, optgroup => 1,
6825     tbody => 1, td => 1, tfoot => 1, th => 1,
6826     thead => 1, tr => 1,
6827     }->{$token->{tag_name}}) {
6828 wakaba 1.79 !!!cp ('t401');
6829 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
6830 wakaba 1.52 ## Ignore the token
6831 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
6832 wakaba 1.52 !!!next-token;
6833 wakaba 1.126 next B;
6834 wakaba 1.52
6835     ## ISSUE: An issue on HTML5 new elements in the spec.
6836     } else {
6837 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6838     !!!cp ('t384');
6839 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6840 wakaba 1.110 $token->{tag_name} = 'img';
6841     } else {
6842     !!!cp ('t385');
6843     }
6844    
6845     ## NOTE: There is an "as if <br>" code clone.
6846 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6847    
6848 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6849 wakaba 1.109
6850 wakaba 1.110 if ({
6851     applet => 1, marquee => 1, object => 1,
6852     }->{$token->{tag_name}}) {
6853     !!!cp ('t380');
6854     push @$active_formatting_elements, ['#marker', ''];
6855 wakaba 1.125 !!!nack ('t380.1');
6856 wakaba 1.110 } elsif ({
6857     b => 1, big => 1, em => 1, font => 1, i => 1,
6858     s => 1, small => 1, strile => 1,
6859     strong => 1, tt => 1, u => 1,
6860     }->{$token->{tag_name}}) {
6861     !!!cp ('t375');
6862     push @$active_formatting_elements, $self->{open_elements}->[-1];
6863 wakaba 1.125 !!!nack ('t375.1');
6864 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
6865     !!!cp ('t388');
6866     ## TODO: associate with $self->{form_element} if defined
6867     pop @{$self->{open_elements}};
6868 wakaba 1.125 !!!ack ('t388.2');
6869 wakaba 1.110 } elsif ({
6870     area => 1, basefont => 1, bgsound => 1, br => 1,
6871     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6872     #image => 1,
6873     }->{$token->{tag_name}}) {
6874     !!!cp ('t388.1');
6875     pop @{$self->{open_elements}};
6876 wakaba 1.125 !!!ack ('t388.3');
6877 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
6878 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
6879    
6880     if ($self->{insertion_mode} & TABLE_IMS or
6881     $self->{insertion_mode} & BODY_TABLE_IMS or
6882     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6883     !!!cp ('t400.1');
6884     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
6885     } else {
6886     !!!cp ('t400.2');
6887     $self->{insertion_mode} = IN_SELECT_IM;
6888     }
6889 wakaba 1.125 !!!nack ('t400.3');
6890 wakaba 1.110 } else {
6891 wakaba 1.125 !!!nack ('t402');
6892 wakaba 1.109 }
6893 wakaba 1.51
6894 wakaba 1.52 !!!next-token;
6895 wakaba 1.126 next B;
6896 wakaba 1.52 }
6897 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6898 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6899 wakaba 1.107 ## has a |body| element in scope
6900     my $i;
6901 wakaba 1.111 INSCOPE: {
6902     for (reverse @{$self->{open_elements}}) {
6903 wakaba 1.123 if ($_->[1] & BODY_EL) {
6904 wakaba 1.111 !!!cp ('t405');
6905     $i = $_;
6906     last INSCOPE;
6907 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6908 wakaba 1.111 !!!cp ('t405.1');
6909     last;
6910     }
6911 wakaba 1.52 }
6912 wakaba 1.111
6913     !!!parse-error (type => 'start tag not allowed',
6914 wakaba 1.113 value => $token->{tag_name}, token => $token);
6915 wakaba 1.107 ## NOTE: Ignore the token.
6916 wakaba 1.52 !!!next-token;
6917 wakaba 1.126 next B;
6918 wakaba 1.111 } # INSCOPE
6919 wakaba 1.107
6920     for (@{$self->{open_elements}}) {
6921 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
6922 wakaba 1.107 !!!cp ('t403');
6923 wakaba 1.122 !!!parse-error (type => 'not closed',
6924     value => $_->[0]->manakai_local_name,
6925     token => $token);
6926 wakaba 1.107 last;
6927     } else {
6928     !!!cp ('t404');
6929     }
6930     }
6931    
6932     $self->{insertion_mode} = AFTER_BODY_IM;
6933     !!!next-token;
6934 wakaba 1.126 next B;
6935 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6936 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
6937     ## up-to-date, though it has the same effect as specified.
6938 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
6939     $self->{open_elements}->[1]->[1] & BODY_EL) {
6940 wakaba 1.52 ## ISSUE: There is an issue in the spec.
6941 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
6942 wakaba 1.79 !!!cp ('t406');
6943 wakaba 1.122 !!!parse-error (type => 'not closed',
6944     value => $self->{open_elements}->[1]->[0]
6945     ->manakai_local_name,
6946     token => $token);
6947 wakaba 1.79 } else {
6948     !!!cp ('t407');
6949 wakaba 1.1 }
6950 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6951 wakaba 1.52 ## reprocess
6952 wakaba 1.126 next B;
6953 wakaba 1.51 } else {
6954 wakaba 1.79 !!!cp ('t408');
6955 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6956 wakaba 1.52 ## Ignore the token
6957     !!!next-token;
6958 wakaba 1.126 next B;
6959 wakaba 1.51 }
6960 wakaba 1.52 } elsif ({
6961     address => 1, blockquote => 1, center => 1, dir => 1,
6962     div => 1, dl => 1, fieldset => 1, listing => 1,
6963     menu => 1, ol => 1, pre => 1, ul => 1,
6964     dd => 1, dt => 1, li => 1,
6965 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6966 wakaba 1.52 }->{$token->{tag_name}}) {
6967     ## has an element in scope
6968     my $i;
6969     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6970     my $node = $self->{open_elements}->[$_];
6971 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6972 wakaba 1.79 !!!cp ('t410');
6973 wakaba 1.52 $i = $_;
6974 wakaba 1.87 last INSCOPE;
6975 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6976 wakaba 1.79 !!!cp ('t411');
6977 wakaba 1.52 last INSCOPE;
6978 wakaba 1.51 }
6979 wakaba 1.52 } # INSCOPE
6980 wakaba 1.89
6981     unless (defined $i) { # has an element in scope
6982     !!!cp ('t413');
6983 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6984 wakaba 1.89 } else {
6985     ## Step 1. generate implied end tags
6986     while ({
6987     dd => ($token->{tag_name} ne 'dd'),
6988     dt => ($token->{tag_name} ne 'dt'),
6989     li => ($token->{tag_name} ne 'li'),
6990     p => 1,
6991 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
6992 wakaba 1.89 !!!cp ('t409');
6993     pop @{$self->{open_elements}};
6994     }
6995    
6996     ## Step 2.
6997 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6998     ne $token->{tag_name}) {
6999 wakaba 1.79 !!!cp ('t412');
7000 wakaba 1.122 !!!parse-error (type => 'not closed',
7001     value => $self->{open_elements}->[-1]->[0]
7002     ->manakai_local_name,
7003     token => $token);
7004 wakaba 1.51 } else {
7005 wakaba 1.89 !!!cp ('t414');
7006 wakaba 1.51 }
7007 wakaba 1.89
7008     ## Step 3.
7009 wakaba 1.52 splice @{$self->{open_elements}}, $i;
7010 wakaba 1.89
7011     ## Step 4.
7012     $clear_up_to_marker->()
7013     if {
7014 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7015 wakaba 1.89 }->{$token->{tag_name}};
7016 wakaba 1.51 }
7017 wakaba 1.52 !!!next-token;
7018 wakaba 1.126 next B;
7019 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
7020 wakaba 1.92 undef $self->{form_element};
7021    
7022 wakaba 1.52 ## has an element in scope
7023 wakaba 1.92 my $i;
7024 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7025     my $node = $self->{open_elements}->[$_];
7026 wakaba 1.123 if ($node->[1] & FORM_EL) {
7027 wakaba 1.79 !!!cp ('t418');
7028 wakaba 1.92 $i = $_;
7029 wakaba 1.52 last INSCOPE;
7030 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7031 wakaba 1.79 !!!cp ('t419');
7032 wakaba 1.52 last INSCOPE;
7033     }
7034     } # INSCOPE
7035 wakaba 1.92
7036     unless (defined $i) { # has an element in scope
7037 wakaba 1.79 !!!cp ('t421');
7038 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7039 wakaba 1.92 } else {
7040     ## Step 1. generate implied end tags
7041 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7042 wakaba 1.92 !!!cp ('t417');
7043     pop @{$self->{open_elements}};
7044     }
7045    
7046     ## Step 2.
7047 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7048     ne $token->{tag_name}) {
7049 wakaba 1.92 !!!cp ('t417.1');
7050 wakaba 1.122 !!!parse-error (type => 'not closed',
7051     value => $self->{open_elements}->[-1]->[0]
7052     ->manakai_local_name,
7053     token => $token);
7054 wakaba 1.92 } else {
7055     !!!cp ('t420');
7056     }
7057    
7058     ## Step 3.
7059     splice @{$self->{open_elements}}, $i;
7060 wakaba 1.52 }
7061    
7062     !!!next-token;
7063 wakaba 1.126 next B;
7064 wakaba 1.52 } elsif ({
7065     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7066     }->{$token->{tag_name}}) {
7067     ## has an element in scope
7068     my $i;
7069     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7070     my $node = $self->{open_elements}->[$_];
7071 wakaba 1.123 if ($node->[1] & HEADING_EL) {
7072 wakaba 1.79 !!!cp ('t423');
7073 wakaba 1.52 $i = $_;
7074     last INSCOPE;
7075 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7076 wakaba 1.79 !!!cp ('t424');
7077 wakaba 1.52 last INSCOPE;
7078 wakaba 1.51 }
7079 wakaba 1.52 } # INSCOPE
7080 wakaba 1.93
7081     unless (defined $i) { # has an element in scope
7082     !!!cp ('t425.1');
7083 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7084 wakaba 1.79 } else {
7085 wakaba 1.93 ## Step 1. generate implied end tags
7086 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7087 wakaba 1.93 !!!cp ('t422');
7088     pop @{$self->{open_elements}};
7089     }
7090    
7091     ## Step 2.
7092 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7093     ne $token->{tag_name}) {
7094 wakaba 1.93 !!!cp ('t425');
7095 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7096 wakaba 1.93 } else {
7097     !!!cp ('t426');
7098     }
7099    
7100     ## Step 3.
7101     splice @{$self->{open_elements}}, $i;
7102 wakaba 1.36 }
7103 wakaba 1.52
7104     !!!next-token;
7105 wakaba 1.126 next B;
7106 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
7107     ## has an element in scope
7108     my $i;
7109     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7110     my $node = $self->{open_elements}->[$_];
7111 wakaba 1.123 if ($node->[1] & P_EL) {
7112 wakaba 1.87 !!!cp ('t410.1');
7113     $i = $_;
7114 wakaba 1.88 last INSCOPE;
7115 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7116 wakaba 1.87 !!!cp ('t411.1');
7117     last INSCOPE;
7118     }
7119     } # INSCOPE
7120 wakaba 1.91
7121     if (defined $i) {
7122 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7123     ne $token->{tag_name}) {
7124 wakaba 1.87 !!!cp ('t412.1');
7125 wakaba 1.122 !!!parse-error (type => 'not closed',
7126     value => $self->{open_elements}->[-1]->[0]
7127     ->manakai_local_name,
7128     token => $token);
7129 wakaba 1.87 } else {
7130 wakaba 1.91 !!!cp ('t414.1');
7131 wakaba 1.87 }
7132 wakaba 1.91
7133 wakaba 1.87 splice @{$self->{open_elements}}, $i;
7134     } else {
7135 wakaba 1.91 !!!cp ('t413.1');
7136 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7137 wakaba 1.91
7138 wakaba 1.87 !!!cp ('t415.1');
7139     ## As if <p>, then reprocess the current token
7140     my $el;
7141 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
7142 wakaba 1.87 $insert->($el);
7143 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
7144 wakaba 1.87 }
7145 wakaba 1.91
7146 wakaba 1.87 !!!next-token;
7147 wakaba 1.126 next B;
7148 wakaba 1.52 } elsif ({
7149     a => 1,
7150     b => 1, big => 1, em => 1, font => 1, i => 1,
7151     nobr => 1, s => 1, small => 1, strile => 1,
7152     strong => 1, tt => 1, u => 1,
7153     }->{$token->{tag_name}}) {
7154 wakaba 1.79 !!!cp ('t427');
7155 wakaba 1.113 $formatting_end_tag->($token);
7156 wakaba 1.126 next B;
7157 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
7158 wakaba 1.79 !!!cp ('t428');
7159 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
7160 wakaba 1.52
7161     ## As if <br>
7162     $reconstruct_active_formatting_elements->($insert_to_current);
7163    
7164     my $el;
7165 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
7166 wakaba 1.52 $insert->($el);
7167    
7168     ## Ignore the token.
7169     !!!next-token;
7170 wakaba 1.126 next B;
7171 wakaba 1.52 } elsif ({
7172     caption => 1, col => 1, colgroup => 1, frame => 1,
7173     frameset => 1, head => 1, option => 1, optgroup => 1,
7174     tbody => 1, td => 1, tfoot => 1, th => 1,
7175     thead => 1, tr => 1,
7176     area => 1, basefont => 1, bgsound => 1,
7177     embed => 1, hr => 1, iframe => 1, image => 1,
7178     img => 1, input => 1, isindex => 1, noembed => 1,
7179     noframes => 1, param => 1, select => 1, spacer => 1,
7180     table => 1, textarea => 1, wbr => 1,
7181     noscript => 0, ## TODO: if scripting is enabled
7182     }->{$token->{tag_name}}) {
7183 wakaba 1.79 !!!cp ('t429');
7184 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7185 wakaba 1.52 ## Ignore the token
7186     !!!next-token;
7187 wakaba 1.126 next B;
7188 wakaba 1.52
7189     ## ISSUE: Issue on HTML5 new elements in spec
7190    
7191     } else {
7192     ## Step 1
7193     my $node_i = -1;
7194     my $node = $self->{open_elements}->[$node_i];
7195 wakaba 1.51
7196 wakaba 1.52 ## Step 2
7197     S2: {
7198 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7199 wakaba 1.52 ## Step 1
7200     ## generate implied end tags
7201 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7202 wakaba 1.79 !!!cp ('t430');
7203 wakaba 1.83 ## ISSUE: Can this case be reached?
7204 wakaba 1.86 pop @{$self->{open_elements}};
7205 wakaba 1.52 }
7206    
7207     ## Step 2
7208 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7209     ne $token->{tag_name}) {
7210 wakaba 1.79 !!!cp ('t431');
7211 wakaba 1.58 ## NOTE: <x><y></x>
7212 wakaba 1.122 !!!parse-error (type => 'not closed',
7213     value => $self->{open_elements}->[-1]->[0]
7214     ->manakai_local_name,
7215     token => $token);
7216 wakaba 1.79 } else {
7217     !!!cp ('t432');
7218 wakaba 1.52 }
7219    
7220     ## Step 3
7221     splice @{$self->{open_elements}}, $node_i;
7222 wakaba 1.51
7223 wakaba 1.1 !!!next-token;
7224 wakaba 1.52 last S2;
7225 wakaba 1.1 } else {
7226 wakaba 1.52 ## Step 3
7227 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
7228 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
7229 wakaba 1.123 ($node->[1] & SPECIAL_EL or
7230     $node->[1] & SCOPING_EL)) {
7231 wakaba 1.79 !!!cp ('t433');
7232 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7233 wakaba 1.52 ## Ignore the token
7234     !!!next-token;
7235     last S2;
7236     }
7237 wakaba 1.79
7238     !!!cp ('t434');
7239 wakaba 1.1 }
7240 wakaba 1.52
7241     ## Step 4
7242     $node_i--;
7243     $node = $self->{open_elements}->[$node_i];
7244    
7245     ## Step 5;
7246     redo S2;
7247     } # S2
7248 wakaba 1.126 next B;
7249 wakaba 1.1 }
7250     }
7251 wakaba 1.126 next B;
7252     } continue { # B
7253     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7254     ## NOTE: The code below is executed in cases where it does not have
7255     ## to be, but it is harmless even in those cases.
7256     ## has an element in scope
7257     INSCOPE: {
7258     for (reverse 0..$#{$self->{open_elements}}) {
7259     my $node = $self->{open_elements}->[$_];
7260     if ($node->[1] & FOREIGN_EL) {
7261     last INSCOPE;
7262     } elsif ($node->[1] & SCOPING_EL) {
7263     last;
7264     }
7265     }
7266    
7267     ## NOTE: No foreign element in scope.
7268     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7269     } # INSCOPE
7270     }
7271 wakaba 1.1 } # B
7272    
7273     ## Stop parsing # MUST
7274    
7275     ## TODO: script stuffs
7276 wakaba 1.3 } # _tree_construct_main
7277    
## Replace the children of |$node| with the result of parsing a string
## as HTML (the HTML5 |innerHTML| setter algorithm).
##
## Invoked as a class method:
##
##   $class->set_inner_html ($node, $string, $onerror);
##
##   $node    - Document node (node type 9) or element node (node type 1)
##              whose children are replaced.
##   $string  - Character string to be parsed.
##   $onerror - Optional code reference receiving parse errors as a list
##              of name/value pairs.  In the element case, if omitted,
##              errors are reported with |warn|.
##
## Dies if |$node| is neither a document nor an element.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0]; # reference, to avoid copying the input string
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: Iterate over a copy of the child list, since nodes are
    ## removed while looping.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Code points that are reported as 'control char' parse errors in
    ## addition to the contiguous ranges tested in |set_next_char|.
    ## NOTE: Built once here rather than once per input character.
    my $is_reported_char = {
      0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
      0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
      0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
      0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
      0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
      0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
      0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
      0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
      0x10FFFE => 1, 0x10FFFF => 1,
    };

    ## Step 8 # MUST
    my $i = 0; # index of the next unread character in |$$s|
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    ## NOTE: This closure only touches the parser through its |$self|
    ## argument.  It must not capture |$p|: the closure is stored in
    ## |$p|, so capturing |$p| would create a reference cycle and the
    ## parser object would never be freed.
    $p->{set_next_char} = sub {
      my $self = shift;

      ## Shift the history of the last three characters.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      if ($i >= length $$s) {
        $self->{next_char} = -1; # end of input
        return;
      }
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($self->{line_prev}, $self->{column_prev})
          = ($self->{line}, $self->{column});
      $self->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $self->{line}++;
        $self->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR LF and lone CR are both normalized to a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $self->{line}++;
        $self->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_char} <= 0x0008 or
               (0x000E <= $self->{next_char} and
                $self->{next_char} <= 0x001F) or
               (0x007F <= $self->{next_char} and
                $self->{next_char} <= 0x009F) or
               (0xD800 <= $self->{next_char} and
                $self->{next_char} <= 0xDFFF) or
               (0xFDD0 <= $self->{next_char} and
                $self->{next_char} <= 0xFDDF) or
               $is_reported_char->{$self->{next_char}}) {
        !!!cp ('i4.1');
        !!!parse-error (type => 'control char', level => $self->{must_level});
        ## TODO: error type documentation
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## Default error handler: report the position of the offending
    ## token if it is known, the position given by the caller otherwise.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    ## NOTE: This closure captures |$p| (reference cycle); it is
    ## deleted at the end of this branch.
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## Find the nearest HTML |form| among |$node| and its ancestors.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## NOTE: The |!!!next-token| macro operates on a variable
      ## named |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break the reference cycles between the parser and its callbacks
    ## so that the temporary parser can be freed.
    delete $p->{parse_error}; # delete loop
    delete $p->{set_next_char};
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
7473    
7474     } # tree construction stage
7475 wakaba 1.1
## Subclass of |Error|.
## NOTE(review): presumably thrown to request that parsing be restarted;
## confirm against the code that throws and catches this class.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';
7478    
7479 wakaba 1.1 1;
7480 wakaba 1.141 # $Date: 2008/05/24 09:59:52 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24