/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.235 - (hide annotations) (download) (as text)
Sun Sep 6 08:02:54 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.234: +7 -4 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	6 Sep 2009 08:02:03 -0000
	* HTML.pm.src: Set the |confidence| flag to |irrlevant| when the
	fragment parsing algorithm is invoked (HTML5 revision 2699).

2009-09-06  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.235 our $VERSION=do{my @r=(q$Revision: 1.234 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.208 use Whatpm::HTML::Tokenizer;
7    
8 wakaba 1.182 ## NOTE: This module doesn't check all HTML5 parse errors; character
9     ## encoding related parse errors are expected to be handled by relevant
10     ## modules.
11     ## Parse errors for control characters that are not allowed in HTML5
12     ## documents, for surrogate code points, and for noncharacter code
13     ## points, as well as U+FFFD substitutions for characters whose code point
14     ## is higher than U+10FFFF may be detected by combining the parser with
15     ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16     ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17     ## WebHACC::Language::HTML module in the WebHACC package).
18    
19 wakaba 1.18 ## ISSUE:
20     ## var doc = implementation.createDocument (null, null, null);
21     ## doc.write ('');
22     ## alert (doc.compatMode);
23 wakaba 1.1
24 wakaba 1.139 require IO::Handle;
25    
26 wakaba 1.208 ## Namespace URLs
27    
28 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30     my $SVG_NS = q<http://www.w3.org/2000/svg>;
31     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34    
35 wakaba 1.208 ## Element categories
36    
37 wakaba 1.206 ## Bits 12-15
38     sub SPECIAL_EL () { 0b1_000000000000000 }
39     sub SCOPING_EL () { 0b1_00000000000000 }
40     sub FORMATTING_EL () { 0b1_0000000000000 }
41     sub PHRASING_EL () { 0b1_000000000000 }
42    
43     ## Bits 10-11
44 wakaba 1.208 #sub FOREIGN_EL () { 0b1_00000000000 } # see Whatpm::HTML::Tokenizer
45 wakaba 1.206 sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 }
46    
47     ## Bits 6-9
48     sub TABLE_SCOPING_EL () { 0b1_000000000 }
49     sub TABLE_ROWS_SCOPING_EL () { 0b1_00000000 }
50     sub TABLE_ROW_SCOPING_EL () { 0b1_0000000 }
51     sub TABLE_ROWS_EL () { 0b1_000000 }
52    
53     ## Bit 5
54     sub ADDRESS_DIV_P_EL () { 0b1_00000 }
55    
56     ## NOTE: Used in </body> and EOF algorithms.
57     ## Bit 4
58     sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
59 wakaba 1.123
60 wakaba 1.151 ## NOTE: Used in "generate implied end tags" algorithm.
61 wakaba 1.194 ## NOTE: There is code where a modified version of
62     ## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
63     ## implementation (search for the algorithm name).
64 wakaba 1.206 ## Bit 3
65     sub END_TAG_OPTIONAL_EL () { 0b1_000 }
66    
67     ## Bits 0-2
68    
69     sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
70     sub FORM_EL () { SPECIAL_EL | 0b001 }
71     sub FRAMESET_EL () { SPECIAL_EL | 0b010 }
72     sub HEADING_EL () { SPECIAL_EL | 0b011 }
73     sub SELECT_EL () { SPECIAL_EL | 0b100 }
74     sub SCRIPT_EL () { SPECIAL_EL | 0b101 }
75    
76     sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
77     sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }
78    
79 wakaba 1.207 sub DTDD_EL () {
80 wakaba 1.206 SPECIAL_EL |
81     END_TAG_OPTIONAL_EL |
82     ALL_END_TAG_OPTIONAL_EL |
83     0b010
84     }
85     sub LI_EL () {
86     SPECIAL_EL |
87     END_TAG_OPTIONAL_EL |
88     ALL_END_TAG_OPTIONAL_EL |
89     0b100
90     }
91     sub P_EL () {
92     SPECIAL_EL |
93     ADDRESS_DIV_P_EL |
94     END_TAG_OPTIONAL_EL |
95     ALL_END_TAG_OPTIONAL_EL |
96     0b001
97 wakaba 1.123 }
98    
99 wakaba 1.206 sub TABLE_ROW_EL () {
100     SPECIAL_EL |
101     TABLE_ROWS_EL |
102     TABLE_ROW_SCOPING_EL |
103     ALL_END_TAG_OPTIONAL_EL |
104     0b001
105     }
106     sub TABLE_ROW_GROUP_EL () {
107     SPECIAL_EL |
108     TABLE_ROWS_EL |
109     TABLE_ROWS_SCOPING_EL |
110     ALL_END_TAG_OPTIONAL_EL |
111     0b001
112 wakaba 1.123 }
113    
114 wakaba 1.206 sub MISC_SCOPING_EL () { SCOPING_EL | 0b000 }
115     sub BUTTON_EL () { SCOPING_EL | 0b001 }
116     sub CAPTION_EL () { SCOPING_EL | 0b010 }
117     sub HTML_EL () {
118     SCOPING_EL |
119     TABLE_SCOPING_EL |
120     TABLE_ROWS_SCOPING_EL |
121     TABLE_ROW_SCOPING_EL |
122     ALL_END_TAG_OPTIONAL_EL |
123     0b001
124 wakaba 1.123 }
125 wakaba 1.206 sub TABLE_EL () {
126     SCOPING_EL |
127     TABLE_ROWS_EL |
128     TABLE_SCOPING_EL |
129     0b001
130 wakaba 1.123 }
131 wakaba 1.206 sub TABLE_CELL_EL () {
132     SCOPING_EL |
133     TABLE_ROW_SCOPING_EL |
134     ALL_END_TAG_OPTIONAL_EL |
135     0b001
136 wakaba 1.123 }
137    
138 wakaba 1.206 sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
139     sub A_EL () { FORMATTING_EL | 0b001 }
140     sub NOBR_EL () { FORMATTING_EL | 0b010 }
141    
142     sub RUBY_EL () { PHRASING_EL | 0b001 }
143    
144     ## ISSUE: ALL_END_TAG_OPTIONAL_EL?
145     sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
146     sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
147     sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }
148 wakaba 1.123
149 wakaba 1.206 sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 0b001 }
150 wakaba 1.123
151     my $el_category = {
152 wakaba 1.206 a => A_EL,
153     address => ADDRESS_DIV_EL,
154 wakaba 1.123 applet => MISC_SCOPING_EL,
155     area => MISC_SPECIAL_EL,
156 wakaba 1.193 article => MISC_SPECIAL_EL,
157     aside => MISC_SPECIAL_EL,
158 wakaba 1.123 b => FORMATTING_EL,
159     base => MISC_SPECIAL_EL,
160     basefont => MISC_SPECIAL_EL,
161     bgsound => MISC_SPECIAL_EL,
162     big => FORMATTING_EL,
163     blockquote => MISC_SPECIAL_EL,
164     body => BODY_EL,
165     br => MISC_SPECIAL_EL,
166     button => BUTTON_EL,
167     caption => CAPTION_EL,
168     center => MISC_SPECIAL_EL,
169     col => MISC_SPECIAL_EL,
170     colgroup => MISC_SPECIAL_EL,
171 wakaba 1.193 command => MISC_SPECIAL_EL,
172     datagrid => MISC_SPECIAL_EL,
173 wakaba 1.207 dd => DTDD_EL,
174 wakaba 1.193 details => MISC_SPECIAL_EL,
175     dialog => MISC_SPECIAL_EL,
176 wakaba 1.123 dir => MISC_SPECIAL_EL,
177 wakaba 1.206 div => ADDRESS_DIV_EL,
178 wakaba 1.123 dl => MISC_SPECIAL_EL,
179 wakaba 1.207 dt => DTDD_EL,
180 wakaba 1.123 em => FORMATTING_EL,
181     embed => MISC_SPECIAL_EL,
182     fieldset => MISC_SPECIAL_EL,
183 wakaba 1.193 figure => MISC_SPECIAL_EL,
184 wakaba 1.123 font => FORMATTING_EL,
185 wakaba 1.193 footer => MISC_SPECIAL_EL,
186 wakaba 1.123 form => FORM_EL,
187     frame => MISC_SPECIAL_EL,
188     frameset => FRAMESET_EL,
189     h1 => HEADING_EL,
190     h2 => HEADING_EL,
191     h3 => HEADING_EL,
192     h4 => HEADING_EL,
193     h5 => HEADING_EL,
194     h6 => HEADING_EL,
195     head => MISC_SPECIAL_EL,
196 wakaba 1.193 header => MISC_SPECIAL_EL,
197 wakaba 1.123 hr => MISC_SPECIAL_EL,
198     html => HTML_EL,
199     i => FORMATTING_EL,
200     iframe => MISC_SPECIAL_EL,
201     img => MISC_SPECIAL_EL,
202 wakaba 1.193 #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
203 wakaba 1.123 input => MISC_SPECIAL_EL,
204     isindex => MISC_SPECIAL_EL,
205 wakaba 1.232 ## XXX keygen? (Whether a void element is in Special or not does not
206     ## affect the processing, however.)
207 wakaba 1.123 li => LI_EL,
208     link => MISC_SPECIAL_EL,
209     listing => MISC_SPECIAL_EL,
210     marquee => MISC_SCOPING_EL,
211     menu => MISC_SPECIAL_EL,
212     meta => MISC_SPECIAL_EL,
213 wakaba 1.193 nav => MISC_SPECIAL_EL,
214 wakaba 1.206 nobr => NOBR_EL,
215 wakaba 1.123 noembed => MISC_SPECIAL_EL,
216     noframes => MISC_SPECIAL_EL,
217     noscript => MISC_SPECIAL_EL,
218     object => MISC_SCOPING_EL,
219     ol => MISC_SPECIAL_EL,
220     optgroup => OPTGROUP_EL,
221     option => OPTION_EL,
222     p => P_EL,
223     param => MISC_SPECIAL_EL,
224     plaintext => MISC_SPECIAL_EL,
225     pre => MISC_SPECIAL_EL,
226 wakaba 1.151 rp => RUBY_COMPONENT_EL,
227     rt => RUBY_COMPONENT_EL,
228     ruby => RUBY_EL,
229 wakaba 1.123 s => FORMATTING_EL,
230     script => MISC_SPECIAL_EL,
231     select => SELECT_EL,
232 wakaba 1.193 section => MISC_SPECIAL_EL,
233 wakaba 1.123 small => FORMATTING_EL,
234     spacer => MISC_SPECIAL_EL,
235     strike => FORMATTING_EL,
236     strong => FORMATTING_EL,
237     style => MISC_SPECIAL_EL,
238     table => TABLE_EL,
239     tbody => TABLE_ROW_GROUP_EL,
240     td => TABLE_CELL_EL,
241     textarea => MISC_SPECIAL_EL,
242     tfoot => TABLE_ROW_GROUP_EL,
243     th => TABLE_CELL_EL,
244     thead => TABLE_ROW_GROUP_EL,
245     title => MISC_SPECIAL_EL,
246     tr => TABLE_ROW_EL,
247     tt => FORMATTING_EL,
248     u => FORMATTING_EL,
249     ul => MISC_SPECIAL_EL,
250     wbr => MISC_SPECIAL_EL,
251     };
252    
253 wakaba 1.126 my $el_category_f = {
254     $MML_NS => {
255     'annotation-xml' => MML_AXML_EL,
256 wakaba 1.206 mi => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
257     mo => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
258     mn => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
259     ms => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
260     mtext => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
261 wakaba 1.126 },
262     $SVG_NS => {
263 wakaba 1.206 foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
264     desc => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
265     title => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
266 wakaba 1.126 },
267     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
268     };
269    
270 wakaba 1.131 my $svg_attr_name = {
271 wakaba 1.146 attributename => 'attributeName',
272 wakaba 1.131 attributetype => 'attributeType',
273     basefrequency => 'baseFrequency',
274     baseprofile => 'baseProfile',
275     calcmode => 'calcMode',
276     clippathunits => 'clipPathUnits',
277     contentscripttype => 'contentScriptType',
278     contentstyletype => 'contentStyleType',
279     diffuseconstant => 'diffuseConstant',
280     edgemode => 'edgeMode',
281     externalresourcesrequired => 'externalResourcesRequired',
282     filterres => 'filterRes',
283     filterunits => 'filterUnits',
284     glyphref => 'glyphRef',
285     gradienttransform => 'gradientTransform',
286     gradientunits => 'gradientUnits',
287     kernelmatrix => 'kernelMatrix',
288     kernelunitlength => 'kernelUnitLength',
289     keypoints => 'keyPoints',
290     keysplines => 'keySplines',
291     keytimes => 'keyTimes',
292     lengthadjust => 'lengthAdjust',
293     limitingconeangle => 'limitingConeAngle',
294     markerheight => 'markerHeight',
295     markerunits => 'markerUnits',
296     markerwidth => 'markerWidth',
297     maskcontentunits => 'maskContentUnits',
298     maskunits => 'maskUnits',
299     numoctaves => 'numOctaves',
300     pathlength => 'pathLength',
301     patterncontentunits => 'patternContentUnits',
302     patterntransform => 'patternTransform',
303     patternunits => 'patternUnits',
304     pointsatx => 'pointsAtX',
305     pointsaty => 'pointsAtY',
306     pointsatz => 'pointsAtZ',
307     preservealpha => 'preserveAlpha',
308     preserveaspectratio => 'preserveAspectRatio',
309     primitiveunits => 'primitiveUnits',
310     refx => 'refX',
311     refy => 'refY',
312     repeatcount => 'repeatCount',
313     repeatdur => 'repeatDur',
314     requiredextensions => 'requiredExtensions',
315 wakaba 1.146 requiredfeatures => 'requiredFeatures',
316 wakaba 1.131 specularconstant => 'specularConstant',
317     specularexponent => 'specularExponent',
318     spreadmethod => 'spreadMethod',
319     startoffset => 'startOffset',
320     stddeviation => 'stdDeviation',
321     stitchtiles => 'stitchTiles',
322     surfacescale => 'surfaceScale',
323     systemlanguage => 'systemLanguage',
324     tablevalues => 'tableValues',
325     targetx => 'targetX',
326     targety => 'targetY',
327     textlength => 'textLength',
328     viewbox => 'viewBox',
329     viewtarget => 'viewTarget',
330     xchannelselector => 'xChannelSelector',
331     ychannelselector => 'yChannelSelector',
332     zoomandpan => 'zoomAndPan',
333     };
334    
335     my $foreign_attr_xname = {
336     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
337     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
338     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
339     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
340     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
341     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
342     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
343     'xml:base' => [$XML_NS, ['xml', 'base']],
344     'xml:lang' => [$XML_NS, ['xml', 'lang']],
345     'xml:space' => [$XML_NS, ['xml', 'space']],
346     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
347     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
348     };
349    
350     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
351    
352 wakaba 1.192 ## TODO: Invoke the reset algorithm when a resettable element is
353     ## created (cf. HTML5 revision 2259).
354    
## parse_byte_string ($self, $charset_name, $bytes, ...)
##
## Convenience wrapper around |parse_byte_stream| for input that is
## already held in memory.  |$bytes| may be either a byte string or a
## reference to one; an in-memory read handle is opened on it and every
## remaining argument is forwarded unchanged to |parse_byte_stream|,
## whose return value is returned.
##
## NOTE(review): The result of |open| is not checked here; if opening
## the in-memory handle fails, |$input| is passed on as is.
355 wakaba 1.63 sub parse_byte_string ($$$$;$) {
356 wakaba 1.138 my $self = shift;
357     my $charset_name = shift;
358     open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
359     return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
360     } # parse_byte_string
361    
## parse_byte_stream ($self, $charset_name, $byte_stream, $doc,
##                    $onerror, $get_wrapper)
##
## Parses an HTML document from a byte stream handle.  May be invoked
## either on an instance or on the class (in which case |new| is called).
##
##   $charset_name - Encoding label supplied by the caller, or |undef|.
##   $byte_stream  - Handle the bytes are read from (|getc| is used).
##   $doc          - Document object, passed on to |parse_char_stream|.
##   $onerror      - Optional error callback; defaults to a |warn|.
##   $get_wrapper  - Optional code reference that receives the decoding
##                   handle and returns the handle actually given to
##                   |parse_char_stream|; defaults to the identity.
##
## The method determines the character encoding (the "SNIFFING" block),
## records it in |$self->{input_encoding}|, installs the
## |$self->{change_encoding}| callback and then delegates to
## |parse_char_stream|.  If |Whatpm::HTML::RestartParser| is thrown (by
## |change_encoding|), the document is parsed a second time using the
## newly selected decoder.  Returns what |parse_char_stream| returns.
362 wakaba 1.162 sub parse_byte_stream ($$$$;$$) {
363     # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
364 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
365 wakaba 1.133 my $charset_name = shift;
366 wakaba 1.138 my $byte_stream = $_[0];
367 wakaba 1.133
368 wakaba 1.134 my $onerror = $_[2] || sub {
369     my (%opt) = @_;
370     warn "Parse error ($opt{type})\n";
371     };
372     $self->{parse_error} = $onerror; # updated later by parse_char_string
373    
374 wakaba 1.162 my $get_wrapper = $_[3] || sub ($) {
375     return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
376     };
377    
378 wakaba 1.133 ## HTML5 encoding sniffing algorithm
379     require Message::Charset::Info;
380     my $charset;
381 wakaba 1.136 my $buffer;
382     my ($char_stream, $e_status);
383 wakaba 1.133
384     SNIFFING: {
385 wakaba 1.160 ## NOTE: By setting |allow_fallback| option true when the
386     ## |get_decode_handle| method is invoked, we ignore what the HTML5
387     ## spec requires, i.e. unsupported encoding should be ignored.
388     ## TODO: We should not do this unless the parser is invoked
389     ## in the conformance checking mode, in which this behavior
390     ## would be useful.
391 wakaba 1.133
392     ## Step 1
## A caller-supplied encoding label is tried first.  If no decoding
## handle can be obtained for it, an error is reported and sniffing
## continues with Step 2.
393     if (defined $charset_name) {
394 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
395     ## TODO: Is this ok? Transfer protocol's parameter should be
396     ## interpreted in its semantics?
397 wakaba 1.133
398 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
399     ($byte_stream, allow_error_reporting => 1,
400 wakaba 1.133 allow_fallback => 1);
401 wakaba 1.136 if ($char_stream) {
402 wakaba 1.133 $self->{confident} = 1;
403     last SNIFFING;
404 wakaba 1.136 } else {
405 wakaba 1.190 !!!parse-error (type => 'charset:not supported',
406     layer => 'encode',
407     line => 1, column => 1,
408     value => $charset_name,
409     level => $self->{level}->{uncertain});
410 wakaba 1.133 }
411     }
412    
413     ## Step 2
## Read at most 1024 bytes from the stream for BOM/charset detection.
414 wakaba 1.136 my $byte_buffer = '';
415     for (1..1024) {
416     my $char = $byte_stream->getc;
417     last unless defined $char;
418     $byte_buffer .= $char;
419     } ## TODO: timeout
420 wakaba 1.133
421     ## Step 3
## A leading byte order mark selects UTF-16BE, UTF-16LE or UTF-8.
422 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
423 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
424 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
425     ($byte_stream, allow_error_reporting => 1,
426     allow_fallback => 1, byte_buffer => \$byte_buffer);
427 wakaba 1.133 $self->{confident} = 1;
428     last SNIFFING;
429 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
430 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
431 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
432     ($byte_stream, allow_error_reporting => 1,
433     allow_fallback => 1, byte_buffer => \$byte_buffer);
434 wakaba 1.133 $self->{confident} = 1;
435     last SNIFFING;
436 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
437 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
438 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
439     ($byte_stream, allow_error_reporting => 1,
440     allow_fallback => 1, byte_buffer => \$byte_buffer);
441 wakaba 1.133 $self->{confident} = 1;
442     last SNIFFING;
443     }
444    
445     ## Step 4
446     ## TODO: <meta charset>
447    
448     ## Step 5
449     ## TODO: from history
450    
451     ## Step 6
## Guess the encoding from the buffered bytes.  A successful guess is
## used tentatively (|confident| is set to 0).
452 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
453 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
454 wakaba 1.136 ($byte_buffer);
455 wakaba 1.133 if (defined $charset_name) {
456 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
457 wakaba 1.133
458 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
459     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
460     ($byte_stream);
461     ($char_stream, $e_status) = $charset->get_decode_handle
462     ($buffer, allow_error_reporting => 1,
463     allow_fallback => 1, byte_buffer => \$byte_buffer);
464     if ($char_stream) {
465     $buffer->{buffer} = $byte_buffer;
466 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
467     text => $charset_name,
468     level => $self->{level}->{info},
469     layer => 'encode',
470 wakaba 1.134 line => 1, column => 1);
471 wakaba 1.133 $self->{confident} = 0;
472     last SNIFFING;
473     }
474     }
475    
476     ## Step 7: default
477     ## TODO: Make this configurable.
478 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
479 wakaba 1.133 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
480     ## detectable in the step 6.
481 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
482     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
483     ($byte_stream);
484     ($char_stream, $e_status)
485     = $charset->get_decode_handle ($buffer,
486     allow_error_reporting => 1,
487     allow_fallback => 1,
488     byte_buffer => \$byte_buffer);
489     $buffer->{buffer} = $byte_buffer;
490 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
491     text => 'windows-1252',
492     level => $self->{level}->{info},
493     line => 1, column => 1,
494     layer => 'encode');
495 wakaba 1.63 $self->{confident} = 0;
496 wakaba 1.133 } # SNIFFING
497    
## Record the selected encoding and report how reliable the decoder is,
## based on the status flags returned by |get_decode_handle|.
498     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
499 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
500 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
501 wakaba 1.160 #text => $self->{input_encoding},
502 wakaba 1.153 level => $self->{level}->{uncertain},
503     line => 1, column => 1,
504     layer => 'encode');
505 wakaba 1.133 } elsif (not ($e_status &
506 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
507 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
508 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
509     text => $self->{input_encoding},
510     level => $self->{level}->{uncertain},
511     line => 1, column => 1,
512     layer => 'encode');
513 wakaba 1.160 } else {
514     $self->{input_encoding} = $charset->get_iana_name;
515 wakaba 1.63 }
516    
## Callback invoked with ($self, $charset_name, $token).  It updates the
## enclosing |$charset|, |$char_stream| and |$e_status| variables and,
## unless one of the early-return steps applies, throws
## |Whatpm::HTML::RestartParser| so that the |catch| block below
## re-parses the input.  Nothing is done if the new encoding is not
## supported.
## NOTE(review): |$buffer| is only assigned in Steps 6 and 7 above; when
## the encoding was fixed by Step 1 or Step 3 it is still undefined
## here, and |\ $buffer->{buffer}| would presumably autovivify it --
## confirm that this is intended.
517     $self->{change_encoding} = sub {
518     my $self = shift;
519 wakaba 1.134 $charset_name = shift;
520 wakaba 1.114 my $token = shift;
521 wakaba 1.63
522 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
523 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
524     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
525     byte_buffer => \ $buffer->{buffer});
526 wakaba 1.134
527 wakaba 1.136 if ($char_stream) { # if supported
528 wakaba 1.134 ## "Change the encoding" algorithm:
529 wakaba 1.215
530     ## Step 1
531     if (defined $self->{input_encoding} and
532     $self->{input_encoding} eq $charset_name) {
533     !!!parse-error (type => 'charset label:matching',
534     text => $charset_name,
535     level => $self->{level}->{info});
536     $self->{confident} = 1;
537     return;
538     }
539 wakaba 1.63
540 wakaba 1.214 ## Step 2 (HTML5 revision 3205)
541     if (defined $self->{input_encoding} and
542     Message::Charset::Info->get_by_html_name ($self->{input_encoding})
543     ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
544     $self->{confident} = 1;
545     return;
546     }
547    
548     ## Step 3
549 wakaba 1.149 if ($charset->{category} &
550     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
551 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
552 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
553     ($byte_stream,
554     byte_buffer => \ $buffer->{buffer});
555 wakaba 1.134 }
556     $charset_name = $charset->get_iana_name;
557 wakaba 1.63
558 wakaba 1.153 !!!parse-error (type => 'charset label detected',
559     text => $self->{input_encoding},
560     value => $charset_name,
561     level => $self->{level}->{warn},
562     token => $token);
563 wakaba 1.134
564 wakaba 1.214 ## Step 4
565 wakaba 1.134 # if (can) {
566     ## change the encoding on the fly.
567     #$self->{confident} = 1;
568     #return;
569     # }
570    
571 wakaba 1.214 ## Step 5
572 wakaba 1.134 throw Whatpm::HTML::RestartParser ();
573 wakaba 1.63 }
574     }; # $self->{change_encoding}
575    
## Error callback for the character decoding layer: reports the error
## at the current position and, if the offending octets are supplied,
## replaces them with U+FFFD.
576 wakaba 1.136 my $char_onerror = sub {
577     my (undef, $type, %opt) = @_;
578 wakaba 1.153 !!!parse-error (layer => 'encode',
579 wakaba 1.174 line => $self->{line}, column => $self->{column} + 1,
580     %opt, type => $type);
581 wakaba 1.136 if ($opt{octets}) {
582     ${$opt{octets}} = "\x{FFFD}"; # replacement character
583     }
584     };
585 wakaba 1.162
586     my $wrapped_char_stream = $get_wrapper->($char_stream);
587     $wrapped_char_stream->onerror ($char_onerror);
588 wakaba 1.136
589 wakaba 1.182 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
590 wakaba 1.63 my $return;
591     try {
592 wakaba 1.162 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
593 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
594 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
595    
596     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
597 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
598 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
599     level => $self->{level}->{uncertain},
600 wakaba 1.160 #text => $self->{input_encoding},
601 wakaba 1.153 line => 1, column => 1,
602     layer => 'encode');
603 wakaba 1.134 } elsif (not ($e_status &
604 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
605 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
606 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
607     text => $self->{input_encoding},
608     level => $self->{level}->{uncertain},
609     line => 1, column => 1,
610     layer => 'encode');
611 wakaba 1.160 } else {
612     $self->{input_encoding} = $charset->get_iana_name;
613 wakaba 1.134 }
614 wakaba 1.63 $self->{confident} = 1;
615 wakaba 1.162
## Wrap the decoder selected by |change_encoding| and parse again.
616     $wrapped_char_stream = $get_wrapper->($char_stream);
617     $wrapped_char_stream->onerror ($char_onerror);
618    
619     $return = $self->parse_char_stream ($wrapped_char_stream, @args);
620 wakaba 1.63 };
621     return $return;
622 wakaba 1.138 } # parse_byte_stream
623 wakaba 1.63
624 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
625     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
626     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
627     ## because the core part of our HTML parser expects a string of characters,
628     ## not a string of bytes or code units or anything which might contain a BOM.
629     ## Therefore, any parser interface that accepts a string of bytes,
630     ## such as |parse_byte_string| in this module, must ensure that it does
631     ## strip the BOM and never strip any ZWNBSP.
632    
## parse_char_string ($self, $s, $doc, $onerror, $get_wrapper)
##
## Parses an HTML document from a character string.  |$s| may be either
## a string or a reference to one; it is wrapped in a
## |Whatpm::Charset::DecodeHandle::CharString| handle and the remaining
## arguments are forwarded unchanged to |parse_char_stream|, whose
## return value is returned.
633 wakaba 1.162 sub parse_char_string ($$$;$$) {
634     #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
635 wakaba 1.135 my $self = shift;
636 wakaba 1.139 my $s = ref $_[0] ? $_[0] : \($_[0]);
637 wakaba 1.171 require Whatpm::Charset::DecodeHandle;
638     my $input = Whatpm::Charset::DecodeHandle::CharString->new ($s);
639 wakaba 1.135 return $self->parse_char_stream ($input, @_[1..$#_]);
640     } # parse_char_string
641 wakaba 1.162 *parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
642 wakaba 1.63
## parse_char_stream ($self, $input, $doc, $onerror, $get_wrapper)
##
## Parses an HTML document from a character stream handle.  May be
## invoked either on an instance or on the class (in which case |new|
## is called).
##
##   $input       - Character handle; |read|, |manakai_read_until| and
##                  |onerror| are invoked on it.
##   $doc         - Document object; its child nodes are removed before
##                  parsing and it is the return value of this method.
##   $onerror     - Optional error callback; defaults to a |warn| that
##                  includes the line and column.
##   $get_wrapper - Optional code reference that receives |$input| and
##                  returns the handle actually read from.
##
## The method installs the |{set_nc}|, |{read_until}| and
## |{parse_error}| callbacks on |$self|, then runs the tokenizer and
## tree constructor.
643 wakaba 1.182 sub parse_char_stream ($$$;$$) {
644 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
645 wakaba 1.135 my $input = $_[0];
646 wakaba 1.1 $self->{document} = $_[1];
647 wakaba 1.63 @{$self->{document}->child_nodes} = ();
648 wakaba 1.1
649 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
650    
651 wakaba 1.235 ## Confidence: irrelevant.
652 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
653 wakaba 1.235
654 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
655     if defined $self->{input_encoding};
656 wakaba 1.178 ## TODO: |{input_encoding}| is needless?
657 wakaba 1.63
658 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
659 wakaba 1.179 $self->{column_prev} = -1;
660     $self->{column} = 0;
## Stores the code point of the next input character in |$self->{nc}|
## (-1 at the end of the input) and keeps the line/column counters up
## to date.  CR and CR LF are turned into a single LF; U+0000 is
## reported as a parse error and replaced by U+FFFD.
661 wakaba 1.183 $self->{set_nc} = sub {
662 wakaba 1.1 my $self = shift;
663 wakaba 1.13
664 wakaba 1.178 my $char = '';
665 wakaba 1.183 if (defined $self->{next_nc}) {
## A character read ahead while handling a CR is consumed first.
666     $char = $self->{next_nc};
667     delete $self->{next_nc};
668     $self->{nc} = ord $char;
669 wakaba 1.139 } else {
670 wakaba 1.179 $self->{char_buffer} = '';
671     $self->{char_buffer_pos} = 0;
672    
## Characters other than U+0000, LF and CR are read in bulk into
## |{char_buffer}|; the first of them becomes the next character.
673     my $count = $input->manakai_read_until
674 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
675 wakaba 1.179 if ($count) {
676     $self->{line_prev} = $self->{line};
677     $self->{column_prev} = $self->{column};
678     $self->{column}++;
679 wakaba 1.183 $self->{nc}
680 wakaba 1.179 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
681     return;
682     }
683    
684 wakaba 1.178 if ($input->read ($char, 1)) {
685 wakaba 1.183 $self->{nc} = ord $char;
686 wakaba 1.178 } else {
687 wakaba 1.183 $self->{nc} = -1;
688 wakaba 1.178 return;
689     }
690 wakaba 1.139 }
691 wakaba 1.112
692     ($self->{line_prev}, $self->{column_prev})
693     = ($self->{line}, $self->{column});
694     $self->{column}++;
695 wakaba 1.1
696 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
697 wakaba 1.132 !!!cp ('j1');
698 wakaba 1.112 $self->{line}++;
699     $self->{column} = 0;
700 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
701 wakaba 1.132 !!!cp ('j2');
702 wakaba 1.170 ## TODO: support for abort/streaming
## Look one character ahead: an LF directly after the CR is dropped,
## any other character is kept in |{next_nc}| for the next call.
703 wakaba 1.178 my $next = '';
704     if ($input->read ($next, 1) and $next ne "\x0A") {
705 wakaba 1.183 $self->{next_nc} = $next;
706 wakaba 1.135 }
707 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
708 wakaba 1.112 $self->{line}++;
709     $self->{column} = 0;
710 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
711 wakaba 1.132 !!!cp ('j4');
712 wakaba 1.8 !!!parse-error (type => 'NULL');
713 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
714 wakaba 1.1 }
715     };
716    
## Bulk reader: stores into |$_[0]|, at offset |$_[2]|, a run of
## characters that are neither listed in the character class body
## |$_[1]| nor U+0000, LF or CR, and returns the number of characters
## read.  Returns 0 while a read-ahead character is pending in
## |{next_nc}|.  When something was read, |{nc}| is set to -1.
717 wakaba 1.172 $self->{read_until} = sub {
718     #my ($scalar, $specials_range, $offset) = @_;
719 wakaba 1.183 return 0 if defined $self->{next_nc};
720 wakaba 1.180
721 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
722 wakaba 1.180 my $offset = $_[2] || 0;
723    
724     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
## Serve the request from the unread part of |{char_buffer}| first.
725     pos ($self->{char_buffer}) = $self->{char_buffer_pos};
726     if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
727     substr ($_[0], $offset)
728     = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
729     my $count = $+[0] - $-[0];
730     if ($count) {
731     $self->{column} += $count;
732     $self->{char_buffer_pos} += $count;
733     $self->{line_prev} = $self->{line};
734     $self->{column_prev} = $self->{column} - 1;
735 wakaba 1.183 $self->{nc} = -1;
736 wakaba 1.180 }
737     return $count;
738     } else {
739     return 0;
740     }
741     } else {
742     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
743     if ($count) {
744     $self->{column} += $count;
745     $self->{line_prev} = $self->{line};
746     $self->{column_prev} = $self->{column} - 1;
747 wakaba 1.183 $self->{nc} = -1;
748 wakaba 1.180 }
749     return $count;
750 wakaba 1.172 }
751     }; # $self->{read_until}
752 wakaba 1.171
753 wakaba 1.3 my $onerror = $_[2] || sub {
754     my (%opt) = @_;
755 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
756     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
757     warn "Parse error ($opt{type}) at line $line column $column\n";
758 wakaba 1.3 };
## The current line and column are passed first, so that |line| and
## |column| values given by the reporter take precedence.
759     $self->{parse_error} = sub {
760 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
761 wakaba 1.1 };
762    
763 wakaba 1.182 my $char_onerror = sub {
764     my (undef, $type, %opt) = @_;
765     !!!parse-error (layer => 'encode',
766     line => $self->{line}, column => $self->{column} + 1,
767     %opt, type => $type);
768     }; # $char_onerror
769    
## With a wrapper, the wrapped handle always gets |$char_onerror|;
## without one, a handler already set on |$input| is left in place.
770     if ($_[3]) {
771     $input = $_[3]->($input);
772     $input->onerror ($char_onerror);
773     } else {
774     $input->onerror ($char_onerror) unless defined $input->onerror;
775     }
776    
777 wakaba 1.1 $self->_initialize_tokenizer;
778     $self->_initialize_tree_constructor;
779     $self->_construct_tree;
780     $self->_terminate_tree_constructor;
781    
782 wakaba 1.112 delete $self->{parse_error}; # remove loop
783    
784 wakaba 1.1 return $self->{document};
785 wakaba 1.135 } # parse_char_stream
786 wakaba 1.1
## new ($class)
##
## Creates a parser object.  The object carries a |level| table that
## maps the error level names |must|, |should|, |warn|, |info| and
## |uncertain| to one-letter codes, plus default callbacks:
## |{set_nc}| (always sets |{nc}| to -1) and the empty placeholders
## |{parse_error}|, |{change_encoding}| and
## |{application_cache_selection}|.  Within the code shown here,
## |parse_char_stream| replaces |{set_nc}| and |{parse_error}| and
## |parse_byte_stream| replaces |{change_encoding}|.
787     sub new ($) {
788     my $class = shift;
789 wakaba 1.134 my $self = bless {
790 wakaba 1.153 level => {must => 'm',
791 wakaba 1.159 should => 's',
792 wakaba 1.153 warn => 'w',
793     info => 'i',
794     uncertain => 'u'},
795 wakaba 1.134 }, $class;
796 wakaba 1.183 $self->{set_nc} = sub {
797     $self->{nc} = -1;
798 wakaba 1.1 };
799     $self->{parse_error} = sub {
800     #
801     };
802 wakaba 1.63 $self->{change_encoding} = sub {
803     # if ($_[0] is a supported encoding) {
804     # run "change the encoding" algorithm;
805     # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
806     # }
807     };
808 wakaba 1.61 $self->{application_cache_selection} = sub {
809     #
810     };
811 wakaba 1.1 return $self;
812     } # new
813    
814 wakaba 1.208 ## Insertion modes
815 wakaba 1.55
## Insertion mode groups.  Each group is a single bit, so that the
## group(s) an insertion mode belongs to can be tested with |&|.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS () { 1 << 3 }
sub BODY_IMS () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS () { 1 << 6 }
sub ROW_IMS () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS () { 1 << 9 }
sub SELECT_IMS () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } # see Whatpm::HTML::Tokenizer
## NOTE: "in foreign content" insertion mode is special; it is combined
## with the secondary insertion mode.  In this parser, they are stored
## together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
## combined with the original insertion mode.  In this parser,
## they are stored together in the bit-or'ed form.

## Mask covering the bits 0..10, i.e. every bit below the one reserved
## for |IN_FOREIGN_CONTENT_IM|.
sub IM_MASK () { (1 << 11) - 1 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## Concrete insertion modes: group bit(s), with the two lowest bits
## distinguishing the modes that share the same group(s).
sub IN_HEAD_IM () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM () { HEAD_IMS | 3 }
sub IN_BODY_IM () { BODY_IMS }
sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM () { TABLE_IMS }
sub AFTER_BODY_IM () { BODY_AFTER_IMS }
sub IN_FRAMESET_IM () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }
sub IN_SELECT_IM () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
sub IN_COLUMN_GROUP_IM () { 2 }
860    
## Prepare the document for the tree construction stage: turn strict
## error checking off, flag the document as an HTML document, and
## record the source position (line 1, column 1) as user data.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## NOTE: $self->{document} MUST be specified before this method is called
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST

  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
871    
## Finish the tree construction stage by turning the strict error
## checking of the document back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
877    
878     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
879    
880 wakaba 1.3 { # tree construction stage
881     my $token;
882    
## Run the tree construction stage: fetch the first token, reset the
## tree constructor state kept in the parser object, and then process
## the "initial" insertion mode, the "before html" insertion mode, and
## the remaining insertion modes in this order.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA makes the
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  !!!next-token;

  ## Reset the state of the tree constructor.
  $self->{$_} = undef
      for qw(form_element head_element head_element_inserted
             inner_html_node ignore_newline);
  $self->{open_elements} = [];

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;

  ## NOTE: The "before head" insertion mode and so on.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->_tree_construction_main;
} # _construct_tree
909    
## Process tokens in the "initial" insertion mode.
##
## Leading white space characters are dropped and comment tokens are
## appended to the document.  When a DOCTYPE token is found, the
## conformance of the DOCTYPE is checked ("not HTML5" / "XSLT-compat"
## parse errors), a DocumentType node is appended to the document and
## the compatibility mode of the document is set to "quirks" or
## "limited quirks" where the DOCTYPE requires so.  When any other
## token is found, a "no DOCTYPE" parse error is reported and the
## document is put into the "quirks" mode.
##
## The method returns when the parser has to go to the "before html"
## insertion mode.  The token to be processed next is left in |$token|.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not
      ## HTML5" error, switch to a conformance checking mode for
      ## another language.  (We don't support such mode switchings; it
      ## is nonsense to do anything different from what browsers do.)
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      my $doctype = $self->{document}->create_document_type_definition
          ($doctype_name);

      $doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive
      if ($doctype_name ne 'html') {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        !!!cp ('t2');
        ## XXX Obsolete permitted DOCTYPEs
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{sysid}) {
        if ($token->{sysid} eq 'about:legacy-compat') {
          !!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat">
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else { ## <!DOCTYPE HTML>
        !!!cp ('t3');
        #
      }

      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};

      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'html') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## Public identifiers are compared ASCII case-insensitively, so
        ## the identifier is uppercased and compared with uppercase
        ## literals.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/;
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          ## NOTE(review): "EXPERIMETNAL" looks like a misspelling of
          ## "EXPERIMENTAL"; the literal is kept as is - confirm
          ## against the list in the specification before changing it.
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix

        ## Does the public identifier start with one of the prefixes?
        ## NOTE: The identifier itself (not the list reference) has to
        ## be examined here.
        my $match;
        for my $quirks_prefix (@$prefix) {
          if (substr ($pubid, 0, length $quirks_prefix) eq $quirks_prefix) {
            $match = 1;
            last;
          }
        }

        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
          ## NOTE: For these public identifiers the document is in the
          ## limited quirks mode if the system identifier is specified,
          ## and in the quirks mode if it is missing.
          if (defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1119    
## Process tokens in the "before html" insertion mode.
##
## DOCTYPE tokens are reported as parse errors and ignored, comment
## tokens are appended to the document, and leading white space
## characters are dropped.  Then the root |html| element is created -
## from the |html| start tag token if there is one, or without any
## token attributes otherwise - appended to the document, and pushed
## onto the stack of open elements.  The |application_cache_selection|
## callback is invoked with the value of the |manifest| attribute, or
## with |undef|.
##
## The method returns when the parser has to go to the "before head"
## insertion mode.  The token to be processed next is left in |$token|.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  ## NOTE: "before html" insertion mode.

  BEFORE_HTML: {
    my $token_type = $token->{type};

    if ($token_type == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token and stay in the insertion mode.
      !!!next-token;
      redo BEFORE_HTML;
    }

    if ($token_type == COMMENT_TOKEN) {
      !!!cp ('t20');
      $self->{document}->append_child
          ($self->{document}->create_comment ($token->{data}));
      ## Stay in the insertion mode.
      !!!next-token;
      redo BEFORE_HTML;
    }

    if ($token_type == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Leading white space characters are ignored.

        if (length $token->{data}) {
          !!!cp ('t22');
        } else {
          !!!cp ('t21');
          ## Nothing is left.  Stay in the insertion mode.
          !!!next-token;
          redo BEFORE_HTML;
        }
      } else {
        !!!cp ('t23');
      }

      $self->{application_cache_selection}->(undef);

      ## Create the root element below.
    } elsif ($token_type == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        my $manifest = $token->{attributes}->{manifest};
        if ($manifest) {
          !!!cp ('t24');
          $self->{application_cache_selection}->($manifest->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      }

      !!!cp ('t25.1');
      ## Create the root element below.
    } elsif ($token_type == END_TAG_TOKEN or
             $token_type == END_OF_FILE_TOKEN) {
      !!!cp ('t26');
      ## Create the root element below.
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## No |html| start tag: create the root element without it.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # BEFORE_HTML

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1212    
## Reset the insertion mode appropriately.
##
## The stack of open elements is walked from its last entry towards its
## first entry, and |$self->{insertion_mode}| is set according to the
## first entry that determines an insertion mode.  When the first entry
## of the stack is reached, |$self->{inner_html_node}| is examined
## instead of it; the method dies if that member is not defined.
sub _reset_insertion_mode ($) {
  my ($self) = @_;
  my $open_elements = $self->{open_elements};

  ## Step 1
  my $is_last;

  ## Step 2
  my $index = -1;

  ## Step 3
  NODE: while (1) {
    my $node = $open_elements->[$index];

    if ($open_elements->[0]->[0] eq $node->[0]) {
      $is_last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $node = $self->{inner_html_node};
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML
      ## and SVG elements.  Currently the HTML syntax supports only
      ## MathML and SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] == TABLE_CELL_EL) {
      if ($is_last) {
        !!!cp ('t28.2');
        ## No insertion mode is chosen by this node.
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      my $mode_for = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      };
      $new_mode = $mode_for->{$node->[0]->manakai_local_name};
    }
    if (defined $new_mode) {
      ## NOTE: The method returns only if the assigned value is true.
      ## All the insertion modes assigned above are non-zero.
      ($self->{insertion_mode} = $new_mode) and return;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    }
    !!!cp ('t31');

    ## Step 16
    if ($is_last) {
      ($self->{insertion_mode} = IN_BODY_IM) and return;
    }

    ## Step 17
    $index--;

    ## Step 18 (continue with the previous entry of the stack)
  } # NODE

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1299    
1300     sub _tree_construction_main ($) {
1301     my $self = shift;
1302    
1303 wakaba 1.1 my $active_formatting_elements = [];
1304    
1305     my $reconstruct_active_formatting_elements = sub { # MUST
1306     my $insert = shift;
1307    
1308     ## Step 1
1309     return unless @$active_formatting_elements;
1310    
1311     ## Step 3
1312     my $i = -1;
1313     my $entry = $active_formatting_elements->[$i];
1314    
1315     ## Step 2
1316     return if $entry->[0] eq '#marker';
1317 wakaba 1.3 for (@{$self->{open_elements}}) {
1318 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1319 wakaba 1.79 !!!cp ('t32');
1320 wakaba 1.1 return;
1321     }
1322     }
1323    
1324     S4: {
1325     ## Step 4
1326     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
1327    
1328     ## Step 5
1329     $i--;
1330     $entry = $active_formatting_elements->[$i];
1331    
1332     ## Step 6
1333     if ($entry->[0] eq '#marker') {
1334 wakaba 1.81 !!!cp ('t33_1');
1335 wakaba 1.1 #
1336     } else {
1337     my $in_open_elements;
1338 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
1339 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1340 wakaba 1.79 !!!cp ('t33');
1341 wakaba 1.1 $in_open_elements = 1;
1342     last OE;
1343     }
1344     }
1345     if ($in_open_elements) {
1346 wakaba 1.79 !!!cp ('t34');
1347 wakaba 1.1 #
1348     } else {
1349 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
1350 wakaba 1.79 !!!cp ('t35');
1351 wakaba 1.1 redo S4;
1352     }
1353     }
1354    
1355     ## Step 7
1356     $i++;
1357     $entry = $active_formatting_elements->[$i];
1358     } # S4
1359    
1360     S7: {
1361     ## Step 8
1362     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
1363    
1364     ## Step 9
1365     $insert->($clone->[0]);
1366 wakaba 1.3 push @{$self->{open_elements}}, $clone;
1367 wakaba 1.1
1368     ## Step 10
1369 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
1370 wakaba 1.1
1371     ## Step 11
1372     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
1373 wakaba 1.79 !!!cp ('t36');
1374 wakaba 1.1 ## Step 7'
1375     $i++;
1376     $entry = $active_formatting_elements->[$i];
1377    
1378     redo S7;
1379     }
1380 wakaba 1.79
1381     !!!cp ('t37');
1382 wakaba 1.1 } # S7
1383     }; # $reconstruct_active_formatting_elements
1384    
1385     my $clear_up_to_marker = sub {
1386     for (reverse 0..$#$active_formatting_elements) {
1387     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
1388 wakaba 1.79 !!!cp ('t38');
1389 wakaba 1.1 splice @$active_formatting_elements, $_;
1390     return;
1391     }
1392     }
1393 wakaba 1.79
1394     !!!cp ('t39');
1395 wakaba 1.1 }; # $clear_up_to_marker
1396    
1397 wakaba 1.96 my $insert;
1398    
1399     my $parse_rcdata = sub ($) {
1400     my ($content_model_flag) = @_;
1401 wakaba 1.25
1402     ## Step 1
1403     my $start_tag_name = $token->{tag_name};
1404 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
1405 wakaba 1.25
1406     ## Step 2
1407 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
1408 wakaba 1.13 delete $self->{escape}; # MUST
1409 wakaba 1.25
1410 wakaba 1.205 ## Step 3, 4
1411     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
1412    
1413 wakaba 1.125 !!!nack ('t40.1');
1414 wakaba 1.1 !!!next-token;
1415 wakaba 1.25 }; # $parse_rcdata
1416 wakaba 1.1
1417 wakaba 1.96 my $script_start_tag = sub () {
1418 wakaba 1.205 ## Step 1
1419 wakaba 1.1 my $script_el;
1420 wakaba 1.126 !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
1421 wakaba 1.205
1422     ## Step 2
1423 wakaba 1.1 ## TODO: mark as "parser-inserted"
1424    
1425 wakaba 1.205 ## Step 3
1426     ## TODO: Mark as "already executed", if ...
1427    
1428 wakaba 1.221 ## Step 4 (HTML5 revision 2702)
1429 wakaba 1.205 $insert->($script_el);
1430     push @{$self->{open_elements}}, [$script_el, $el_category->{script}];
1431    
1432     ## Step 5
1433 wakaba 1.40 $self->{content_model} = CDATA_CONTENT_MODEL;
1434 wakaba 1.13 delete $self->{escape}; # MUST
1435 wakaba 1.1
1436 wakaba 1.205 ## Step 6-7
1437     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
1438 wakaba 1.25
1439 wakaba 1.205 !!!nack ('t40.2');
1440 wakaba 1.1 !!!next-token;
1441     }; # $script_start_tag
1442    
1443 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1444 wakaba 1.229 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag (OBSOLETE; unused).
1445 wakaba 1.202 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1446 wakaba 1.102 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1447    
1448 wakaba 1.1 my $formatting_end_tag = sub {
1449 wakaba 1.113 my $end_tag_token = shift;
1450     my $tag_name = $end_tag_token->{tag_name};
1451 wakaba 1.1
1452 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
1453 wakaba 1.102
1454 wakaba 1.1 FET: {
1455     ## Step 1
1456     my $formatting_element;
1457     my $formatting_element_i_in_active;
1458     AFE: for (reverse 0..$#$active_formatting_elements) {
1459 wakaba 1.123 if ($active_formatting_elements->[$_]->[0] eq '#marker') {
1460     !!!cp ('t52');
1461     last AFE;
1462     } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
1463     eq $tag_name) {
1464 wakaba 1.79 !!!cp ('t51');
1465 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
1466     $formatting_element_i_in_active = $_;
1467     last AFE;
1468     }
1469     } # AFE
1470     unless (defined $formatting_element) {
1471 wakaba 1.79 !!!cp ('t53');
1472 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
1473 wakaba 1.1 ## Ignore the token
1474     !!!next-token;
1475     return;
1476     }
1477     ## has an element in scope
1478     my $in_scope = 1;
1479     my $formatting_element_i_in_open;
1480 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
1481     my $node = $self->{open_elements}->[$_];
1482 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
1483     if ($in_scope) {
1484 wakaba 1.79 !!!cp ('t54');
1485 wakaba 1.1 $formatting_element_i_in_open = $_;
1486     last INSCOPE;
1487     } else { # in open elements but not in scope
1488 wakaba 1.79 !!!cp ('t55');
1489 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
1490     text => $token->{tag_name},
1491 wakaba 1.113 token => $end_tag_token);
1492 wakaba 1.1 ## Ignore the token
1493     !!!next-token;
1494     return;
1495     }
1496 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
1497 wakaba 1.79 !!!cp ('t56');
1498 wakaba 1.1 $in_scope = 0;
1499     }
1500     } # INSCOPE
1501     unless (defined $formatting_element_i_in_open) {
1502 wakaba 1.79 !!!cp ('t57');
1503 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
1504     text => $token->{tag_name},
1505 wakaba 1.113 token => $end_tag_token);
1506 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
1507     !!!next-token; ## TODO: ok?
1508     return;
1509     }
1510 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
1511 wakaba 1.79 !!!cp ('t58');
1512 wakaba 1.122 !!!parse-error (type => 'not closed',
1513 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1514 wakaba 1.122 ->manakai_local_name,
1515 wakaba 1.113 token => $end_tag_token);
1516 wakaba 1.1 }
1517    
1518     ## Step 2
1519     my $furthest_block;
1520     my $furthest_block_i_in_open;
1521 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1522     my $node = $self->{open_elements}->[$_];
1523 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
1524 wakaba 1.1 #not $phrasing_category->{$node->[1]} and
1525 wakaba 1.123 ($node->[1] & SPECIAL_EL or
1526     $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
1527 wakaba 1.79 !!!cp ('t59');
1528 wakaba 1.1 $furthest_block = $node;
1529     $furthest_block_i_in_open = $_;
1530 wakaba 1.203 ## NOTE: The topmost (eldest) node.
1531 wakaba 1.1 } elsif ($node->[0] eq $formatting_element->[0]) {
1532 wakaba 1.79 !!!cp ('t60');
1533 wakaba 1.1 last OE;
1534     }
1535     } # OE
1536    
1537     ## Step 3
1538     unless (defined $furthest_block) { # MUST
1539 wakaba 1.79 !!!cp ('t61');
1540 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
1541 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
1542     !!!next-token;
1543     return;
1544     }
1545    
1546     ## Step 4
1547 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
1548 wakaba 1.1
1549     ## Step 5
1550     my $furthest_block_parent = $furthest_block->[0]->parent_node;
1551     if (defined $furthest_block_parent) {
1552 wakaba 1.79 !!!cp ('t62');
1553 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
1554     }
1555    
1556     ## Step 6
1557     my $bookmark_prev_el
1558     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
1559     ->[0];
1560    
1561     ## Step 7
1562     my $node = $furthest_block;
1563     my $node_i_in_open = $furthest_block_i_in_open;
1564     my $last_node = $furthest_block;
1565     S7: {
1566     ## Step 1
1567     $node_i_in_open--;
1568 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
1569 wakaba 1.1
1570     ## Step 2
1571     my $node_i_in_active;
1572     S7S2: {
1573     for (reverse 0..$#$active_formatting_elements) {
1574     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
1575 wakaba 1.79 !!!cp ('t63');
1576 wakaba 1.1 $node_i_in_active = $_;
1577     last S7S2;
1578     }
1579     }
1580 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
1581 wakaba 1.1 redo S7;
1582     } # S7S2
1583    
1584     ## Step 3
1585     last S7 if $node->[0] eq $formatting_element->[0];
1586    
1587     ## Step 4
1588     if ($last_node->[0] eq $furthest_block->[0]) {
1589 wakaba 1.79 !!!cp ('t64');
1590 wakaba 1.1 $bookmark_prev_el = $node->[0];
1591     }
1592    
1593     ## Step 5
1594     if ($node->[0]->has_child_nodes ()) {
1595 wakaba 1.79 !!!cp ('t65');
1596 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
1597     $active_formatting_elements->[$node_i_in_active] = $clone;
1598 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
1599 wakaba 1.1 $node = $clone;
1600     }
1601    
1602     ## Step 6
1603     $node->[0]->append_child ($last_node->[0]);
1604    
1605     ## Step 7
1606     $last_node = $node;
1607    
1608     ## Step 8
1609     redo S7;
1610     } # S7
1611    
1612     ## Step 8
1613 wakaba 1.123 if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
1614 wakaba 1.234 ## Foster parenting.
1615 wakaba 1.102 my $foster_parent_element;
1616     my $next_sibling;
1617 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
1618 wakaba 1.206 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1619 wakaba 1.234 !!!cp ('t65.2');
1620     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1621     $next_sibling = $self->{open_elements}->[$_]->[0];
1622     undef $next_sibling
1623     unless $next_sibling->parent_node eq $foster_parent_element;
1624     last OE;
1625     }
1626     } # OE
1627     $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1628    
1629 wakaba 1.102 $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
1630     $open_tables->[-1]->[1] = 1; # tainted
1631     } else {
1632     !!!cp ('t65.3');
1633     $common_ancestor_node->[0]->append_child ($last_node->[0]);
1634     }
1635 wakaba 1.1
1636     ## Step 9
1637     my $clone = [$formatting_element->[0]->clone_node (0),
1638     $formatting_element->[1]];
1639    
1640     ## Step 10
1641     my @cn = @{$furthest_block->[0]->child_nodes};
1642     $clone->[0]->append_child ($_) for @cn;
1643    
1644     ## Step 11
1645     $furthest_block->[0]->append_child ($clone->[0]);
1646    
1647     ## Step 12
1648     my $i;
1649     AFE: for (reverse 0..$#$active_formatting_elements) {
1650     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
1651 wakaba 1.79 !!!cp ('t66');
1652 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
1653     $i-- and last AFE if defined $i;
1654     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
1655 wakaba 1.79 !!!cp ('t67');
1656 wakaba 1.1 $i = $_;
1657     }
1658     } # AFE
1659     splice @$active_formatting_elements, $i + 1, 0, $clone;
1660    
1661     ## Step 13
1662     undef $i;
1663 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1664     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
1665 wakaba 1.79 !!!cp ('t68');
1666 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
1667 wakaba 1.1 $i-- and last OE if defined $i;
1668 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
1669 wakaba 1.79 !!!cp ('t69');
1670 wakaba 1.1 $i = $_;
1671     }
1672     } # OE
1673 wakaba 1.203 splice @{$self->{open_elements}}, $i + 1, 0, $clone;
1674 wakaba 1.1
1675     ## Step 14
1676     redo FET;
1677     } # FET
1678     }; # $formatting_end_tag
1679    
## Plain insertion routine: append the node given as the first argument
## as the last child of the current node, i.e. the element held by the
## topmost entry of the stack of open elements.  The same closure is
## also stored in |$insert|.
1680 wakaba 1.96 $insert = my $insert_to_current = sub {
1681 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
1682 wakaba 1.1 }; # $insert_to_current
1683    
1684 wakaba 1.234 ## Foster parenting. Note that there are three copies of the "foster
1685     ## parenting" code in the parser: for elements (this one), for texts,
1686     ## and for elements in the AAA code.
##
## Insert |$child| (the first argument).  If the category of the current
## node matches |TABLE_ROWS_EL|, the child is inserted at the foster
## parent position computed below and the innermost entry of
## |$open_tables| is marked as tainted; otherwise the child is simply
## appended to the current node.
1687 wakaba 1.1 my $insert_to_foster = sub {
1688 wakaba 1.95 my $child = shift;
1689 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
1690 wakaba 1.95 # MUST
1691     my $foster_parent_element;
1692     my $next_sibling;
## Walk the stack of open elements from the top down and stop at the
## first entry whose category equals |TABLE_EL|.
1693 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
1694 wakaba 1.206 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1695 wakaba 1.234 !!!cp ('t71');
## The foster parent is the stack entry just below that |table| entry.
1696     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
## The |table| element is used as the reference node only while it is
## still a child of the foster parent; otherwise no reference node is
## given.
1697     $next_sibling = $self->{open_elements}->[$_]->[0];
1698     undef $next_sibling
1699     unless $next_sibling->parent_node eq $foster_parent_element;
1700     last OE;
1701     }
1702     } # OE
## No |table| entry was found: fall back to the first (bottom) element
## of the stack of open elements.
1703     $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1704    
## NOTE(review): with an undefined |$next_sibling| this presumably
## appends |$child| at the end, as DOM |insertBefore| does with a null
## reference node - confirm against the DOM implementation in use.
1705     $foster_parent_element->insert_before ($child, $next_sibling);
1706 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
1707     } else {
1708     !!!cp ('t72');
1709     $self->{open_elements}->[-1]->[0]->append_child ($child);
1710     }
1711 wakaba 1.1 }; # $insert_to_foster
1712    
1713 wakaba 1.204 ## NOTE: Insert a character (MUST): When a character is inserted, if
1714     ## the last node that was inserted by the parser is a Text node and
1715     ## the character has to be inserted after that node, then the
1716     ## character is appended to that Text node. However, if any other
1717     ## node is inserted by the parser, then a new Text node is created
1718     ## and the character becomes the content of that Text node. If I'm
1719     ## not mistaken, for a parser with scripting disabled, there are only
1720     ## two cases where this occurs. One is the case where an element node
1721     ## is inserted into the |head| element. This is covered by using the
1722 wakaba 1.202 ## |$self->{head_element_inserted}| flag. The other is the case where
1723     ## an element or comment is inserted into the |table| subtree while
1724     ## foster parenting happens. This is covered by using the [2] flag
1725     ## of the |$open_tables| structure. All other cases are handled
1726     ## simply by calling the |manakai_append_text| method.
1727    
1728 wakaba 1.204 ## TODO: |<body><script>document.write("a<br>");
1729     ## document.body.removeChild (document.body.lastChild);
1730     ## document.write ("b")</script>|
1731    
1732 wakaba 1.126 B: while (1) {
1733 wakaba 1.230
1734     ## The "in table text" insertion mode.
1735     if ($self->{insertion_mode} & TABLE_IMS and
1736     not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1737     not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1738     C: {
1739     my $s;
1740     if ($token->{type} == CHARACTER_TOKEN) {
1741     !!!cp ('t194');
1742     $self->{pending_chars} ||= [];
1743     push @{$self->{pending_chars}}, $token;
1744     !!!next-token;
1745     next B;
1746     } else {
1747     if ($self->{pending_chars}) {
1748     $s = join '', map { $_->{data} } @{$self->{pending_chars}};
1749     delete $self->{pending_chars};
1750     if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1751     !!!cp ('t195');
1752     #
1753     } else {
1754     !!!cp ('t195.1');
1755     #$self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1756     $self->{open_elements}->[-1]->[0]->append_child
1757     ($self->{document}->create_text_node ($s));
1758     last C;
1759     }
1760     } else {
1761     !!!cp ('t195.2');
1762     last C;
1763     }
1764     }
1765    
1766 wakaba 1.234 ## Foster parenting.
1767 wakaba 1.230 !!!parse-error (type => 'in table:#text', token => $token);
1768    
1769     ## NOTE: As if in body, but insert into the foster parent element.
1770     $reconstruct_active_formatting_elements->($insert_to_foster);
1771    
1772     if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
1773     # MUST
1774     my $foster_parent_element;
1775     my $next_sibling;
1776     OE: for (reverse 0..$#{$self->{open_elements}}) {
1777     if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1778 wakaba 1.234 !!!cp ('t197');
1779     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1780     $next_sibling = $self->{open_elements}->[$_]->[0];
1781     undef $next_sibling
1782     unless $next_sibling->parent_node eq $foster_parent_element;
1783 wakaba 1.230 last OE;
1784     }
1785     } # OE
1786 wakaba 1.234 $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1787    
1788     !!!cp ('t199');
1789     $foster_parent_element->insert_before
1790     ($self->{document}->create_text_node ($s), $next_sibling);
1791    
1792 wakaba 1.230 $open_tables->[-1]->[1] = 1; # tainted
1793     $open_tables->[-1]->[2] = 1; # ~node inserted
1794     } else {
1795     ## NOTE: Fragment case or in a foster parent'ed element
1796     ## (e.g. |<table><span>a|). In fragment case, whether the
1797     ## character is appended to existing node or a new node is
1798     ## created is irrelevant, since the foster parent'ed nodes
1799     ## are discarded and fragment parsing does not invoke any
1800     ## script.
1801     !!!cp ('t200');
1802     $self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1803     }
1804     } # C
1805     } # TABLE_IMS
1806    
1807 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
1808 wakaba 1.79 !!!cp ('t73');
1809 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1810 wakaba 1.52 ## Ignore the token
1811     ## Stay in the phase
1812     !!!next-token;
1813 wakaba 1.126 next B;
1814 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
1815 wakaba 1.52 $token->{tag_name} eq 'html') {
1816 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1817 wakaba 1.79 !!!cp ('t79');
1818 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1819 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
1820     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1821 wakaba 1.79 !!!cp ('t80');
1822 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1823 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1824 wakaba 1.79 } else {
1825     !!!cp ('t81');
1826 wakaba 1.52 }
1827    
1828 wakaba 1.84 !!!cp ('t82');
1829 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
1830 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
1831     for my $attr_name (keys %{$token->{attributes}}) {
1832     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1833 wakaba 1.79 !!!cp ('t84');
1834 wakaba 1.52 $top_el->set_attribute_ns
1835     (undef, [undef, $attr_name],
1836     $token->{attributes}->{$attr_name}->{value});
1837     }
1838     }
1839 wakaba 1.125 !!!nack ('t84.1');
1840 wakaba 1.52 !!!next-token;
1841 wakaba 1.126 next B;
1842 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1843 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
1844 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1845 wakaba 1.79 !!!cp ('t85');
1846 wakaba 1.52 $self->{document}->append_child ($comment);
1847 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1848 wakaba 1.79 !!!cp ('t86');
1849 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
1850     } else {
1851 wakaba 1.79 !!!cp ('t87');
1852 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1853 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1854 wakaba 1.52 }
1855     !!!next-token;
1856 wakaba 1.126 next B;
1857 wakaba 1.205 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1858     if ($token->{type} == CHARACTER_TOKEN) {
1859     $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1860     delete $self->{ignore_newline};
1861    
1862     if (length $token->{data}) {
1863     !!!cp ('t43');
1864     $self->{open_elements}->[-1]->[0]->manakai_append_text
1865     ($token->{data});
1866     } else {
1867     !!!cp ('t43.1');
1868     }
1869     !!!next-token;
1870     next B;
1871     } elsif ($token->{type} == END_TAG_TOKEN) {
1872     delete $self->{ignore_newline};
1873    
1874     if ($token->{tag_name} eq 'script') {
1875     !!!cp ('t50');
1876    
1877     ## Para 1-2
1878     my $script = pop @{$self->{open_elements}};
1879    
1880     ## Para 3
1881     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1882    
1883     ## Para 4
1884     ## TODO: $old_insertion_point = $current_insertion_point;
1885     ## TODO: $current_insertion_point = just before $self->{nc};
1886    
1887     ## Para 5
1888     ## TODO: Run the $script->[0].
1889    
1890     ## Para 6
1891     ## TODO: $current_insertion_point = $old_insertion_point;
1892    
1893     ## Para 7
1894     ## TODO: if ($pending_external_script) {
1895     ## TODO: ...
1896     ## TODO: }
1897    
1898     !!!next-token;
1899     next B;
1900     } else {
1901     !!!cp ('t42');
1902    
1903     pop @{$self->{open_elements}};
1904    
1905     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1906     !!!next-token;
1907     next B;
1908     }
1909     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1910     delete $self->{ignore_newline};
1911    
1912     !!!cp ('t44');
1913     !!!parse-error (type => 'not closed',
1914     text => $self->{open_elements}->[-1]->[0]
1915     ->manakai_local_name,
1916     token => $token);
1917    
1918 wakaba 1.206 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1919 wakaba 1.205 # ## TODO: Mark as "already executed"
1920     #}
1921    
1922     pop @{$self->{open_elements}};
1923    
1924     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1925     ## Reprocess.
1926     next B;
1927     } else {
1928     die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1929     }
1930 wakaba 1.126 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1931     if ($token->{type} == CHARACTER_TOKEN) {
1932     !!!cp ('t87.1');
1933     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1934     !!!next-token;
1935     next B;
1936     } elsif ($token->{type} == START_TAG_TOKEN) {
1937 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1938     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1939 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1940     ($token->{tag_name} eq 'svg' and
1941 wakaba 1.206 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1942 wakaba 1.126 ## NOTE: "using the rules for secondary insertion mode"then"continue"
1943     !!!cp ('t87.2');
1944     #
1945     } elsif ({
1946 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1947 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1948 wakaba 1.223 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1949 wakaba 1.146 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1950     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1951     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1952     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1953     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1954 wakaba 1.223 }->{$token->{tag_name}} or
1955     ($token->{tag_name} eq 'font' and
1956     ($token->{attributes}->{color} or
1957     $token->{attributes}->{face} or
1958     $token->{attributes}->{size}))) {
1959 wakaba 1.126 !!!cp ('t87.2');
1960     !!!parse-error (type => 'not closed',
1961 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1962 wakaba 1.126 ->manakai_local_name,
1963     token => $token);
1964    
1965     pop @{$self->{open_elements}}
1966     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1967    
1968 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1969 wakaba 1.126 ## Reprocess.
1970     next B;
1971     } else {
1972 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1973     my $tag_name = $token->{tag_name};
1974     if ($nsuri eq $SVG_NS) {
1975     $tag_name = {
1976     altglyph => 'altGlyph',
1977     altglyphdef => 'altGlyphDef',
1978     altglyphitem => 'altGlyphItem',
1979     animatecolor => 'animateColor',
1980     animatemotion => 'animateMotion',
1981     animatetransform => 'animateTransform',
1982     clippath => 'clipPath',
1983     feblend => 'feBlend',
1984     fecolormatrix => 'feColorMatrix',
1985     fecomponenttransfer => 'feComponentTransfer',
1986     fecomposite => 'feComposite',
1987     feconvolvematrix => 'feConvolveMatrix',
1988     fediffuselighting => 'feDiffuseLighting',
1989     fedisplacementmap => 'feDisplacementMap',
1990     fedistantlight => 'feDistantLight',
1991     feflood => 'feFlood',
1992     fefunca => 'feFuncA',
1993     fefuncb => 'feFuncB',
1994     fefuncg => 'feFuncG',
1995     fefuncr => 'feFuncR',
1996     fegaussianblur => 'feGaussianBlur',
1997     feimage => 'feImage',
1998     femerge => 'feMerge',
1999     femergenode => 'feMergeNode',
2000     femorphology => 'feMorphology',
2001     feoffset => 'feOffset',
2002     fepointlight => 'fePointLight',
2003     fespecularlighting => 'feSpecularLighting',
2004     fespotlight => 'feSpotLight',
2005     fetile => 'feTile',
2006     feturbulence => 'feTurbulence',
2007     foreignobject => 'foreignObject',
2008     glyphref => 'glyphRef',
2009     lineargradient => 'linearGradient',
2010     radialgradient => 'radialGradient',
2011     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
2012     textpath => 'textPath',
2013     }->{$tag_name} || $tag_name;
2014     }
2015    
2016     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
2017    
2018     ## "adjust foreign attributes" - done in insert-element-f
2019 wakaba 1.126
2020 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
2021 wakaba 1.126
2022     if ($self->{self_closing}) {
2023     pop @{$self->{open_elements}};
2024     !!!ack ('t87.3');
2025     } else {
2026     !!!cp ('t87.4');
2027     }
2028    
2029     !!!next-token;
2030     next B;
2031     }
2032     } elsif ($token->{type} == END_TAG_TOKEN) {
2033     ## NOTE: "using the rules for secondary insertion mode" then "continue"
2034 wakaba 1.219 if ($token->{tag_name} eq 'script') {
2035     !!!cp ('t87.41');
2036     #
2037     ## XXXscript: Execute script here.
2038     } else {
2039     !!!cp ('t87.5');
2040     #
2041     }
2042 wakaba 1.126 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2043     !!!cp ('t87.6');
2044 wakaba 1.146 !!!parse-error (type => 'not closed',
2045 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2046 wakaba 1.146 ->manakai_local_name,
2047     token => $token);
2048    
2049     pop @{$self->{open_elements}}
2050     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
2051    
2052 wakaba 1.200 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
2053    
2054 wakaba 1.146 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
2055     ## Reprocess.
2056     next B;
2057 wakaba 1.126 } else {
2058     die "$0: $token->{type}: Unknown token type";
2059     }
2060     }
2061    
2062     if ($self->{insertion_mode} & HEAD_IMS) {
2063 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2064 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
2065 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2066 wakaba 1.202 if ($self->{head_element_inserted}) {
2067     !!!cp ('t88.3');
2068     $self->{open_elements}->[-1]->[0]->append_child
2069     ($self->{document}->create_text_node ($1));
2070     delete $self->{head_element_inserted};
2071     ## NOTE: |</head> <link> |
2072     #
2073     } else {
2074     !!!cp ('t88.2');
2075     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2076     ## NOTE: |</head> &#x20;|
2077     #
2078     }
2079 wakaba 1.99 } else {
2080     !!!cp ('t88.1');
2081     ## Ignore the token.
2082 wakaba 1.177 #
2083 wakaba 1.99 }
2084 wakaba 1.52 unless (length $token->{data}) {
2085 wakaba 1.79 !!!cp ('t88');
2086 wakaba 1.52 !!!next-token;
2087 wakaba 1.126 next B;
2088 wakaba 1.1 }
2089 wakaba 1.177 ## TODO: set $token->{column} appropriately
2090 wakaba 1.1 }
2091 wakaba 1.52
2092 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2093 wakaba 1.79 !!!cp ('t89');
2094 wakaba 1.52 ## As if <head>
2095 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2096 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2097 wakaba 1.123 push @{$self->{open_elements}},
2098     [$self->{head_element}, $el_category->{head}];
2099 wakaba 1.52
2100     ## Reprocess in the "in head" insertion mode...
2101     pop @{$self->{open_elements}};
2102    
2103     ## Reprocess in the "after head" insertion mode...
2104 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2105 wakaba 1.79 !!!cp ('t90');
2106 wakaba 1.52 ## As if </noscript>
2107     pop @{$self->{open_elements}};
2108 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
2109 wakaba 1.1
2110 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
2111     ## As if </head>
2112     pop @{$self->{open_elements}};
2113    
2114     ## Reprocess in the "after head" insertion mode...
2115 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2116 wakaba 1.79 !!!cp ('t91');
2117 wakaba 1.52 pop @{$self->{open_elements}};
2118    
2119     ## Reprocess in the "after head" insertion mode...
2120 wakaba 1.79 } else {
2121     !!!cp ('t92');
2122 wakaba 1.1 }
2123 wakaba 1.52
2124 wakaba 1.123 ## "after head" insertion mode
2125     ## As if <body>
2126     !!!insert-element ('body',, $token);
2127     $self->{insertion_mode} = IN_BODY_IM;
2128     ## reprocess
2129 wakaba 1.126 next B;
2130 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
2131     if ($token->{tag_name} eq 'head') {
2132     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2133     !!!cp ('t93');
2134 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2135 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
2136     ($self->{head_element});
2137     push @{$self->{open_elements}},
2138     [$self->{head_element}, $el_category->{head}];
2139     $self->{insertion_mode} = IN_HEAD_IM;
2140 wakaba 1.125 !!!nack ('t93.1');
2141 wakaba 1.123 !!!next-token;
2142 wakaba 1.126 next B;
2143 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2144 wakaba 1.139 !!!cp ('t93.2');
2145 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
2146     token => $token);
2147 wakaba 1.139 ## Ignore the token
2148     !!!nack ('t93.3');
2149     !!!next-token;
2150     next B;
2151 wakaba 1.125 } else {
2152     !!!cp ('t95');
2153 wakaba 1.153 !!!parse-error (type => 'in head:head',
2154     token => $token); # or in head noscript
2155 wakaba 1.125 ## Ignore the token
2156     !!!nack ('t95.1');
2157     !!!next-token;
2158 wakaba 1.126 next B;
2159 wakaba 1.125 }
2160     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2161 wakaba 1.126 !!!cp ('t96');
2162     ## As if <head>
2163     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2164     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2165     push @{$self->{open_elements}},
2166     [$self->{head_element}, $el_category->{head}];
2167 wakaba 1.52
2168 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
2169     ## Reprocess in the "in head" insertion mode...
2170     } else {
2171     !!!cp ('t97');
2172     }
2173 wakaba 1.52
2174 wakaba 1.202 if ($token->{tag_name} eq 'base') {
2175     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2176     !!!cp ('t98');
2177     ## As if </noscript>
2178     pop @{$self->{open_elements}};
2179     !!!parse-error (type => 'in noscript', text => 'base',
2180     token => $token);
2181    
2182     $self->{insertion_mode} = IN_HEAD_IM;
2183     ## Reprocess in the "in head" insertion mode...
2184     } else {
2185     !!!cp ('t99');
2186     }
2187 wakaba 1.49
2188 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2189     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2190     !!!cp ('t100');
2191     !!!parse-error (type => 'after head',
2192     text => $token->{tag_name}, token => $token);
2193     push @{$self->{open_elements}},
2194     [$self->{head_element}, $el_category->{head}];
2195     $self->{head_element_inserted} = 1;
2196     } else {
2197     !!!cp ('t101');
2198     }
2199     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2200     pop @{$self->{open_elements}};
2201     pop @{$self->{open_elements}} # <head>
2202     if $self->{insertion_mode} == AFTER_HEAD_IM;
2203     !!!nack ('t101.1');
2204     !!!next-token;
2205     next B;
2206 wakaba 1.194 } elsif ($token->{tag_name} eq 'link') {
2207     ## NOTE: There is a "as if in head" code clone.
2208     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2209     !!!cp ('t102');
2210     !!!parse-error (type => 'after head',
2211     text => $token->{tag_name}, token => $token);
2212     push @{$self->{open_elements}},
2213     [$self->{head_element}, $el_category->{head}];
2214 wakaba 1.202 $self->{head_element_inserted} = 1;
2215 wakaba 1.194 } else {
2216     !!!cp ('t103');
2217     }
2218     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2219     pop @{$self->{open_elements}};
2220     pop @{$self->{open_elements}} # <head>
2221     if $self->{insertion_mode} == AFTER_HEAD_IM;
2222     !!!ack ('t103.1');
2223     !!!next-token;
2224     next B;
2225 wakaba 1.232 } elsif ($token->{tag_name} eq 'command') {
2226 wakaba 1.194 if ($self->{insertion_mode} == IN_HEAD_IM) {
2227     ## NOTE: If the insertion mode at the time of the emission
2228     ## of the token was "before head", $self->{insertion_mode}
2229     ## is already changed to |IN_HEAD_IM|.
2230    
2231     ## NOTE: There is a "as if in head" code clone.
2232     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2233     pop @{$self->{open_elements}};
2234     pop @{$self->{open_elements}} # <head>
2235     if $self->{insertion_mode} == AFTER_HEAD_IM;
2236     !!!ack ('t103.2');
2237     !!!next-token;
2238     next B;
2239     } else {
2240     ## NOTE: "in head noscript" or "after head" insertion mode
2241     ## - in these cases, these tags are treated as same as
2242     ## normal in-body tags.
2243     !!!cp ('t103.3');
2244     #
2245     }
2246 wakaba 1.202 } elsif ($token->{tag_name} eq 'meta') {
2247     ## NOTE: There is a "as if in head" code clone.
2248     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2249     !!!cp ('t104');
2250     !!!parse-error (type => 'after head',
2251     text => $token->{tag_name}, token => $token);
2252     push @{$self->{open_elements}},
2253     [$self->{head_element}, $el_category->{head}];
2254     $self->{head_element_inserted} = 1;
2255     } else {
2256     !!!cp ('t105');
2257     }
2258     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2259     my $meta_el = pop @{$self->{open_elements}};
2260 wakaba 1.34
2261     unless ($self->{confident}) {
2262 wakaba 1.134 if ($token->{attributes}->{charset}) {
2263 wakaba 1.79 !!!cp ('t106');
2264 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2265     ## in the {change_encoding} callback.
2266 wakaba 1.63 $self->{change_encoding}
2267 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
2268     $token);
2269 wakaba 1.66
2270     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2271     ->set_user_data (manakai_has_reference =>
2272     $token->{attributes}->{charset}
2273     ->{has_reference});
2274 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
2275     if ($token->{attributes}->{content}->{value}
2276 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2277 wakaba 1.186 [\x09\x0A\x0C\x0D\x20]*=
2278     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2279     ([^"'\x09\x0A\x0C\x0D\x20]
2280     [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2281 wakaba 1.79 !!!cp ('t107');
2282 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2283     ## in the {change_encoding} callback.
2284 wakaba 1.63 $self->{change_encoding}
2285 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2286     $token);
2287 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2288     ->set_user_data (manakai_has_reference =>
2289     $token->{attributes}->{content}
2290     ->{has_reference});
2291 wakaba 1.79 } else {
2292     !!!cp ('t108');
2293 wakaba 1.63 }
2294 wakaba 1.34 }
2295 wakaba 1.66 } else {
2296     if ($token->{attributes}->{charset}) {
2297 wakaba 1.79 !!!cp ('t109');
2298 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2299     ->set_user_data (manakai_has_reference =>
2300     $token->{attributes}->{charset}
2301     ->{has_reference});
2302     }
2303 wakaba 1.68 if ($token->{attributes}->{content}) {
2304 wakaba 1.79 !!!cp ('t110');
2305 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2306     ->set_user_data (manakai_has_reference =>
2307     $token->{attributes}->{content}
2308     ->{has_reference});
2309     }
2310 wakaba 1.34 }
2311    
2312 wakaba 1.100 pop @{$self->{open_elements}} # <head>
2313 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
2314 wakaba 1.125 !!!ack ('t110.1');
2315 wakaba 1.34 !!!next-token;
2316 wakaba 1.126 next B;
2317 wakaba 1.202 } elsif ($token->{tag_name} eq 'title') {
2318     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2319     !!!cp ('t111');
2320     ## As if </noscript>
2321     pop @{$self->{open_elements}};
2322     !!!parse-error (type => 'in noscript', text => 'title',
2323     token => $token);
2324    
2325     $self->{insertion_mode} = IN_HEAD_IM;
2326     ## Reprocess in the "in head" insertion mode...
2327     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2328     !!!cp ('t112');
2329     !!!parse-error (type => 'after head',
2330     text => $token->{tag_name}, token => $token);
2331     push @{$self->{open_elements}},
2332     [$self->{head_element}, $el_category->{head}];
2333     $self->{head_element_inserted} = 1;
2334     } else {
2335     !!!cp ('t113');
2336     }
2337 wakaba 1.49
2338 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2339     $parse_rcdata->(RCDATA_CONTENT_MODEL);
2340 wakaba 1.225
2341     ## NOTE: At this point the stack of open elements contain
2342     ## the |head| element (index == -2) and the |script| element
2343     ## (index == -1). In the "after head" insertion mode the
2344     ## |head| element is inserted only for the purpose of
2345     ## providing the context for the |script| element, and
2346     ## therefore we can now and have to remove the element from
2347     ## the stack.
2348 wakaba 1.205 splice @{$self->{open_elements}}, -2, 1, () # <head>
2349 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2350 wakaba 1.202 next B;
2351     } elsif ($token->{tag_name} eq 'style' or
2352     $token->{tag_name} eq 'noframes') {
2353     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2354     ## insertion mode IN_HEAD_IM)
2355     ## NOTE: There is a "as if in head" code clone.
2356     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2357     !!!cp ('t114');
2358     !!!parse-error (type => 'after head',
2359     text => $token->{tag_name}, token => $token);
2360     push @{$self->{open_elements}},
2361     [$self->{head_element}, $el_category->{head}];
2362     $self->{head_element_inserted} = 1;
2363     } else {
2364     !!!cp ('t115');
2365     }
2366     $parse_rcdata->(CDATA_CONTENT_MODEL);
2367 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2368     splice @{$self->{open_elements}}, -2, 1, () # <head>
2369 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2370 wakaba 1.202 next B;
2371 wakaba 1.205 } elsif ($token->{tag_name} eq 'noscript') {
2372 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
2373 wakaba 1.79 !!!cp ('t116');
2374 wakaba 1.25 ## NOTE: and scripting is disabled
2375 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2376 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2377 wakaba 1.125 !!!nack ('t116.1');
2378 wakaba 1.1 !!!next-token;
2379 wakaba 1.126 next B;
2380 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2381 wakaba 1.79 !!!cp ('t117');
2382 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
2383     token => $token);
2384 wakaba 1.1 ## Ignore the token
2385 wakaba 1.125 !!!nack ('t117.1');
2386 wakaba 1.41 !!!next-token;
2387 wakaba 1.126 next B;
2388 wakaba 1.1 } else {
2389 wakaba 1.79 !!!cp ('t118');
2390 wakaba 1.25 #
2391 wakaba 1.1 }
2392 wakaba 1.202 } elsif ($token->{tag_name} eq 'script') {
2393     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2394     !!!cp ('t119');
2395     ## As if </noscript>
2396     pop @{$self->{open_elements}};
2397     !!!parse-error (type => 'in noscript', text => 'script',
2398     token => $token);
2399    
2400     $self->{insertion_mode} = IN_HEAD_IM;
2401     ## Reprocess in the "in head" insertion mode...
2402     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2403     !!!cp ('t120');
2404     !!!parse-error (type => 'after head',
2405     text => $token->{tag_name}, token => $token);
2406     push @{$self->{open_elements}},
2407     [$self->{head_element}, $el_category->{head}];
2408     $self->{head_element_inserted} = 1;
2409     } else {
2410     !!!cp ('t121');
2411     }
2412 wakaba 1.49
2413 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2414     $script_start_tag->();
2415 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2416     splice @{$self->{open_elements}}, -2, 1 # <head>
2417 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2418 wakaba 1.202 next B;
2419     } elsif ($token->{tag_name} eq 'body' or
2420     $token->{tag_name} eq 'frameset') {
2421 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2422 wakaba 1.79 !!!cp ('t122');
2423 wakaba 1.49 ## As if </noscript>
2424     pop @{$self->{open_elements}};
2425 wakaba 1.153 !!!parse-error (type => 'in noscript',
2426     text => $token->{tag_name}, token => $token);
2427 wakaba 1.49
2428     ## Reprocess in the "in head" insertion mode...
2429     ## As if </head>
2430     pop @{$self->{open_elements}};
2431    
2432     ## Reprocess in the "after head" insertion mode...
2433 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2434 wakaba 1.79 !!!cp ('t124');
2435 wakaba 1.49 pop @{$self->{open_elements}};
2436    
2437     ## Reprocess in the "after head" insertion mode...
2438 wakaba 1.79 } else {
2439     !!!cp ('t125');
2440 wakaba 1.49 }
2441    
2442     ## "after head" insertion mode
2443 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2444 wakaba 1.54 if ($token->{tag_name} eq 'body') {
2445 wakaba 1.79 !!!cp ('t126');
2446 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2447     } elsif ($token->{tag_name} eq 'frameset') {
2448 wakaba 1.79 !!!cp ('t127');
2449 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
2450     } else {
2451     die "$0: tag name: $self->{tag_name}";
2452     }
2453 wakaba 1.125 !!!nack ('t127.1');
2454 wakaba 1.1 !!!next-token;
2455 wakaba 1.126 next B;
2456 wakaba 1.1 } else {
2457 wakaba 1.79 !!!cp ('t128');
2458 wakaba 1.1 #
2459     }
2460 wakaba 1.49
2461 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2462 wakaba 1.79 !!!cp ('t129');
2463 wakaba 1.49 ## As if </noscript>
2464     pop @{$self->{open_elements}};
2465 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2466     text => $token->{tag_name}, token => $token);
2467 wakaba 1.49
2468     ## Reprocess in the "in head" insertion mode...
2469     ## As if </head>
2470 wakaba 1.25 pop @{$self->{open_elements}};
2471 wakaba 1.49
2472     ## Reprocess in the "after head" insertion mode...
2473 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2474 wakaba 1.79 !!!cp ('t130');
2475 wakaba 1.49 ## As if </head>
2476 wakaba 1.25 pop @{$self->{open_elements}};
2477 wakaba 1.49
2478     ## Reprocess in the "after head" insertion mode...
2479 wakaba 1.79 } else {
2480     !!!cp ('t131');
2481 wakaba 1.49 }
2482    
2483     ## "after head" insertion mode
2484     ## As if <body>
2485 wakaba 1.116 !!!insert-element ('body',, $token);
2486 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2487 wakaba 1.49 ## reprocess
2488 wakaba 1.125 !!!ack-later;
2489 wakaba 1.126 next B;
2490 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2491 wakaba 1.49 if ($token->{tag_name} eq 'head') {
2492 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2493 wakaba 1.79 !!!cp ('t132');
2494 wakaba 1.50 ## As if <head>
2495 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2496 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2497 wakaba 1.123 push @{$self->{open_elements}},
2498     [$self->{head_element}, $el_category->{head}];
2499 wakaba 1.50
2500     ## Reprocess in the "in head" insertion mode...
2501     pop @{$self->{open_elements}};
2502 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2503 wakaba 1.50 !!!next-token;
2504 wakaba 1.126 next B;
2505 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2506 wakaba 1.79 !!!cp ('t133');
2507 wakaba 1.49 ## As if </noscript>
2508     pop @{$self->{open_elements}};
2509 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2510     text => 'head', token => $token);
2511 wakaba 1.49
2512     ## Reprocess in the "in head" insertion mode...
2513 wakaba 1.50 pop @{$self->{open_elements}};
2514 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2515 wakaba 1.50 !!!next-token;
2516 wakaba 1.126 next B;
2517 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2518 wakaba 1.79 !!!cp ('t134');
2519 wakaba 1.49 pop @{$self->{open_elements}};
2520 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2521 wakaba 1.49 !!!next-token;
2522 wakaba 1.126 next B;
2523 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2524     !!!cp ('t134.1');
2525 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
2526     token => $token);
2527 wakaba 1.139 ## Ignore the token
2528     !!!next-token;
2529     next B;
2530 wakaba 1.49 } else {
2531 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2532 wakaba 1.49 }
2533     } elsif ($token->{tag_name} eq 'noscript') {
2534 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2535 wakaba 1.79 !!!cp ('t136');
2536 wakaba 1.49 pop @{$self->{open_elements}};
2537 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
2538 wakaba 1.49 !!!next-token;
2539 wakaba 1.126 next B;
2540 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
2541     $self->{insertion_mode} == AFTER_HEAD_IM) {
2542 wakaba 1.79 !!!cp ('t137');
2543 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2544     text => 'noscript', token => $token);
2545 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2546     !!!next-token;
2547 wakaba 1.126 next B;
2548 wakaba 1.49 } else {
2549 wakaba 1.79 !!!cp ('t138');
2550 wakaba 1.49 #
2551     }
2552     } elsif ({
2553 wakaba 1.31 body => 1, html => 1,
2554     }->{$token->{tag_name}}) {
2555 wakaba 1.203 ## TODO: This branch is entirely redundant.
2556     if ($self->{insertion_mode} == BEFORE_HEAD_IM or
2557 wakaba 1.139 $self->{insertion_mode} == IN_HEAD_IM or
2558     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2559 wakaba 1.79 !!!cp ('t140');
2560 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2561     text => $token->{tag_name}, token => $token);
2562 wakaba 1.49 ## Ignore the token
2563     !!!next-token;
2564 wakaba 1.126 next B;
2565 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2566     !!!cp ('t140.1');
2567 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2568     text => $token->{tag_name}, token => $token);
2569 wakaba 1.139 ## Ignore the token
2570     !!!next-token;
2571     next B;
2572 wakaba 1.79 } else {
2573 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2574 wakaba 1.49 }
2575 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
2576     !!!cp ('t142');
2577 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2578     text => $token->{tag_name}, token => $token);
2579 wakaba 1.139 ## Ignore the token
2580     !!!next-token;
2581     next B;
2582 wakaba 1.224 } elsif ($token->{tag_name} eq 'br') {
2583     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2584     !!!cp ('t142.2');
2585     ## (before head) as if <head>, (in head) as if </head>
2586     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2587     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2588     $self->{insertion_mode} = AFTER_HEAD_IM;
2589 wakaba 1.139
2590 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2591     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2592     !!!cp ('t143.2');
2593     ## As if </head>
2594     pop @{$self->{open_elements}};
2595     $self->{insertion_mode} = AFTER_HEAD_IM;
2596 wakaba 1.139
2597 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2598     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2599     !!!cp ('t143.3');
2600     ## NOTE: Two parse errors for <head><noscript></br>
2601     !!!parse-error (type => 'unmatched end tag',
2602     text => 'br', token => $token);
2603     ## As if </noscript>
2604     pop @{$self->{open_elements}};
2605     $self->{insertion_mode} = IN_HEAD_IM;
2606 wakaba 1.50
2607 wakaba 1.224 ## Reprocess in the "in head" insertion mode...
2608     ## As if </head>
2609     pop @{$self->{open_elements}};
2610     $self->{insertion_mode} = AFTER_HEAD_IM;
2611 wakaba 1.139
2612 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2613     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2614     !!!cp ('t143.4');
2615     #
2616     } else {
2617     die "$0: $self->{insertion_mode}: Unknown insertion mode";
2618     }
2619 wakaba 1.50
2620 wakaba 1.224 #
2621     } else { ## Other end tags
2622 wakaba 1.139 !!!cp ('t145');
2623 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2624     text => $token->{tag_name}, token => $token);
2625 wakaba 1.139 ## Ignore the token
2626     !!!next-token;
2627     next B;
2628 wakaba 1.49 }
2629    
2630 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2631 wakaba 1.79 !!!cp ('t146');
2632 wakaba 1.49 ## As if </noscript>
2633     pop @{$self->{open_elements}};
2634 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2635     text => $token->{tag_name}, token => $token);
2636 wakaba 1.49
2637     ## Reprocess in the "in head" insertion mode...
2638     ## As if </head>
2639     pop @{$self->{open_elements}};
2640    
2641     ## Reprocess in the "after head" insertion mode...
2642 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2643 wakaba 1.79 !!!cp ('t147');
2644 wakaba 1.49 ## As if </head>
2645     pop @{$self->{open_elements}};
2646    
2647     ## Reprocess in the "after head" insertion mode...
2648 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2649 wakaba 1.82 ## ISSUE: This case cannot be reached?
2650 wakaba 1.79 !!!cp ('t148');
2651 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2652     text => $token->{tag_name}, token => $token);
2653 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2654     !!!next-token;
2655 wakaba 1.126 next B;
2656 wakaba 1.79 } else {
2657     !!!cp ('t149');
2658 wakaba 1.1 }
2659    
2660 wakaba 1.49 ## "after head" insertion mode
2661     ## As if <body>
2662 wakaba 1.116 !!!insert-element ('body',, $token);
2663 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2664 wakaba 1.52 ## reprocess
2665 wakaba 1.224 next B;
2666 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2667     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2668     !!!cp ('t149.1');
2669    
2670     ## NOTE: As if <head>
2671 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2672 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
2673     ($self->{head_element});
2674 wakaba 1.123 #push @{$self->{open_elements}},
2675     # [$self->{head_element}, $el_category->{head}];
2676 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
2677     ## NOTE: Reprocess.
2678    
2679     ## NOTE: As if </head>
2680     #pop @{$self->{open_elements}};
2681     #$self->{insertion_mode} = AFTER_HEAD_IM;
2682     ## NOTE: Reprocess.
2683    
2684     #
2685     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2686     !!!cp ('t149.2');
2687    
2688     ## NOTE: As if </head>
2689     pop @{$self->{open_elements}};
2690     #$self->{insertion_mode} = AFTER_HEAD_IM;
2691     ## NOTE: Reprocess.
2692    
2693     #
2694     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2695     !!!cp ('t149.3');
2696    
2697 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
2698 wakaba 1.104
2699     ## As if </noscript>
2700     pop @{$self->{open_elements}};
2701     #$self->{insertion_mode} = IN_HEAD_IM;
2702     ## NOTE: Reprocess.
2703    
2704     ## NOTE: As if </head>
2705     pop @{$self->{open_elements}};
2706     #$self->{insertion_mode} = AFTER_HEAD_IM;
2707     ## NOTE: Reprocess.
2708    
2709     #
2710     } else {
2711     !!!cp ('t149.4');
2712     #
2713     }
2714    
2715     ## NOTE: As if <body>
2716 wakaba 1.116 !!!insert-element ('body',, $token);
2717 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
2718     ## NOTE: Reprocess.
2719 wakaba 1.126 next B;
2720 wakaba 1.104 } else {
2721     die "$0: $token->{type}: Unknown token type";
2722     }
2723 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
2724 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2725 wakaba 1.79 !!!cp ('t150');
2726 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
2727     $reconstruct_active_formatting_elements->($insert_to_current);
2728    
2729     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2730    
2731     !!!next-token;
2732 wakaba 1.126 next B;
2733 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
2734 wakaba 1.52 if ({
2735     caption => 1, col => 1, colgroup => 1, tbody => 1,
2736     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2737     }->{$token->{tag_name}}) {
2738 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2739 wakaba 1.52 ## have an element in table scope
2740 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
2741 wakaba 1.52 my $node = $self->{open_elements}->[$_];
2742 wakaba 1.206 if ($node->[1] == TABLE_CELL_EL) {
2743 wakaba 1.79 !!!cp ('t151');
2744 wakaba 1.108
2745     ## Close the cell
2746 wakaba 1.125 !!!back-token; # <x>
2747 wakaba 1.122 $token = {type => END_TAG_TOKEN,
2748     tag_name => $node->[0]->manakai_local_name,
2749 wakaba 1.114 line => $token->{line},
2750     column => $token->{column}};
2751 wakaba 1.126 next B;
2752 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2753 wakaba 1.79 !!!cp ('t152');
2754 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
2755     last;
2756 wakaba 1.52 }
2757 wakaba 1.108 }
2758    
2759     !!!cp ('t153');
2760     !!!parse-error (type => 'start tag not allowed',
2761 wakaba 1.153 text => $token->{tag_name}, token => $token);
2762 wakaba 1.108 ## Ignore the token
2763 wakaba 1.125 !!!nack ('t153.1');
2764 wakaba 1.108 !!!next-token;
2765 wakaba 1.126 next B;
2766 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2767 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2768     token => $token);
2769 wakaba 1.52
2770 wakaba 1.108 ## NOTE: As if </caption>.
2771 wakaba 1.52 ## have a table element in table scope
2772     my $i;
2773 wakaba 1.108 INSCOPE: {
2774     for (reverse 0..$#{$self->{open_elements}}) {
2775     my $node = $self->{open_elements}->[$_];
2776 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2777 wakaba 1.108 !!!cp ('t155');
2778     $i = $_;
2779     last INSCOPE;
2780 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2781 wakaba 1.108 !!!cp ('t156');
2782     last;
2783     }
2784 wakaba 1.52 }
2785 wakaba 1.108
2786     !!!cp ('t157');
2787     !!!parse-error (type => 'start tag not allowed',
2788 wakaba 1.153 text => $token->{tag_name}, token => $token);
2789 wakaba 1.108 ## Ignore the token
2790 wakaba 1.125 !!!nack ('t157.1');
2791 wakaba 1.108 !!!next-token;
2792 wakaba 1.126 next B;
2793 wakaba 1.52 } # INSCOPE
2794    
2795     ## generate implied end tags
2796 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2797     & END_TAG_OPTIONAL_EL) {
2798 wakaba 1.79 !!!cp ('t158');
2799 wakaba 1.86 pop @{$self->{open_elements}};
2800 wakaba 1.52 }
2801    
2802 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2803 wakaba 1.79 !!!cp ('t159');
2804 wakaba 1.122 !!!parse-error (type => 'not closed',
2805 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2806 wakaba 1.122 ->manakai_local_name,
2807     token => $token);
2808 wakaba 1.79 } else {
2809     !!!cp ('t160');
2810 wakaba 1.52 }
2811    
2812     splice @{$self->{open_elements}}, $i;
2813    
2814     $clear_up_to_marker->();
2815    
2816 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2817 wakaba 1.52
2818     ## reprocess
2819 wakaba 1.125 !!!ack-later;
2820 wakaba 1.126 next B;
2821 wakaba 1.52 } else {
2822 wakaba 1.79 !!!cp ('t161');
2823 wakaba 1.52 #
2824     }
2825     } else {
2826 wakaba 1.79 !!!cp ('t162');
2827 wakaba 1.52 #
2828     }
2829 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2830 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2831 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2832 wakaba 1.43 ## have an element in table scope
2833 wakaba 1.52 my $i;
2834 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2835     my $node = $self->{open_elements}->[$_];
2836 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2837 wakaba 1.79 !!!cp ('t163');
2838 wakaba 1.52 $i = $_;
2839 wakaba 1.43 last INSCOPE;
2840 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2841 wakaba 1.79 !!!cp ('t164');
2842 wakaba 1.43 last INSCOPE;
2843     }
2844     } # INSCOPE
2845 wakaba 1.52 unless (defined $i) {
2846 wakaba 1.79 !!!cp ('t165');
2847 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2848     text => $token->{tag_name},
2849     token => $token);
2850 wakaba 1.43 ## Ignore the token
2851     !!!next-token;
2852 wakaba 1.126 next B;
2853 wakaba 1.43 }
2854    
2855 wakaba 1.52 ## generate implied end tags
2856 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2857     & END_TAG_OPTIONAL_EL) {
2858 wakaba 1.79 !!!cp ('t166');
2859 wakaba 1.86 pop @{$self->{open_elements}};
2860 wakaba 1.52 }
2861 wakaba 1.86
2862 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2863     ne $token->{tag_name}) {
2864 wakaba 1.79 !!!cp ('t167');
2865 wakaba 1.122 !!!parse-error (type => 'not closed',
2866 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2867 wakaba 1.122 ->manakai_local_name,
2868     token => $token);
2869 wakaba 1.79 } else {
2870     !!!cp ('t168');
2871 wakaba 1.52 }
2872    
2873     splice @{$self->{open_elements}}, $i;
2874    
2875     $clear_up_to_marker->();
2876    
2877 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
2878 wakaba 1.52
2879     !!!next-token;
2880 wakaba 1.126 next B;
2881 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2882 wakaba 1.79 !!!cp ('t169');
2883 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2884     text => $token->{tag_name}, token => $token);
2885 wakaba 1.52 ## Ignore the token
2886     !!!next-token;
2887 wakaba 1.126 next B;
2888 wakaba 1.52 } else {
2889 wakaba 1.79 !!!cp ('t170');
2890 wakaba 1.52 #
2891     }
2892     } elsif ($token->{tag_name} eq 'caption') {
2893 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2894 wakaba 1.43 ## have a table element in table scope
2895     my $i;
2896 wakaba 1.108 INSCOPE: {
2897     for (reverse 0..$#{$self->{open_elements}}) {
2898     my $node = $self->{open_elements}->[$_];
2899 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2900 wakaba 1.108 !!!cp ('t171');
2901     $i = $_;
2902     last INSCOPE;
2903 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2904 wakaba 1.108 !!!cp ('t172');
2905     last;
2906     }
2907 wakaba 1.43 }
2908 wakaba 1.108
2909     !!!cp ('t173');
2910     !!!parse-error (type => 'unmatched end tag',
2911 wakaba 1.153 text => $token->{tag_name}, token => $token);
2912 wakaba 1.108 ## Ignore the token
2913     !!!next-token;
2914 wakaba 1.126 next B;
2915 wakaba 1.43 } # INSCOPE
2916    
2917     ## generate implied end tags
2918 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2919     & END_TAG_OPTIONAL_EL) {
2920 wakaba 1.79 !!!cp ('t174');
2921 wakaba 1.86 pop @{$self->{open_elements}};
2922 wakaba 1.43 }
2923 wakaba 1.52
2924 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2925 wakaba 1.79 !!!cp ('t175');
2926 wakaba 1.122 !!!parse-error (type => 'not closed',
2927 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2928 wakaba 1.122 ->manakai_local_name,
2929     token => $token);
2930 wakaba 1.79 } else {
2931     !!!cp ('t176');
2932 wakaba 1.52 }
2933    
2934     splice @{$self->{open_elements}}, $i;
2935    
2936     $clear_up_to_marker->();
2937    
2938 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2939 wakaba 1.52
2940     !!!next-token;
2941 wakaba 1.126 next B;
2942 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2943 wakaba 1.79 !!!cp ('t177');
2944 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2945     text => $token->{tag_name}, token => $token);
2946 wakaba 1.52 ## Ignore the token
2947     !!!next-token;
2948 wakaba 1.126 next B;
2949 wakaba 1.52 } else {
2950 wakaba 1.79 !!!cp ('t178');
2951 wakaba 1.52 #
2952     }
2953     } elsif ({
2954     table => 1, tbody => 1, tfoot => 1,
2955     thead => 1, tr => 1,
2956     }->{$token->{tag_name}} and
2957 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2958 wakaba 1.52 ## have an element in table scope
2959     my $i;
2960     my $tn;
2961 wakaba 1.108 INSCOPE: {
2962     for (reverse 0..$#{$self->{open_elements}}) {
2963     my $node = $self->{open_elements}->[$_];
2964 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2965 wakaba 1.108 !!!cp ('t179');
2966     $i = $_;
2967    
2968     ## Close the cell
2969 wakaba 1.125 !!!back-token; # </x>
2970 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2971     line => $token->{line},
2972     column => $token->{column}};
2973 wakaba 1.126 next B;
2974 wakaba 1.206 } elsif ($node->[1] == TABLE_CELL_EL) {
2975 wakaba 1.108 !!!cp ('t180');
2976 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
2977 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
2978     ## in scope in the stack of open elements by definition.
2979 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2980 wakaba 1.108 ## ISSUE: Can this be reached?
2981     !!!cp ('t181');
2982     last;
2983     }
2984 wakaba 1.52 }
2985 wakaba 1.108
2986 wakaba 1.79 !!!cp ('t182');
2987 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
2988 wakaba 1.153 text => $token->{tag_name}, token => $token);
2989 wakaba 1.52 ## Ignore the token
2990     !!!next-token;
2991 wakaba 1.126 next B;
2992 wakaba 1.108 } # INSCOPE
2993 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
2994 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2995 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2996     token => $token);
2997 wakaba 1.52
2998     ## As if </caption>
2999     ## have a table element in table scope
3000     my $i;
3001     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3002     my $node = $self->{open_elements}->[$_];
3003 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
3004 wakaba 1.79 !!!cp ('t184');
3005 wakaba 1.52 $i = $_;
3006     last INSCOPE;
3007 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3008 wakaba 1.79 !!!cp ('t185');
3009 wakaba 1.52 last INSCOPE;
3010     }
3011     } # INSCOPE
3012     unless (defined $i) {
3013 wakaba 1.79 !!!cp ('t186');
3014 wakaba 1.209 ## TODO: Wrong error type?
3015 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3016     text => 'caption', token => $token);
3017 wakaba 1.52 ## Ignore the token
3018     !!!next-token;
3019 wakaba 1.126 next B;
3020 wakaba 1.52 }
3021    
3022     ## generate implied end tags
3023 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3024 wakaba 1.79 !!!cp ('t187');
3025 wakaba 1.86 pop @{$self->{open_elements}};
3026 wakaba 1.52 }
3027    
3028 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
3029 wakaba 1.79 !!!cp ('t188');
3030 wakaba 1.122 !!!parse-error (type => 'not closed',
3031 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3032 wakaba 1.122 ->manakai_local_name,
3033     token => $token);
3034 wakaba 1.79 } else {
3035     !!!cp ('t189');
3036 wakaba 1.52 }
3037    
3038     splice @{$self->{open_elements}}, $i;
3039    
3040     $clear_up_to_marker->();
3041    
3042 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3043 wakaba 1.52
3044     ## reprocess
3045 wakaba 1.126 next B;
3046 wakaba 1.52 } elsif ({
3047     body => 1, col => 1, colgroup => 1, html => 1,
3048     }->{$token->{tag_name}}) {
3049 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
3050 wakaba 1.79 !!!cp ('t190');
3051 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3052     text => $token->{tag_name}, token => $token);
3053 wakaba 1.52 ## Ignore the token
3054     !!!next-token;
3055 wakaba 1.126 next B;
3056 wakaba 1.52 } else {
3057 wakaba 1.79 !!!cp ('t191');
3058 wakaba 1.52 #
3059     }
3060 wakaba 1.210 } elsif ({
3061     tbody => 1, tfoot => 1,
3062     thead => 1, tr => 1,
3063     }->{$token->{tag_name}} and
3064     ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
3065     !!!cp ('t192');
3066     !!!parse-error (type => 'unmatched end tag',
3067     text => $token->{tag_name}, token => $token);
3068     ## Ignore the token
3069     !!!next-token;
3070     next B;
3071     } else {
3072     !!!cp ('t193');
3073     #
3074     }
3075 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3076     for my $entry (@{$self->{open_elements}}) {
3077 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
3078 wakaba 1.104 !!!cp ('t75');
3079 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3080 wakaba 1.104 last;
3081     }
3082     }
3083    
3084     ## Stop parsing.
3085     last B;
3086 wakaba 1.52 } else {
3087     die "$0: $token->{type}: Unknown token type";
3088     }
3089    
3090     $insert = $insert_to_current;
3091     #
3092 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3093 wakaba 1.229 if ($token->{type} == START_TAG_TOKEN) {
3094 wakaba 1.153 if ({
3095 wakaba 1.210 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3096 wakaba 1.153 th => 1, td => 1,
3097     }->{$token->{tag_name}}) {
3098 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3099 wakaba 1.153 ## Clear back to table context
3100     while (not ($self->{open_elements}->[-1]->[1]
3101     & TABLE_SCOPING_EL)) {
3102     !!!cp ('t201');
3103     pop @{$self->{open_elements}};
3104     }
3105    
3106     !!!insert-element ('tbody',, $token);
3107     $self->{insertion_mode} = IN_TABLE_BODY_IM;
3108     ## reprocess in the "in table body" insertion mode...
3109     }
3110    
3111 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3112 wakaba 1.153 unless ($token->{tag_name} eq 'tr') {
3113     !!!cp ('t202');
3114     !!!parse-error (type => 'missing start tag:tr', token => $token);
3115     }
3116 wakaba 1.43
3117 wakaba 1.153 ## Clear back to table body context
3118     while (not ($self->{open_elements}->[-1]->[1]
3119     & TABLE_ROWS_SCOPING_EL)) {
3120     !!!cp ('t203');
3121     ## ISSUE: Can this case be reached?
3122     pop @{$self->{open_elements}};
3123     }
3124 wakaba 1.43
3125 wakaba 1.202 $self->{insertion_mode} = IN_ROW_IM;
3126     if ($token->{tag_name} eq 'tr') {
3127     !!!cp ('t204');
3128     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3129     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3130     !!!nack ('t204');
3131     !!!next-token;
3132     next B;
3133     } else {
3134     !!!cp ('t205');
3135     !!!insert-element ('tr',, $token);
3136     ## reprocess in the "in row" insertion mode
3137     }
3138     } else {
3139     !!!cp ('t206');
3140     }
3141 wakaba 1.52
3142     ## Clear back to table row context
3143 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3144     & TABLE_ROW_SCOPING_EL)) {
3145 wakaba 1.79 !!!cp ('t207');
3146 wakaba 1.52 pop @{$self->{open_elements}};
3147 wakaba 1.43 }
3148 wakaba 1.52
3149 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3150     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3151     $self->{insertion_mode} = IN_CELL_IM;
3152 wakaba 1.52
3153 wakaba 1.202 push @$active_formatting_elements, ['#marker', ''];
3154 wakaba 1.52
3155 wakaba 1.202 !!!nack ('t207.1');
3156     !!!next-token;
3157     next B;
3158     } elsif ({
3159     caption => 1, col => 1, colgroup => 1,
3160     tbody => 1, tfoot => 1, thead => 1,
3161     tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3162     }->{$token->{tag_name}}) {
3163 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3164 wakaba 1.202 ## As if </tr>
3165     ## have an element in table scope
3166     my $i;
3167     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3168     my $node = $self->{open_elements}->[$_];
3169 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3170 wakaba 1.202 !!!cp ('t208');
3171     $i = $_;
3172     last INSCOPE;
3173     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3174     !!!cp ('t209');
3175     last INSCOPE;
3176     }
3177     } # INSCOPE
3178     unless (defined $i) {
3179     !!!cp ('t210');
3180     ## TODO: This type is wrong.
3181     !!!parse-error (type => 'unmacthed end tag',
3182     text => $token->{tag_name}, token => $token);
3183     ## Ignore the token
3184     !!!nack ('t210.1');
3185 wakaba 1.52 !!!next-token;
3186 wakaba 1.126 next B;
3187 wakaba 1.202 }
3188 wakaba 1.43
3189 wakaba 1.52 ## Clear back to table row context
3190 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3191     & TABLE_ROW_SCOPING_EL)) {
3192 wakaba 1.79 !!!cp ('t211');
3193 wakaba 1.83 ## ISSUE: Can this case be reached?
3194 wakaba 1.52 pop @{$self->{open_elements}};
3195 wakaba 1.1 }
3196 wakaba 1.43
3197 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3198 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3199 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
3200 wakaba 1.79 !!!cp ('t212');
3201 wakaba 1.52 ## reprocess
3202 wakaba 1.125 !!!ack-later;
3203 wakaba 1.126 next B;
3204 wakaba 1.52 } else {
3205 wakaba 1.79 !!!cp ('t213');
3206 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3207     }
3208 wakaba 1.1 }
3209 wakaba 1.52
3210 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3211 wakaba 1.52 ## have an element in table scope
3212 wakaba 1.43 my $i;
3213     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3214     my $node = $self->{open_elements}->[$_];
3215 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3216 wakaba 1.79 !!!cp ('t214');
3217 wakaba 1.43 $i = $_;
3218     last INSCOPE;
3219 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3220 wakaba 1.79 !!!cp ('t215');
3221 wakaba 1.43 last INSCOPE;
3222     }
3223     } # INSCOPE
3224 wakaba 1.52 unless (defined $i) {
3225 wakaba 1.79 !!!cp ('t216');
3226 wakaba 1.153 ## TODO: This error type is wrong.
3227     !!!parse-error (type => 'unmatched end tag',
3228     text => $token->{tag_name}, token => $token);
3229 wakaba 1.52 ## Ignore the token
3230 wakaba 1.125 !!!nack ('t216.1');
3231 wakaba 1.52 !!!next-token;
3232 wakaba 1.126 next B;
3233 wakaba 1.43 }
3234 wakaba 1.52
3235     ## Clear back to table body context
3236 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3237     & TABLE_ROWS_SCOPING_EL)) {
3238 wakaba 1.79 !!!cp ('t217');
3239 wakaba 1.83 ## ISSUE: Can this state be reached?
3240 wakaba 1.52 pop @{$self->{open_elements}};
3241 wakaba 1.43 }
3242    
3243 wakaba 1.52 ## As if <{current node}>
3244     ## have an element in table scope
3245     ## true by definition
3246 wakaba 1.43
3247 wakaba 1.52 ## Clear back to table body context
3248     ## nop by definition
3249 wakaba 1.43
3250 wakaba 1.52 pop @{$self->{open_elements}};
3251 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3252 wakaba 1.52 ## reprocess in "in table" insertion mode...
3253 wakaba 1.79 } else {
3254     !!!cp ('t218');
3255 wakaba 1.52 }
3256    
3257 wakaba 1.202 if ($token->{tag_name} eq 'col') {
3258     ## Clear back to table context
3259     while (not ($self->{open_elements}->[-1]->[1]
3260     & TABLE_SCOPING_EL)) {
3261     !!!cp ('t219');
3262     ## ISSUE: Can this state be reached?
3263     pop @{$self->{open_elements}};
3264     }
3265    
3266     !!!insert-element ('colgroup',, $token);
3267     $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3268     ## reprocess
3269     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3270     !!!ack-later;
3271     next B;
3272     } elsif ({
3273     caption => 1,
3274     colgroup => 1,
3275     tbody => 1, tfoot => 1, thead => 1,
3276     }->{$token->{tag_name}}) {
3277     ## Clear back to table context
3278 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3279     & TABLE_SCOPING_EL)) {
3280 wakaba 1.79 !!!cp ('t220');
3281 wakaba 1.83 ## ISSUE: Can this state be reached?
3282 wakaba 1.52 pop @{$self->{open_elements}};
3283 wakaba 1.1 }
3284 wakaba 1.52
3285 wakaba 1.202 push @$active_formatting_elements, ['#marker', '']
3286     if $token->{tag_name} eq 'caption';
3287 wakaba 1.52
3288 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3289     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3290     $self->{insertion_mode} = {
3291     caption => IN_CAPTION_IM,
3292     colgroup => IN_COLUMN_GROUP_IM,
3293     tbody => IN_TABLE_BODY_IM,
3294     tfoot => IN_TABLE_BODY_IM,
3295     thead => IN_TABLE_BODY_IM,
3296     }->{$token->{tag_name}};
3297     !!!next-token;
3298     !!!nack ('t220.1');
3299     next B;
3300     } else {
3301     die "$0: in table: <>: $token->{tag_name}";
3302     }
3303 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3304 wakaba 1.122 !!!parse-error (type => 'not closed',
3305 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3306 wakaba 1.122 ->manakai_local_name,
3307     token => $token);
3308 wakaba 1.1
3309 wakaba 1.52 ## As if </table>
3310 wakaba 1.1 ## have a table element in table scope
3311     my $i;
3312 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3313     my $node = $self->{open_elements}->[$_];
3314 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3315 wakaba 1.79 !!!cp ('t221');
3316 wakaba 1.1 $i = $_;
3317     last INSCOPE;
3318 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3319 wakaba 1.79 !!!cp ('t222');
3320 wakaba 1.1 last INSCOPE;
3321     }
3322     } # INSCOPE
3323     unless (defined $i) {
3324 wakaba 1.79 !!!cp ('t223');
3325 wakaba 1.83 ## TODO: The following is wrong, maybe.
3326 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
3327     token => $token);
3328 wakaba 1.52 ## Ignore tokens </table><table>
3329 wakaba 1.125 !!!nack ('t223.1');
3330 wakaba 1.1 !!!next-token;
3331 wakaba 1.126 next B;
3332 wakaba 1.1 }
3333    
3334 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
3335 wakaba 1.1 ## generate implied end tags
3336 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3337 wakaba 1.79 !!!cp ('t224');
3338 wakaba 1.86 pop @{$self->{open_elements}};
3339 wakaba 1.1 }
3340    
3341 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3342 wakaba 1.79 !!!cp ('t225');
3343 wakaba 1.122 ## NOTE: |<table><tr><table>|
3344     !!!parse-error (type => 'not closed',
3345 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3346 wakaba 1.122 ->manakai_local_name,
3347     token => $token);
3348 wakaba 1.79 } else {
3349     !!!cp ('t226');
3350 wakaba 1.1 }
3351    
3352 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3353 wakaba 1.95 pop @{$open_tables};
3354 wakaba 1.1
3355 wakaba 1.52 $self->_reset_insertion_mode;
3356 wakaba 1.1
3357 wakaba 1.125 ## reprocess
3358     !!!ack-later;
3359 wakaba 1.126 next B;
3360 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
3361 wakaba 1.233 !!!cp ('t227.8');
3362     ## NOTE: This is an "as if in head" code clone.
3363     $parse_rcdata->(CDATA_CONTENT_MODEL);
3364     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3365     next B;
3366 wakaba 1.100 } elsif ($token->{tag_name} eq 'script') {
3367 wakaba 1.233 !!!cp ('t227.6');
3368     ## NOTE: This is an "as if in head" code clone.
3369     $script_start_tag->();
3370     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3371     next B;
3372 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
3373 wakaba 1.233 if ($token->{attributes}->{type}) {
3374     my $type = $token->{attributes}->{type}->{value};
3375     $type =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
3376     if ($type eq 'hidden') {
3377     !!!cp ('t227.3');
3378     !!!parse-error (type => 'in table',
3379     text => $token->{tag_name}, token => $token);
3380 wakaba 1.98
3381 wakaba 1.233 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3382     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3383 wakaba 1.98
3384 wakaba 1.233 ## TODO: form element pointer
3385 wakaba 1.98
3386 wakaba 1.233 pop @{$self->{open_elements}};
3387 wakaba 1.98
3388 wakaba 1.233 !!!next-token;
3389     !!!ack ('t227.2.1');
3390     next B;
3391 wakaba 1.98 } else {
3392     !!!cp ('t227.1');
3393     #
3394     }
3395     } else {
3396     !!!cp ('t227.4');
3397     #
3398     }
3399 wakaba 1.58 } else {
3400 wakaba 1.79 !!!cp ('t227');
3401 wakaba 1.58 #
3402     }
3403 wakaba 1.98
3404 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
3405     token => $token);
3406 wakaba 1.98
3407     $insert = $insert_to_foster;
3408     #
3409 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
3410 wakaba 1.210 if ($token->{tag_name} eq 'tr' and
3411     ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3412     ## have an element in table scope
3413 wakaba 1.52 my $i;
3414     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3415     my $node = $self->{open_elements}->[$_];
3416 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3417 wakaba 1.79 !!!cp ('t228');
3418 wakaba 1.52 $i = $_;
3419     last INSCOPE;
3420 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3421 wakaba 1.79 !!!cp ('t229');
3422 wakaba 1.52 last INSCOPE;
3423     }
3424     } # INSCOPE
3425     unless (defined $i) {
3426 wakaba 1.79 !!!cp ('t230');
3427 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3428     text => $token->{tag_name}, token => $token);
3429 wakaba 1.52 ## Ignore the token
3430 wakaba 1.125 !!!nack ('t230.1');
3431 wakaba 1.42 !!!next-token;
3432 wakaba 1.126 next B;
3433 wakaba 1.79 } else {
3434     !!!cp ('t232');
3435 wakaba 1.42 }
3436    
3437 wakaba 1.52 ## Clear back to table row context
3438 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3439     & TABLE_ROW_SCOPING_EL)) {
3440 wakaba 1.79 !!!cp ('t231');
3441 wakaba 1.83 ## ISSUE: Can this state be reached?
3442 wakaba 1.52 pop @{$self->{open_elements}};
3443     }
3444 wakaba 1.42
3445 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3446 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3447 wakaba 1.52 !!!next-token;
3448 wakaba 1.125 !!!nack ('t231.1');
3449 wakaba 1.126 next B;
3450 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3451 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3452 wakaba 1.52 ## As if </tr>
3453     ## have an element in table scope
3454     my $i;
3455     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3456     my $node = $self->{open_elements}->[$_];
3457 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3458 wakaba 1.79 !!!cp ('t233');
3459 wakaba 1.52 $i = $_;
3460     last INSCOPE;
3461 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3462 wakaba 1.79 !!!cp ('t234');
3463 wakaba 1.52 last INSCOPE;
3464 wakaba 1.42 }
3465 wakaba 1.52 } # INSCOPE
3466     unless (defined $i) {
3467 wakaba 1.79 !!!cp ('t235');
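3468 wakaba 1.83 ## TODO: The following is wrong. NOTE(review): |text| is given $token->{type} here, not the tag name as in sibling branches -- confirm.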
3469 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3470     text => $token->{type}, token => $token);
3471 wakaba 1.52 ## Ignore the token
3472 wakaba 1.125 !!!nack ('t236.1');
3473 wakaba 1.52 !!!next-token;
3474 wakaba 1.126 next B;
3475 wakaba 1.42 }
3476 wakaba 1.52
3477     ## Clear back to table row context
3478 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3479     & TABLE_ROW_SCOPING_EL)) {
3480 wakaba 1.79 !!!cp ('t236');
3481 wakaba 1.83 ## ISSUE: Can this state be reached?
3482 wakaba 1.46 pop @{$self->{open_elements}};
3483 wakaba 1.1 }
3484 wakaba 1.46
3485 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3486 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3487 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
3488 wakaba 1.1 }
3489    
3490 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3491 wakaba 1.52 ## have an element in table scope
3492     my $i;
3493     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3494     my $node = $self->{open_elements}->[$_];
3495 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3496 wakaba 1.79 !!!cp ('t237');
3497 wakaba 1.52 $i = $_;
3498     last INSCOPE;
3499 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3500 wakaba 1.79 !!!cp ('t238');
3501 wakaba 1.52 last INSCOPE;
3502     }
3503     } # INSCOPE
3504     unless (defined $i) {
3505 wakaba 1.79 !!!cp ('t239');
3506 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3507     text => $token->{tag_name}, token => $token);
3508 wakaba 1.52 ## Ignore the token
3509 wakaba 1.125 !!!nack ('t239.1');
3510 wakaba 1.52 !!!next-token;
3511 wakaba 1.126 next B;
3512 wakaba 1.47 }
3513    
3514     ## Clear back to table body context
3515 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3516     & TABLE_ROWS_SCOPING_EL)) {
3517 wakaba 1.79 !!!cp ('t240');
3518 wakaba 1.47 pop @{$self->{open_elements}};
3519     }
3520    
3521 wakaba 1.52 ## As if <{current node}>
3522     ## have an element in table scope
3523     ## true by definition
3524    
3525     ## Clear back to table body context
3526     ## nop by definition
3527    
3528     pop @{$self->{open_elements}};
3529 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3530 wakaba 1.52 ## reprocess in the "in table" insertion mode...
3531     }
3532    
3533 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
3534     ## When you edit the code fragment below, please ensure that
3535     ## the code for <table> in the "in table" insertion mode
3536     ## is synced with it.
3537    
3538 wakaba 1.52 ## have a table element in table scope
3539     my $i;
3540     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3541     my $node = $self->{open_elements}->[$_];
3542 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3543 wakaba 1.79 !!!cp ('t241');
3544 wakaba 1.52 $i = $_;
3545     last INSCOPE;
3546 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3547 wakaba 1.79 !!!cp ('t242');
3548 wakaba 1.52 last INSCOPE;
3549 wakaba 1.47 }
3550 wakaba 1.52 } # INSCOPE
3551     unless (defined $i) {
3552 wakaba 1.79 !!!cp ('t243');
3553 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3554     text => $token->{tag_name}, token => $token);
3555 wakaba 1.52 ## Ignore the token
3556 wakaba 1.125 !!!nack ('t243.1');
3557 wakaba 1.52 !!!next-token;
3558 wakaba 1.126 next B;
3559 wakaba 1.3 }
3560 wakaba 1.52
3561     splice @{$self->{open_elements}}, $i;
3562 wakaba 1.95 pop @{$open_tables};
3563 wakaba 1.1
3564 wakaba 1.52 $self->_reset_insertion_mode;
3565 wakaba 1.47
3566     !!!next-token;
3567 wakaba 1.126 next B;
3568 wakaba 1.47 } elsif ({
3569 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
3570 wakaba 1.52 }->{$token->{tag_name}} and
3571 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
3572 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3573 wakaba 1.52 ## have an element in table scope
3574     my $i;
3575     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3576     my $node = $self->{open_elements}->[$_];
3577 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3578 wakaba 1.79 !!!cp ('t247');
3579 wakaba 1.52 $i = $_;
3580     last INSCOPE;
3581 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3582 wakaba 1.79 !!!cp ('t248');
3583 wakaba 1.52 last INSCOPE;
3584     }
3585     } # INSCOPE
3586     unless (defined $i) {
3587 wakaba 1.79 !!!cp ('t249');
3588 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3589     text => $token->{tag_name}, token => $token);
3590 wakaba 1.52 ## Ignore the token
3591 wakaba 1.125 !!!nack ('t249.1');
3592 wakaba 1.52 !!!next-token;
3593 wakaba 1.126 next B;
3594 wakaba 1.52 }
3595    
3596 wakaba 1.48 ## As if </tr>
3597     ## have an element in table scope
3598     my $i;
3599     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3600     my $node = $self->{open_elements}->[$_];
3601 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3602 wakaba 1.79 !!!cp ('t250');
3603 wakaba 1.48 $i = $_;
3604     last INSCOPE;
3605 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3606 wakaba 1.79 !!!cp ('t251');
3607 wakaba 1.48 last INSCOPE;
3608     }
3609     } # INSCOPE
3610 wakaba 1.52 unless (defined $i) {
3611 wakaba 1.79 !!!cp ('t252');
3612 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3613     text => 'tr', token => $token);
3614 wakaba 1.52 ## Ignore the token
3615 wakaba 1.125 !!!nack ('t252.1');
3616 wakaba 1.52 !!!next-token;
3617 wakaba 1.126 next B;
3618 wakaba 1.52 }
3619 wakaba 1.48
3620     ## Clear back to table row context
3621 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3622     & TABLE_ROW_SCOPING_EL)) {
3623 wakaba 1.79 !!!cp ('t253');
3624 wakaba 1.83 ## ISSUE: Can this case be reached?
3625 wakaba 1.48 pop @{$self->{open_elements}};
3626     }
3627    
3628     pop @{$self->{open_elements}}; # tr
3629 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3630 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3631     }
3632    
3633     ## have an element in table scope
3634     my $i;
3635     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3636     my $node = $self->{open_elements}->[$_];
3637 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3638 wakaba 1.79 !!!cp ('t254');
3639 wakaba 1.52 $i = $_;
3640     last INSCOPE;
3641 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3642 wakaba 1.79 !!!cp ('t255');
3643 wakaba 1.52 last INSCOPE;
3644     }
3645     } # INSCOPE
3646     unless (defined $i) {
3647 wakaba 1.79 !!!cp ('t256');
3648 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3649     text => $token->{tag_name}, token => $token);
3650 wakaba 1.52 ## Ignore the token
3651 wakaba 1.125 !!!nack ('t256.1');
3652 wakaba 1.52 !!!next-token;
3653 wakaba 1.126 next B;
3654 wakaba 1.52 }
3655    
3656     ## Clear back to table body context
3657 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3658     & TABLE_ROWS_SCOPING_EL)) {
3659 wakaba 1.79 !!!cp ('t257');
3660 wakaba 1.83 ## ISSUE: Can this case be reached?
3661 wakaba 1.52 pop @{$self->{open_elements}};
3662     }
3663    
3664     pop @{$self->{open_elements}};
3665 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3666 wakaba 1.125 !!!nack ('t257.1');
3667 wakaba 1.52 !!!next-token;
3668 wakaba 1.126 next B;
3669 wakaba 1.52 } elsif ({
3670     body => 1, caption => 1, col => 1, colgroup => 1,
3671     html => 1, td => 1, th => 1,
3672 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3673     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3674 wakaba 1.52 }->{$token->{tag_name}}) {
3675 wakaba 1.125 !!!cp ('t258');
3676 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3677     text => $token->{tag_name}, token => $token);
3678 wakaba 1.125 ## Ignore the token
3679     !!!nack ('t258.1');
3680     !!!next-token;
3681 wakaba 1.126 next B;
3682 wakaba 1.58 } else {
3683 wakaba 1.79 !!!cp ('t259');
3684 wakaba 1.153 !!!parse-error (type => 'in table:/',
3685     text => $token->{tag_name}, token => $token);
3686 wakaba 1.52
3687 wakaba 1.58 $insert = $insert_to_foster;
3688     #
3689     }
3690 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3691 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3692 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3693 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3694 wakaba 1.104 !!!cp ('t259.1');
3695 wakaba 1.105 #
3696 wakaba 1.104 } else {
3697     !!!cp ('t259.2');
3698 wakaba 1.105 #
3699 wakaba 1.104 }
3700    
3701     ## Stop parsing
3702     last B;
3703 wakaba 1.58 } else {
3704     die "$0: $token->{type}: Unknown token type";
3705     }
3706 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3707 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3708 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3709 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3710     unless (length $token->{data}) {
3711 wakaba 1.79 !!!cp ('t260');
3712 wakaba 1.52 !!!next-token;
3713 wakaba 1.126 next B;
3714 wakaba 1.52 }
3715     }
3716    
3717 wakaba 1.79 !!!cp ('t261');
3718 wakaba 1.52 #
3719 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3720 wakaba 1.52 if ($token->{tag_name} eq 'col') {
3721 wakaba 1.79 !!!cp ('t262');
3722 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3723 wakaba 1.52 pop @{$self->{open_elements}};
3724 wakaba 1.125 !!!ack ('t262.1');
3725 wakaba 1.52 !!!next-token;
3726 wakaba 1.126 next B;
3727 wakaba 1.52 } else {
3728 wakaba 1.79 !!!cp ('t263');
3729 wakaba 1.52 #
3730     }
3731 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3732 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
3733 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3734 wakaba 1.79 !!!cp ('t264');
3735 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3736     text => 'colgroup', token => $token);
3737 wakaba 1.52 ## Ignore the token
3738     !!!next-token;
3739 wakaba 1.126 next B;
3740 wakaba 1.52 } else {
3741 wakaba 1.79 !!!cp ('t265');
3742 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3743 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3744 wakaba 1.52 !!!next-token;
3745 wakaba 1.126 next B;
3746 wakaba 1.52 }
3747     } elsif ($token->{tag_name} eq 'col') {
3748 wakaba 1.79 !!!cp ('t266');
3749 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3750     text => 'col', token => $token);
3751 wakaba 1.52 ## Ignore the token
3752     !!!next-token;
3753 wakaba 1.126 next B;
3754 wakaba 1.52 } else {
3755 wakaba 1.79 !!!cp ('t267');
3756 wakaba 1.52 #
3757     }
3758 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3759 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3760 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3761     !!!cp ('t270.2');
3762     ## Stop parsing.
3763     last B;
3764     } else {
3765     ## NOTE: As if </colgroup>.
3766     !!!cp ('t270.1');
3767     pop @{$self->{open_elements}}; # colgroup
3768     $self->{insertion_mode} = IN_TABLE_IM;
3769     ## Reprocess.
3770 wakaba 1.126 next B;
3771 wakaba 1.104 }
3772     } else {
3773     die "$0: $token->{type}: Unknown token type";
3774     }
3775 wakaba 1.52
3776     ## As if </colgroup>
3777 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3778 wakaba 1.79 !!!cp ('t269');
3779 wakaba 1.104 ## TODO: Wrong error type?
3780 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3781     text => 'colgroup', token => $token);
3782 wakaba 1.52 ## Ignore the token
3783 wakaba 1.125 !!!nack ('t269.1');
3784 wakaba 1.52 !!!next-token;
3785 wakaba 1.126 next B;
3786 wakaba 1.52 } else {
3787 wakaba 1.79 !!!cp ('t270');
3788 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3789 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3790 wakaba 1.125 !!!ack-later;
3791 wakaba 1.52 ## reprocess
3792 wakaba 1.126 next B;
3793 wakaba 1.52 }
3794 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3795 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3796 wakaba 1.79 !!!cp ('t271');
3797 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3798     !!!next-token;
3799 wakaba 1.126 next B;
3800 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3801 wakaba 1.123 if ($token->{tag_name} eq 'option') {
3802 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3803 wakaba 1.123 !!!cp ('t272');
3804     ## As if </option>
3805     pop @{$self->{open_elements}};
3806     } else {
3807     !!!cp ('t273');
3808     }
3809 wakaba 1.52
3810 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3811 wakaba 1.125 !!!nack ('t273.1');
3812 wakaba 1.123 !!!next-token;
3813 wakaba 1.126 next B;
3814 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
3815 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3816 wakaba 1.123 !!!cp ('t274');
3817     ## As if </option>
3818     pop @{$self->{open_elements}};
3819     } else {
3820     !!!cp ('t275');
3821     }
3822 wakaba 1.52
3823 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3824 wakaba 1.123 !!!cp ('t276');
3825     ## As if </optgroup>
3826     pop @{$self->{open_elements}};
3827     } else {
3828     !!!cp ('t277');
3829     }
3830 wakaba 1.52
3831 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3832 wakaba 1.125 !!!nack ('t277.1');
3833 wakaba 1.123 !!!next-token;
3834 wakaba 1.126 next B;
3835 wakaba 1.146 } elsif ({
3836 wakaba 1.216 select => 1, input => 1, textarea => 1, keygen => 1,
3837 wakaba 1.146 }->{$token->{tag_name}} or
3838 wakaba 1.210 (($self->{insertion_mode} & IM_MASK)
3839     == IN_SELECT_IN_TABLE_IM and
3840 wakaba 1.101 {
3841     caption => 1, table => 1,
3842     tbody => 1, tfoot => 1, thead => 1,
3843     tr => 1, td => 1, th => 1,
3844     }->{$token->{tag_name}})) {
3845 wakaba 1.222
3846     ## 1. Parse error.
3847     if ($token->{tag_name} eq 'select') {
3848     !!!parse-error (type => 'select in select', ## XXX: documentation
3849     token => $token);
3850     } else {
3851     !!!parse-error (type => 'not closed', text => 'select',
3852     token => $token);
3853     }
3854    
3855     ## 2./<select>-1. Unless "have an element in table scope" (select):
3856 wakaba 1.123 my $i;
3857     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3858     my $node = $self->{open_elements}->[$_];
3859 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3860 wakaba 1.123 !!!cp ('t278');
3861     $i = $_;
3862     last INSCOPE;
3863     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3864     !!!cp ('t279');
3865     last INSCOPE;
3866     }
3867     } # INSCOPE
3868     unless (defined $i) {
3869     !!!cp ('t280');
3870 wakaba 1.222 if ($token->{tag_name} eq 'select') {
3871     ## NOTE: This error would be raised when
3872     ## |select.innerHTML = '<select>'| is executed; in this
3873     ## case two errors, "select in select" and "unmatched
3874     ## end tag", are reported to the user; the latter might
3875     ## be confusing, but this is what the spec requires.
3876     !!!parse-error (type => 'unmatched end tag',
3877     text => 'select',
3878     token => $token);
3879     }
3880     ## Ignore the token.
3881 wakaba 1.125 !!!nack ('t280.1');
3882 wakaba 1.123 !!!next-token;
3883 wakaba 1.126 next B;
3884 wakaba 1.123 }
3885 wakaba 1.222
3886     ## 3. Otherwise, act as if there were a </select> end tag:
3887 wakaba 1.52
3888 wakaba 1.123 !!!cp ('t281');
3889     splice @{$self->{open_elements}}, $i;
3890 wakaba 1.52
3891 wakaba 1.123 $self->_reset_insertion_mode;
3892 wakaba 1.47
3893 wakaba 1.101 if ($token->{tag_name} eq 'select') {
3894 wakaba 1.125 !!!nack ('t281.2');
3895 wakaba 1.101 !!!next-token;
3896 wakaba 1.126 next B;
3897 wakaba 1.101 } else {
3898     !!!cp ('t281.1');
3899 wakaba 1.125 !!!ack-later;
3900 wakaba 1.101 ## Reprocess the token.
3901 wakaba 1.126 next B;
3902 wakaba 1.101 }
3903 wakaba 1.226 } elsif ($token->{tag_name} eq 'script') {
3904     !!!cp ('t281.3');
3905     ## NOTE: This is an "as if in head" code clone
3906     $script_start_tag->();
3907     next B;
3908 wakaba 1.58 } else {
3909 wakaba 1.79 !!!cp ('t282');
3910 wakaba 1.153 !!!parse-error (type => 'in select',
3911     text => $token->{tag_name}, token => $token);
3912 wakaba 1.58 ## Ignore the token
3913 wakaba 1.125 !!!nack ('t282.1');
3914 wakaba 1.58 !!!next-token;
3915 wakaba 1.126 next B;
3916 wakaba 1.58 }
3917     } elsif ($token->{type} == END_TAG_TOKEN) {
3918 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
3919 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3920     $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3921 wakaba 1.123 !!!cp ('t283');
3922     ## As if </option>
3923     splice @{$self->{open_elements}}, -2;
3924 wakaba 1.206 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3925 wakaba 1.123 !!!cp ('t284');
3926     pop @{$self->{open_elements}};
3927     } else {
3928     !!!cp ('t285');
3929 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3930     text => $token->{tag_name}, token => $token);
3931 wakaba 1.123 ## Ignore the token
3932     }
3933 wakaba 1.125 !!!nack ('t285.1');
3934 wakaba 1.123 !!!next-token;
3935 wakaba 1.126 next B;
3936 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
3937 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3938 wakaba 1.123 !!!cp ('t286');
3939     pop @{$self->{open_elements}};
3940     } else {
3941     !!!cp ('t287');
3942 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3943     text => $token->{tag_name}, token => $token);
3944 wakaba 1.123 ## Ignore the token
3945     }
3946 wakaba 1.125 !!!nack ('t287.1');
3947 wakaba 1.123 !!!next-token;
3948 wakaba 1.126 next B;
3949 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
3950     ## have an element in table scope
3951     my $i;
3952     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3953     my $node = $self->{open_elements}->[$_];
3954 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3955 wakaba 1.123 !!!cp ('t288');
3956     $i = $_;
3957     last INSCOPE;
3958     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3959     !!!cp ('t289');
3960     last INSCOPE;
3961     }
3962     } # INSCOPE
3963     unless (defined $i) {
3964     !!!cp ('t290');
3965 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3966     text => $token->{tag_name}, token => $token);
3967 wakaba 1.123 ## Ignore the token
3968 wakaba 1.125 !!!nack ('t290.1');
3969 wakaba 1.123 !!!next-token;
3970 wakaba 1.126 next B;
3971 wakaba 1.123 }
3972 wakaba 1.52
3973 wakaba 1.123 !!!cp ('t291');
3974     splice @{$self->{open_elements}}, $i;
3975 wakaba 1.52
3976 wakaba 1.123 $self->_reset_insertion_mode;
3977 wakaba 1.52
3978 wakaba 1.125 !!!nack ('t291.1');
3979 wakaba 1.123 !!!next-token;
3980 wakaba 1.126 next B;
3981 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK)
3982     == IN_SELECT_IN_TABLE_IM and
3983 wakaba 1.101 {
3984     caption => 1, table => 1, tbody => 1,
3985     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3986     }->{$token->{tag_name}}) {
3987 wakaba 1.83 ## TODO: The following is wrong?
3988 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3989     text => $token->{tag_name}, token => $token);
3990 wakaba 1.52
3991 wakaba 1.123 ## have an element in table scope
3992     my $i;
3993     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3994     my $node = $self->{open_elements}->[$_];
3995     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3996     !!!cp ('t292');
3997     $i = $_;
3998     last INSCOPE;
3999     } elsif ($node->[1] & TABLE_SCOPING_EL) {
4000     !!!cp ('t293');
4001     last INSCOPE;
4002     }
4003     } # INSCOPE
4004     unless (defined $i) {
4005     !!!cp ('t294');
4006     ## Ignore the token
4007 wakaba 1.125 !!!nack ('t294.1');
4008 wakaba 1.123 !!!next-token;
4009 wakaba 1.126 next B;
4010 wakaba 1.123 }
4011 wakaba 1.52
4012 wakaba 1.123 ## As if </select>
4013     ## have an element in table scope
4014     undef $i;
4015     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4016     my $node = $self->{open_elements}->[$_];
4017 wakaba 1.206 if ($node->[1] == SELECT_EL) {
4018 wakaba 1.123 !!!cp ('t295');
4019     $i = $_;
4020     last INSCOPE;
4021     } elsif ($node->[1] & TABLE_SCOPING_EL) {
4022 wakaba 1.83 ## ISSUE: Can this state be reached?
4023 wakaba 1.123 !!!cp ('t296');
4024     last INSCOPE;
4025     }
4026     } # INSCOPE
4027     unless (defined $i) {
4028     !!!cp ('t297');
4029 wakaba 1.83 ## TODO: The following error type is correct?
4030 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4031     text => 'select', token => $token);
4032 wakaba 1.123 ## Ignore the </select> token
4033 wakaba 1.125 !!!nack ('t297.1');
4034 wakaba 1.123 !!!next-token; ## TODO: ok?
4035 wakaba 1.126 next B;
4036 wakaba 1.123 }
4037 wakaba 1.52
4038 wakaba 1.123 !!!cp ('t298');
4039     splice @{$self->{open_elements}}, $i;
4040 wakaba 1.52
4041 wakaba 1.123 $self->_reset_insertion_mode;
4042 wakaba 1.52
4043 wakaba 1.125 !!!ack-later;
4044 wakaba 1.123 ## reprocess
4045 wakaba 1.126 next B;
4046 wakaba 1.58 } else {
4047 wakaba 1.79 !!!cp ('t299');
4048 wakaba 1.153 !!!parse-error (type => 'in select:/',
4049     text => $token->{tag_name}, token => $token);
4050 wakaba 1.52 ## Ignore the token
4051 wakaba 1.125 !!!nack ('t299.3');
4052 wakaba 1.52 !!!next-token;
4053 wakaba 1.126 next B;
4054 wakaba 1.58 }
4055 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4056 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4057 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4058     !!!cp ('t299.1');
4059 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4060 wakaba 1.104 } else {
4061     !!!cp ('t299.2');
4062     }
4063    
4064     ## Stop parsing.
4065     last B;
4066 wakaba 1.58 } else {
4067     die "$0: $token->{type}: Unknown token type";
4068     }
4069 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4070 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4071 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4072 wakaba 1.52 my $data = $1;
4073     ## As if in body
4074     $reconstruct_active_formatting_elements->($insert_to_current);
4075    
4076     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4077    
4078     unless (length $token->{data}) {
4079 wakaba 1.79 !!!cp ('t300');
4080 wakaba 1.52 !!!next-token;
4081 wakaba 1.126 next B;
4082 wakaba 1.52 }
4083     }
4084    
4085 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4086 wakaba 1.79 !!!cp ('t301');
4087 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4088 wakaba 1.188 #
4089 wakaba 1.79 } else {
4090     !!!cp ('t302');
4091 wakaba 1.188 ## "after body" insertion mode
4092     !!!parse-error (type => 'after body:#text', token => $token);
4093     #
4094 wakaba 1.52 }
4095    
4096 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4097 wakaba 1.52 ## reprocess
4098 wakaba 1.126 next B;
4099 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4100 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4101 wakaba 1.79 !!!cp ('t303');
4102 wakaba 1.153 !!!parse-error (type => 'after html',
4103     text => $token->{tag_name}, token => $token);
4104 wakaba 1.188 #
4105 wakaba 1.79 } else {
4106     !!!cp ('t304');
4107 wakaba 1.188 ## "after body" insertion mode
4108     !!!parse-error (type => 'after body',
4109     text => $token->{tag_name}, token => $token);
4110     #
4111 wakaba 1.52 }
4112    
4113 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4114 wakaba 1.125 !!!ack-later;
4115 wakaba 1.52 ## reprocess
4116 wakaba 1.126 next B;
4117 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4118 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4119 wakaba 1.79 !!!cp ('t305');
4120 wakaba 1.153 !!!parse-error (type => 'after html:/',
4121     text => $token->{tag_name}, token => $token);
4122 wakaba 1.52
4123 wakaba 1.188 $self->{insertion_mode} = IN_BODY_IM;
4124     ## Reprocess.
4125     next B;
4126 wakaba 1.79 } else {
4127     !!!cp ('t306');
4128 wakaba 1.52 }
4129    
4130     ## "after body" insertion mode
4131     if ($token->{tag_name} eq 'html') {
4132     if (defined $self->{inner_html_node}) {
4133 wakaba 1.79 !!!cp ('t307');
4134 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4135     text => 'html', token => $token);
4136 wakaba 1.52 ## Ignore the token
4137     !!!next-token;
4138 wakaba 1.126 next B;
4139 wakaba 1.52 } else {
4140 wakaba 1.79 !!!cp ('t308');
4141 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4142 wakaba 1.52 !!!next-token;
4143 wakaba 1.126 next B;
4144 wakaba 1.52 }
4145     } else {
4146 wakaba 1.79 !!!cp ('t309');
4147 wakaba 1.153 !!!parse-error (type => 'after body:/',
4148     text => $token->{tag_name}, token => $token);
4149 wakaba 1.52
4150 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4151 wakaba 1.52 ## reprocess
4152 wakaba 1.126 next B;
4153 wakaba 1.52 }
4154 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4155     !!!cp ('t309.2');
4156     ## Stop parsing
4157     last B;
4158 wakaba 1.52 } else {
4159     die "$0: $token->{type}: Unknown token type";
4160     }
4161 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4162 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4163 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4164 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4165    
4166     unless (length $token->{data}) {
4167 wakaba 1.79 !!!cp ('t310');
4168 wakaba 1.52 !!!next-token;
4169 wakaba 1.126 next B;
4170 wakaba 1.52 }
4171     }
4172    
4173 wakaba 1.188 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4174 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4175 wakaba 1.79 !!!cp ('t311');
4176 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
4177 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4178 wakaba 1.79 !!!cp ('t312');
4179 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
4180 wakaba 1.158 } else { # "after after frameset"
4181 wakaba 1.79 !!!cp ('t313');
4182 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4183 wakaba 1.52 }
4184    
4185     ## Ignore the token.
4186     if (length $token->{data}) {
4187 wakaba 1.79 !!!cp ('t314');
4188 wakaba 1.52 ## reprocess the rest of characters
4189     } else {
4190 wakaba 1.79 !!!cp ('t315');
4191 wakaba 1.52 !!!next-token;
4192     }
4193 wakaba 1.126 next B;
4194 wakaba 1.52 }
4195    
4196     die qq[$0: Character "$token->{data}"];
4197 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4198 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4199 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4200 wakaba 1.79 !!!cp ('t318');
4201 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4202 wakaba 1.125 !!!nack ('t318.1');
4203 wakaba 1.52 !!!next-token;
4204 wakaba 1.126 next B;
4205 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
4206 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4207 wakaba 1.79 !!!cp ('t319');
4208 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4209 wakaba 1.52 pop @{$self->{open_elements}};
4210 wakaba 1.125 !!!ack ('t319.1');
4211 wakaba 1.52 !!!next-token;
4212 wakaba 1.126 next B;
4213 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
4214 wakaba 1.79 !!!cp ('t320');
4215 wakaba 1.148 ## NOTE: As if in head.
4216 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4217 wakaba 1.126 next B;
4218 wakaba 1.158
4219     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4220     ## has no parse error.
4221 wakaba 1.52 } else {
4222 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4223 wakaba 1.79 !!!cp ('t321');
4224 wakaba 1.153 !!!parse-error (type => 'in frameset',
4225     text => $token->{tag_name}, token => $token);
4226 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4227 wakaba 1.79 !!!cp ('t322');
4228 wakaba 1.153 !!!parse-error (type => 'after frameset',
4229     text => $token->{tag_name}, token => $token);
4230 wakaba 1.158 } else { # "after after frameset"
4231     !!!cp ('t322.2');
4232     !!!parse-error (type => 'after after frameset',
4233     text => $token->{tag_name}, token => $token);
4234 wakaba 1.52 }
4235     ## Ignore the token
4236 wakaba 1.125 !!!nack ('t322.1');
4237 wakaba 1.52 !!!next-token;
4238 wakaba 1.126 next B;
4239 wakaba 1.52 }
4240 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4241 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4242 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4243 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4244 wakaba 1.52 @{$self->{open_elements}} == 1) {
4245 wakaba 1.79 !!!cp ('t325');
4246 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4247     text => $token->{tag_name}, token => $token);
4248 wakaba 1.52 ## Ignore the token
4249     !!!next-token;
4250     } else {
4251 wakaba 1.79 !!!cp ('t326');
4252 wakaba 1.52 pop @{$self->{open_elements}};
4253     !!!next-token;
4254     }
4255 wakaba 1.47
4256 wakaba 1.52 if (not defined $self->{inner_html_node} and
4257 wakaba 1.206 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4258 wakaba 1.79 !!!cp ('t327');
4259 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4260 wakaba 1.79 } else {
4261     !!!cp ('t328');
4262 wakaba 1.52 }
4263 wakaba 1.126 next B;
4264 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
4265 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4266 wakaba 1.79 !!!cp ('t329');
4267 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4268 wakaba 1.52 !!!next-token;
4269 wakaba 1.126 next B;
4270 wakaba 1.52 } else {
4271 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4272 wakaba 1.79 !!!cp ('t330');
4273 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
4274     text => $token->{tag_name}, token => $token);
4275 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4276     !!!cp ('t330.1');
4277     !!!parse-error (type => 'after frameset:/',
4278     text => $token->{tag_name}, token => $token);
4279     } else { # "after after frameset"
4280 wakaba 1.79 !!!cp ('t331');
4281 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
4282 wakaba 1.153 text => $token->{tag_name}, token => $token);
4283 wakaba 1.52 }
4284     ## Ignore the token
4285     !!!next-token;
4286 wakaba 1.126 next B;
4287 wakaba 1.52 }
4288 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4289 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4290 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4291     !!!cp ('t331.1');
4292 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4293 wakaba 1.104 } else {
4294     !!!cp ('t331.2');
4295     }
4296    
4297     ## Stop parsing
4298     last B;
4299 wakaba 1.52 } else {
4300     die "$0: $token->{type}: Unknown token type";
4301     }
4302     } else {
4303     die "$0: $self->{insertion_mode}: Unknown insertion mode";
4304     }
4305 wakaba 1.47
4306 wakaba 1.52 ## "in body" insertion mode
4307 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
4308 wakaba 1.52 if ($token->{tag_name} eq 'script') {
4309 wakaba 1.79 !!!cp ('t332');
4310 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4311 wakaba 1.100 $script_start_tag->();
4312 wakaba 1.126 next B;
4313 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
4314 wakaba 1.79 !!!cp ('t333');
4315 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4316 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4317 wakaba 1.126 next B;
4318 wakaba 1.52 } elsif ({
4319 wakaba 1.232 base => 1, command => 1, link => 1,
4320 wakaba 1.52 }->{$token->{tag_name}}) {
4321 wakaba 1.79 !!!cp ('t334');
4322 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4323 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4324 wakaba 1.194 pop @{$self->{open_elements}};
4325 wakaba 1.125 !!!ack ('t334.1');
4326 wakaba 1.52 !!!next-token;
4327 wakaba 1.126 next B;
4328 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
4329     ## NOTE: This is an "as if in head" code clone, only "-t" differs
4330 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4331 wakaba 1.194 my $meta_el = pop @{$self->{open_elements}};
4332 wakaba 1.46
4333 wakaba 1.52 unless ($self->{confident}) {
4334 wakaba 1.134 if ($token->{attributes}->{charset}) {
4335 wakaba 1.79 !!!cp ('t335');
4336 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4337     ## in the {change_encoding} callback.
4338 wakaba 1.63 $self->{change_encoding}
4339 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
4340 wakaba 1.66
4341     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4342     ->set_user_data (manakai_has_reference =>
4343     $token->{attributes}->{charset}
4344     ->{has_reference});
4345 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4346     if ($token->{attributes}->{content}->{value}
4347 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4348 wakaba 1.189 [\x09\x0A\x0C\x0D\x20]*=
4349     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4350     ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4351     /x) {
4352 wakaba 1.79 !!!cp ('t336');
4353 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4354     ## in the {change_encoding} callback.
4355 wakaba 1.63 $self->{change_encoding}
4356 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4357 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4358     ->set_user_data (manakai_has_reference =>
4359     $token->{attributes}->{content}
4360     ->{has_reference});
4361 wakaba 1.63 }
4362 wakaba 1.52 }
4363 wakaba 1.66 } else {
4364     if ($token->{attributes}->{charset}) {
4365 wakaba 1.79 !!!cp ('t337');
4366 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4367     ->set_user_data (manakai_has_reference =>
4368     $token->{attributes}->{charset}
4369     ->{has_reference});
4370     }
4371 wakaba 1.68 if ($token->{attributes}->{content}) {
4372 wakaba 1.79 !!!cp ('t338');
4373 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4374     ->set_user_data (manakai_has_reference =>
4375     $token->{attributes}->{content}
4376     ->{has_reference});
4377     }
4378 wakaba 1.52 }
4379 wakaba 1.1
4380 wakaba 1.125 !!!ack ('t338.1');
4381 wakaba 1.52 !!!next-token;
4382 wakaba 1.126 next B;
4383 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
4384 wakaba 1.79 !!!cp ('t341');
4385 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4386 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4387 wakaba 1.126 next B;
4388 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
4389 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
4390 wakaba 1.46
4391 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
4392 wakaba 1.206 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4393 wakaba 1.79 !!!cp ('t342');
4394 wakaba 1.52 ## Ignore the token
4395     } else {
4396     my $body_el = $self->{open_elements}->[1]->[0];
4397     for my $attr_name (keys %{$token->{attributes}}) {
4398     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4399 wakaba 1.79 !!!cp ('t343');
4400 wakaba 1.52 $body_el->set_attribute_ns
4401     (undef, [undef, $attr_name],
4402     $token->{attributes}->{$attr_name}->{value});
4403     }
4404     }
4405     }
4406 wakaba 1.125 !!!nack ('t343.1');
4407 wakaba 1.52 !!!next-token;
4408 wakaba 1.126 next B;
4409 wakaba 1.52 } elsif ({
4410 wakaba 1.195 ## NOTE: Start tags for non-phrasing flow content elements
4411    
4412     ## NOTE: The normal one
4413     address => 1, article => 1, aside => 1, blockquote => 1,
4414     center => 1, datagrid => 1, details => 1, dialog => 1,
4415     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4416     footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4417     h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
4418     section => 1, ul => 1,
4419     ## NOTE: As normal, but drops leading newline
4420 wakaba 1.97 pre => 1, listing => 1,
4421 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
4422 wakaba 1.109 form => 1,
4423 wakaba 1.195
4424 wakaba 1.109 table => 1,
4425     hr => 1,
4426 wakaba 1.52 }->{$token->{tag_name}}) {
4427 wakaba 1.225
4428     ## 1. When there is an opening |form| element:
4429 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4430     !!!cp ('t350');
4431 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
4432 wakaba 1.109 ## Ignore the token
4433 wakaba 1.125 !!!nack ('t350.1');
4434 wakaba 1.109 !!!next-token;
4435 wakaba 1.126 next B;
4436 wakaba 1.109 }
4437    
4438 wakaba 1.225 ## 2. Close the |p| element, if any.
4439 wakaba 1.217 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4440     $self->{document}->manakai_compat_mode ne 'quirks') {
4441     ## has a p element in scope
4442     INSCOPE: for (reverse @{$self->{open_elements}}) {
4443     if ($_->[1] == P_EL) {
4444     !!!cp ('t344');
4445     !!!back-token; # <form>
4446     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4447     line => $token->{line}, column => $token->{column}};
4448     next B;
4449     } elsif ($_->[1] & SCOPING_EL) {
4450     !!!cp ('t345');
4451     last INSCOPE;
4452     }
4453     } # INSCOPE
4454     }
4455 wakaba 1.225
4456     ## 3. Close the opening <hn> element, if any.
4457     if ({h1 => 1, h2 => 1, h3 => 1,
4458     h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) {
4459     if ($self->{open_elements}->[-1]->[1] == HEADING_EL) {
4460     !!!parse-error (type => 'not closed',
4461     text => $self->{open_elements}->[-1]->[0]->manakai_local_name,
4462     token => $token);
4463     pop @{$self->{open_elements}};
4464     }
4465     }
4466    
4467     ## 4. Insertion.
4468 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4469 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4470 wakaba 1.125 !!!nack ('t346.1');
4471 wakaba 1.52 !!!next-token;
4472 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4473 wakaba 1.52 $token->{data} =~ s/^\x0A//;
4474     unless (length $token->{data}) {
4475 wakaba 1.79 !!!cp ('t346');
4476 wakaba 1.1 !!!next-token;
4477 wakaba 1.79 } else {
4478     !!!cp ('t349');
4479 wakaba 1.52 }
4480 wakaba 1.79 } else {
4481     !!!cp ('t348');
4482 wakaba 1.52 }
4483 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
4484     !!!cp ('t347.1');
4485     $self->{form_element} = $self->{open_elements}->[-1]->[0];
4486    
4487 wakaba 1.125 !!!nack ('t347.2');
4488 wakaba 1.109 !!!next-token;
4489     } elsif ($token->{tag_name} eq 'table') {
4490     !!!cp ('t382');
4491     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4492    
4493     $self->{insertion_mode} = IN_TABLE_IM;
4494    
4495 wakaba 1.125 !!!nack ('t382.1');
4496 wakaba 1.109 !!!next-token;
4497     } elsif ($token->{tag_name} eq 'hr') {
4498     !!!cp ('t386');
4499     pop @{$self->{open_elements}};
4500    
4501 wakaba 1.125 !!!nack ('t386.1');
4502 wakaba 1.109 !!!next-token;
4503 wakaba 1.52 } else {
4504 wakaba 1.125 !!!nack ('t347.1');
4505 wakaba 1.52 !!!next-token;
4506     }
4507 wakaba 1.126 next B;
4508 wakaba 1.196 } elsif ($token->{tag_name} eq 'li') {
4509     ## NOTE: As normal, but imply </li> when there's another <li> ...
4510 wakaba 1.193
4511 wakaba 1.225 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)::
4512     ## Interpreted as <li><foo/></li><li/> (non-conforming):
4513 wakaba 1.193 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4514     ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4515     ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4516     ## object (Fx)
4517 wakaba 1.225 ## Generate non-tree (non-conforming):
4518 wakaba 1.193 ## basefont (IE7 (where basefont is non-void)), center (IE),
4519     ## form (IE), hn (IE)
4520 wakaba 1.225 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)::
4521     ## Interpreted as <li><foo><li/></foo></li> (non-conforming):
4522 wakaba 1.193 ## div (Fx, S)
4523 wakaba 1.196
4524     my $non_optional;
4525 wakaba 1.52 my $i = -1;
4526 wakaba 1.196
4527     ## 1.
4528     for my $node (reverse @{$self->{open_elements}}) {
4529 wakaba 1.206 if ($node->[1] == LI_EL) {
4530 wakaba 1.196 ## 2. (a) As if </li>
4531     {
4532     ## If no </li> - not applied
4533     #
4534    
4535     ## Otherwise
4536    
4537     ## 1. generate implied end tags, except for </li>
4538     #
4539    
4540     ## 2. If current node != "li", parse error
4541     if ($non_optional) {
4542     !!!parse-error (type => 'not closed',
4543     text => $non_optional->[0]->manakai_local_name,
4544     token => $token);
4545     !!!cp ('t355');
4546     } else {
4547     !!!cp ('t356');
4548     }
4549    
4550     ## 3. Pop
4551     splice @{$self->{open_elements}}, $i;
4552 wakaba 1.52 }
4553 wakaba 1.196
4554     last; ## 2. (b) goto 5.
4555     } elsif (
4556     ## NOTE: not "formatting" and not "phrasing"
4557     ($node->[1] & SPECIAL_EL or
4558     $node->[1] & SCOPING_EL) and
4559     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4560 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4561     ) {
4562 wakaba 1.196 ## 3.
4563 wakaba 1.79 !!!cp ('t357');
4564 wakaba 1.196 last; ## goto 5.
4565     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4566 wakaba 1.79 !!!cp ('t358');
4567 wakaba 1.196 #
4568     } else {
4569     !!!cp ('t359');
4570     $non_optional ||= $node;
4571     #
4572 wakaba 1.52 }
4573 wakaba 1.196 ## 4.
4574     ## goto 2.
4575 wakaba 1.52 $i--;
4576 wakaba 1.196 }
4577    
4578     ## 5. (a) has a |p| element in scope
4579     INSCOPE: for (reverse @{$self->{open_elements}}) {
4580 wakaba 1.206 if ($_->[1] == P_EL) {
4581 wakaba 1.196 !!!cp ('t353');
4582 wakaba 1.198
4583     ## NOTE: |<p><li>|, for example.
4584    
4585 wakaba 1.196 !!!back-token; # <x>
4586     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4587     line => $token->{line}, column => $token->{column}};
4588     next B;
4589     } elsif ($_->[1] & SCOPING_EL) {
4590     !!!cp ('t354');
4591     last INSCOPE;
4592     }
4593     } # INSCOPE
4594    
4595     ## 5. (b) insert
4596 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4597 wakaba 1.125 !!!nack ('t359.1');
4598 wakaba 1.52 !!!next-token;
4599 wakaba 1.126 next B;
4600 wakaba 1.196 } elsif ($token->{tag_name} eq 'dt' or
4601     $token->{tag_name} eq 'dd') {
4602     ## NOTE: As normal, but imply </dt> or </dd> when ...
4603    
4604     my $non_optional;
4605     my $i = -1;
4606    
4607     ## 1.
4608     for my $node (reverse @{$self->{open_elements}}) {
4609 wakaba 1.207 if ($node->[1] == DTDD_EL) {
4610 wakaba 1.196 ## 2. (a) As if </dt> or </dd>
4611     {
4612     ## If no </dt> or </dd> - not applied
4613     #
4614    
4615     ## Otherwise
4616    
4617     ## 1. generate implied end tags, except for </dt> or </dd>
4618     #
4619    
4620     ## 2. If current node != "dt"|"dd", parse error
4621     if ($non_optional) {
4622     !!!parse-error (type => 'not closed',
4623     text => $non_optional->[0]->manakai_local_name,
4624     token => $token);
4625     !!!cp ('t355.1');
4626     } else {
4627     !!!cp ('t356.1');
4628     }
4629    
4630     ## 3. Pop
4631     splice @{$self->{open_elements}}, $i;
4632     }
4633    
4634     last; ## 2. (b) goto 5.
4635     } elsif (
4636     ## NOTE: not "formatting" and not "phrasing"
4637     ($node->[1] & SPECIAL_EL or
4638     $node->[1] & SCOPING_EL) and
4639     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4640    
4641 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4642     ) {
4643 wakaba 1.196 ## 3.
4644     !!!cp ('t357.1');
4645     last; ## goto 5.
4646     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4647     !!!cp ('t358.1');
4648     #
4649     } else {
4650     !!!cp ('t359.1');
4651     $non_optional ||= $node;
4652     #
4653     }
4654     ## 4.
4655     ## goto 2.
4656     $i--;
4657     }
4658    
4659     ## 5. (a) has a |p| element in scope
4660     INSCOPE: for (reverse @{$self->{open_elements}}) {
4661 wakaba 1.206 if ($_->[1] == P_EL) {
4662 wakaba 1.196 !!!cp ('t353.1');
4663     !!!back-token; # <x>
4664     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4665     line => $token->{line}, column => $token->{column}};
4666     next B;
4667     } elsif ($_->[1] & SCOPING_EL) {
4668     !!!cp ('t354.1');
4669     last INSCOPE;
4670     }
4671     } # INSCOPE
4672    
4673     ## 5. (b) insert
4674     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4675     !!!nack ('t359.2');
4676     !!!next-token;
4677     next B;
4678 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
4679 wakaba 1.195 ## NOTE: As normal, but effectively ends parsing
4680    
4681 wakaba 1.52 ## has a p element in scope
4682     INSCOPE: for (reverse @{$self->{open_elements}}) {
4683 wakaba 1.206 if ($_->[1] == P_EL) {
4684 wakaba 1.79 !!!cp ('t367');
4685 wakaba 1.125 !!!back-token; # <plaintext>
4686 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4687     line => $token->{line}, column => $token->{column}};
4688 wakaba 1.126 next B;
4689 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
4690 wakaba 1.79 !!!cp ('t368');
4691 wakaba 1.52 last INSCOPE;
4692 wakaba 1.46 }
4693 wakaba 1.52 } # INSCOPE
4694    
4695 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4696 wakaba 1.52
4697     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4698    
4699 wakaba 1.125 !!!nack ('t368.1');
4700 wakaba 1.52 !!!next-token;
4701 wakaba 1.126 next B;
4702 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
4703     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4704     my $node = $active_formatting_elements->[$i];
4705 wakaba 1.206 if ($node->[1] == A_EL) {
4706 wakaba 1.79 !!!cp ('t371');
4707 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
4708 wakaba 1.52
4709 wakaba 1.125 !!!back-token; # <a>
4710 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4711     line => $token->{line}, column => $token->{column}};
4712 wakaba 1.113 $formatting_end_tag->($token);
4713 wakaba 1.52
4714     AFE2: for (reverse 0..$#$active_formatting_elements) {
4715     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4716 wakaba 1.79 !!!cp ('t372');
4717 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
4718     last AFE2;
4719 wakaba 1.1 }
4720 wakaba 1.52 } # AFE2
4721     OE: for (reverse 0..$#{$self->{open_elements}}) {
4722     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4723 wakaba 1.79 !!!cp ('t373');
4724 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
4725     last OE;
4726 wakaba 1.1 }
4727 wakaba 1.52 } # OE
4728     last AFE;
4729     } elsif ($node->[0] eq '#marker') {
4730 wakaba 1.79 !!!cp ('t374');
4731 wakaba 1.52 last AFE;
4732     }
4733     } # AFE
4734    
4735     $reconstruct_active_formatting_elements->($insert_to_current);
4736 wakaba 1.1
4737 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4738 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4739 wakaba 1.1
4740 wakaba 1.125 !!!nack ('t374.1');
4741 wakaba 1.52 !!!next-token;
4742 wakaba 1.126 next B;
4743 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
4744     $reconstruct_active_formatting_elements->($insert_to_current);
4745 wakaba 1.1
4746 wakaba 1.52 ## has a |nobr| element in scope
4747     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4748     my $node = $self->{open_elements}->[$_];
4749 wakaba 1.206 if ($node->[1] == NOBR_EL) {
4750 wakaba 1.79 !!!cp ('t376');
4751 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
4752 wakaba 1.125 !!!back-token; # <nobr>
4753 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4754     line => $token->{line}, column => $token->{column}};
4755 wakaba 1.126 next B;
4756 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4757 wakaba 1.79 !!!cp ('t377');
4758 wakaba 1.52 last INSCOPE;
4759     }
4760     } # INSCOPE
4761    
4762 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4763 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4764    
4765 wakaba 1.125 !!!nack ('t377.1');
4766 wakaba 1.52 !!!next-token;
4767 wakaba 1.126 next B;
4768 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
4769     ## has a button element in scope
4770     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4771     my $node = $self->{open_elements}->[$_];
4772 wakaba 1.206 if ($node->[1] == BUTTON_EL) {
4773 wakaba 1.79 !!!cp ('t378');
4774 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
4775 wakaba 1.125 !!!back-token; # <button>
4776 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4777     line => $token->{line}, column => $token->{column}};
4778 wakaba 1.126 next B;
4779 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4780 wakaba 1.79 !!!cp ('t379');
4781 wakaba 1.52 last INSCOPE;
4782     }
4783     } # INSCOPE
4784    
4785     $reconstruct_active_formatting_elements->($insert_to_current);
4786    
4787 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4788 wakaba 1.85
4789     ## TODO: associate with $self->{form_element} if defined
4790    
4791 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
4792 wakaba 1.1
4793 wakaba 1.125 !!!nack ('t379.1');
4794 wakaba 1.52 !!!next-token;
4795 wakaba 1.126 next B;
4796 wakaba 1.103 } elsif ({
4797 wakaba 1.109 xmp => 1,
4798     iframe => 1,
4799     noembed => 1,
4800 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4801 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
4802 wakaba 1.103 }->{$token->{tag_name}}) {
4803 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
4804     !!!cp ('t381');
4805     $reconstruct_active_formatting_elements->($insert_to_current);
4806     } else {
4807     !!!cp ('t399');
4808     }
4809     ## NOTE: There is an "as if in body" code clone.
4810 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4811 wakaba 1.126 next B;
4812 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
4813 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
4814 wakaba 1.52
4815     if (defined $self->{form_element}) {
4816 wakaba 1.79 !!!cp ('t389');
4817 wakaba 1.52 ## Ignore the token
4818 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
4819 wakaba 1.52 !!!next-token;
4820 wakaba 1.126 next B;
4821 wakaba 1.52 } else {
4822 wakaba 1.147 !!!ack ('t391.1');
4823    
4824 wakaba 1.52 my $at = $token->{attributes};
4825     my $form_attrs;
4826     $form_attrs->{action} = $at->{action} if $at->{action};
4827     my $prompt_attr = $at->{prompt};
4828     $at->{name} = {name => 'name', value => 'isindex'};
4829     delete $at->{action};
4830     delete $at->{prompt};
4831     my @tokens = (
4832 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
4833 wakaba 1.114 attributes => $form_attrs,
4834     line => $token->{line}, column => $token->{column}},
4835     {type => START_TAG_TOKEN, tag_name => 'hr',
4836     line => $token->{line}, column => $token->{column}},
4837     {type => START_TAG_TOKEN, tag_name => 'label',
4838     line => $token->{line}, column => $token->{column}},
4839 wakaba 1.52 );
4840     if ($prompt_attr) {
4841 wakaba 1.79 !!!cp ('t390');
4842 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4843 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4844     };
4845 wakaba 1.1 } else {
4846 wakaba 1.79 !!!cp ('t391');
4847 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
4848 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
4849 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4850     }; # SHOULD
4851 wakaba 1.52 ## TODO: make this configurable
4852 wakaba 1.1 }
4853 wakaba 1.52 push @tokens,
4854 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4855     line => $token->{line}, column => $token->{column}},
4856 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4857 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
4858     line => $token->{line}, column => $token->{column}},
4859     {type => START_TAG_TOKEN, tag_name => 'hr',
4860     line => $token->{line}, column => $token->{column}},
4861     {type => END_TAG_TOKEN, tag_name => 'form',
4862     line => $token->{line}, column => $token->{column}};
4863 wakaba 1.52 !!!back-token (@tokens);
4864 wakaba 1.125 !!!next-token;
4865 wakaba 1.126 next B;
4866 wakaba 1.52 }
4867     } elsif ($token->{tag_name} eq 'textarea') {
4868 wakaba 1.224 ## 1. Insert
4869 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4870 wakaba 1.52
4871 wakaba 1.224 ## Step 2 # XXX
4872 wakaba 1.52 ## TODO: $self->{form_element} if defined
4873 wakaba 1.205
4874 wakaba 1.224 ## 2. Drop U+000A LINE FEED
4875 wakaba 1.205 $self->{ignore_newline} = 1;
4876    
4877 wakaba 1.224 ## 3. RCDATA
4878 wakaba 1.52 $self->{content_model} = RCDATA_CONTENT_MODEL;
4879     delete $self->{escape}; # MUST
4880 wakaba 1.205
4881 wakaba 1.224 ## 4., 6. Insertion mode
4882 wakaba 1.205 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4883    
4884 wakaba 1.224 ## XXX: 5. frameset-ok flag
4885    
4886 wakaba 1.125 !!!nack ('t392.1');
4887 wakaba 1.52 !!!next-token;
4888 wakaba 1.126 next B;
4889 wakaba 1.201 } elsif ($token->{tag_name} eq 'optgroup' or
4890     $token->{tag_name} eq 'option') {
4891     ## has an |option| element in scope
4892     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4893     my $node = $self->{open_elements}->[$_];
4894 wakaba 1.206 if ($node->[1] == OPTION_EL) {
4895 wakaba 1.201 !!!cp ('t397.1');
4896     ## NOTE: As if </option>
4897     !!!back-token; # <option> or <optgroup>
4898     $token = {type => END_TAG_TOKEN, tag_name => 'option',
4899     line => $token->{line}, column => $token->{column}};
4900     next B;
4901     } elsif ($node->[1] & SCOPING_EL) {
4902     !!!cp ('t397.2');
4903     last INSCOPE;
4904     }
4905     } # INSCOPE
4906    
4907     $reconstruct_active_formatting_elements->($insert_to_current);
4908    
4909     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4910    
4911     !!!nack ('t397.3');
4912     !!!next-token;
4913     redo B;
4914 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
4915     $token->{tag_name} eq 'rp') {
4916     ## has a |ruby| element in scope
4917     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4918     my $node = $self->{open_elements}->[$_];
4919 wakaba 1.206 if ($node->[1] == RUBY_EL) {
4920 wakaba 1.151 !!!cp ('t398.1');
4921     ## generate implied end tags
4922     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4923     !!!cp ('t398.2');
4924     pop @{$self->{open_elements}};
4925     }
4926 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4927 wakaba 1.151 !!!cp ('t398.3');
4928     !!!parse-error (type => 'not closed',
4929 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4930 wakaba 1.151 ->manakai_local_name,
4931     token => $token);
4932     pop @{$self->{open_elements}}
4933 wakaba 1.206 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4934 wakaba 1.151 }
4935     last INSCOPE;
4936     } elsif ($node->[1] & SCOPING_EL) {
4937     !!!cp ('t398.4');
4938     last INSCOPE;
4939     }
4940     } # INSCOPE
4941 wakaba 1.212
4942     ## TODO: <non-ruby><rt> is not allowed.
4943 wakaba 1.151
4944     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4945    
4946     !!!nack ('t398.5');
4947     !!!next-token;
4948     redo B;
4949 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
4950     $token->{tag_name} eq 'svg') {
4951     $reconstruct_active_formatting_elements->($insert_to_current);
4952 wakaba 1.131
4953 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4954    
4955 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4956    
4957     ## "adjust foreign attributes" - done in insert-element-f
4958 wakaba 1.126
4959 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4960 wakaba 1.126
4961     if ($self->{self_closing}) {
4962     pop @{$self->{open_elements}};
4963 wakaba 1.201 !!!ack ('t398.6');
4964 wakaba 1.126 } else {
4965 wakaba 1.201 !!!cp ('t398.7');
4966 wakaba 1.126 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4967     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4968     ## mode, "in body" (not "in foreign content") secondary insertion
4969     ## mode, maybe.
4970     }
4971    
4972     !!!next-token;
4973     next B;
4974 wakaba 1.52 } elsif ({
4975     caption => 1, col => 1, colgroup => 1, frame => 1,
4976 wakaba 1.201 frameset => 1, head => 1,
4977 wakaba 1.52 tbody => 1, td => 1, tfoot => 1, th => 1,
4978     thead => 1, tr => 1,
4979     }->{$token->{tag_name}}) {
4980 wakaba 1.79 !!!cp ('t401');
4981 wakaba 1.153 !!!parse-error (type => 'in body',
4982     text => $token->{tag_name}, token => $token);
4983 wakaba 1.52 ## Ignore the token
4984 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
4985 wakaba 1.52 !!!next-token;
4986 wakaba 1.126 next B;
4987 wakaba 1.198 } elsif ($token->{tag_name} eq 'param' or
4988     $token->{tag_name} eq 'source') {
4989     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4990     pop @{$self->{open_elements}};
4991    
4992     !!!ack ('t398.5');
4993     !!!next-token;
4994     redo B;
4995 wakaba 1.52 } else {
4996 wakaba 1.110 if ($token->{tag_name} eq 'image') {
4997     !!!cp ('t384');
4998 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
4999 wakaba 1.110 $token->{tag_name} = 'img';
5000     } else {
5001     !!!cp ('t385');
5002     }
5003    
5004     ## NOTE: There is an "as if <br>" code clone.
5005 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5006    
5007 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5008 wakaba 1.109
5009 wakaba 1.110 if ({
5010     applet => 1, marquee => 1, object => 1,
5011     }->{$token->{tag_name}}) {
5012     !!!cp ('t380');
5013     push @$active_formatting_elements, ['#marker', ''];
5014 wakaba 1.125 !!!nack ('t380.1');
5015 wakaba 1.110 } elsif ({
5016     b => 1, big => 1, em => 1, font => 1, i => 1,
5017 wakaba 1.193 s => 1, small => 1, strike => 1,
5018 wakaba 1.110 strong => 1, tt => 1, u => 1,
5019     }->{$token->{tag_name}}) {
5020     !!!cp ('t375');
5021     push @$active_formatting_elements, $self->{open_elements}->[-1];
5022 wakaba 1.125 !!!nack ('t375.1');
5023 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
5024     !!!cp ('t388');
5025     ## TODO: associate with $self->{form_element} if defined
5026     pop @{$self->{open_elements}};
5027 wakaba 1.125 !!!ack ('t388.2');
5028 wakaba 1.110 } elsif ({
5029     area => 1, basefont => 1, bgsound => 1, br => 1,
5030 wakaba 1.198 embed => 1, img => 1, spacer => 1, wbr => 1,
5031 wakaba 1.231 keygen => 1,
5032 wakaba 1.110 }->{$token->{tag_name}}) {
5033     !!!cp ('t388.1');
5034     pop @{$self->{open_elements}};
5035 wakaba 1.125 !!!ack ('t388.3');
5036 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
5037 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
5038    
5039     if ($self->{insertion_mode} & TABLE_IMS or
5040     $self->{insertion_mode} & BODY_TABLE_IMS or
5041 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5042 wakaba 1.109 !!!cp ('t400.1');
5043     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5044     } else {
5045     !!!cp ('t400.2');
5046     $self->{insertion_mode} = IN_SELECT_IM;
5047     }
5048 wakaba 1.125 !!!nack ('t400.3');
5049 wakaba 1.110 } else {
5050 wakaba 1.125 !!!nack ('t402');
5051 wakaba 1.109 }
5052 wakaba 1.51
5053 wakaba 1.52 !!!next-token;
5054 wakaba 1.126 next B;
5055 wakaba 1.52 }
5056 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5057 wakaba 1.52 if ($token->{tag_name} eq 'body') {
5058 wakaba 1.225
5059     ## 1. If not "have an element in scope":
5060     ## "has a |body| element in scope"
5061 wakaba 1.107 my $i;
5062 wakaba 1.111 INSCOPE: {
5063     for (reverse @{$self->{open_elements}}) {
5064 wakaba 1.206 if ($_->[1] == BODY_EL) {
5065 wakaba 1.111 !!!cp ('t405');
5066     $i = $_;
5067     last INSCOPE;
5068 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5069 wakaba 1.111 !!!cp ('t405.1');
5070     last;
5071     }
5072 wakaba 1.52 }
5073 wakaba 1.111
5074 wakaba 1.200 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5075    
5076     !!!parse-error (type => 'unmatched end tag',
5077 wakaba 1.153 text => $token->{tag_name}, token => $token);
5078 wakaba 1.107 ## NOTE: Ignore the token.
5079 wakaba 1.52 !!!next-token;
5080 wakaba 1.126 next B;
5081 wakaba 1.111 } # INSCOPE
5082 wakaba 1.107
5083 wakaba 1.225 ## 2. If unclosed elements:
5084 wakaba 1.107 for (@{$self->{open_elements}}) {
5085 wakaba 1.220 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5086     $_->[1] == OPTGROUP_EL ||
5087     $_->[1] == OPTION_EL ||
5088     $_->[1] == RUBY_COMPONENT_EL) {
5089 wakaba 1.107 !!!cp ('t403');
5090 wakaba 1.122 !!!parse-error (type => 'not closed',
5091 wakaba 1.153 text => $_->[0]->manakai_local_name,
5092 wakaba 1.122 token => $token);
5093 wakaba 1.107 last;
5094     } else {
5095     !!!cp ('t404');
5096     }
5097     }
5098    
5099 wakaba 1.225 ## 3. Switch the insertion mode.
5100 wakaba 1.107 $self->{insertion_mode} = AFTER_BODY_IM;
5101     !!!next-token;
5102 wakaba 1.126 next B;
5103 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
5104 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
5105     ## up-to-date, though it has same effect as speced.
5106 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
5107 wakaba 1.206 $self->{open_elements}->[1]->[1] == BODY_EL) {
5108     unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5109 wakaba 1.79 !!!cp ('t406');
5110 wakaba 1.122 !!!parse-error (type => 'not closed',
5111 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
5112 wakaba 1.122 ->manakai_local_name,
5113     token => $token);
5114 wakaba 1.79 } else {
5115     !!!cp ('t407');
5116 wakaba 1.1 }
5117 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5118 wakaba 1.52 ## reprocess
5119 wakaba 1.126 next B;
5120 wakaba 1.51 } else {
5121 wakaba 1.79 !!!cp ('t408');
5122 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5123     text => $token->{tag_name}, token => $token);
5124 wakaba 1.52 ## Ignore the token
5125     !!!next-token;
5126 wakaba 1.126 next B;
5127 wakaba 1.51 }
5128 wakaba 1.52 } elsif ({
5129 wakaba 1.195 ## NOTE: End tags for non-phrasing flow content elements
5130    
5131     ## NOTE: The normal ones
5132     address => 1, article => 1, aside => 1, blockquote => 1,
5133     center => 1, datagrid => 1, details => 1, dialog => 1,
5134     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5135     footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
5136     ol => 1, pre => 1, section => 1, ul => 1,
5137    
5138     ## NOTE: As normal, but ... optional tags
5139 wakaba 1.52 dd => 1, dt => 1, li => 1,
5140 wakaba 1.195
5141 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5142 wakaba 1.52 }->{$token->{tag_name}}) {
5143 wakaba 1.197 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5144     ## Code for <dt> or <dd> start tags includes "as if </dt> or
5145     ## </dd>" code.
5146    
5147 wakaba 1.52 ## has an element in scope
5148     my $i;
5149     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5150     my $node = $self->{open_elements}->[$_];
5151 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5152 wakaba 1.79 !!!cp ('t410');
5153 wakaba 1.52 $i = $_;
5154 wakaba 1.87 last INSCOPE;
5155 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5156 wakaba 1.79 !!!cp ('t411');
5157 wakaba 1.52 last INSCOPE;
5158 wakaba 1.51 }
5159 wakaba 1.52 } # INSCOPE
5160 wakaba 1.89
5161     unless (defined $i) { # has an element in scope
5162     !!!cp ('t413');
5163 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5164     text => $token->{tag_name}, token => $token);
5165 wakaba 1.157 ## NOTE: Ignore the token.
5166 wakaba 1.89 } else {
5167     ## Step 1. generate implied end tags
5168     while ({
5169 wakaba 1.151 ## END_TAG_OPTIONAL_EL
5170 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
5171     dt => ($token->{tag_name} ne 'dt'),
5172     li => ($token->{tag_name} ne 'li'),
5173 wakaba 1.194 option => 1,
5174     optgroup => 1,
5175 wakaba 1.89 p => 1,
5176 wakaba 1.151 rt => 1,
5177     rp => 1,
5178 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5179 wakaba 1.89 !!!cp ('t409');
5180     pop @{$self->{open_elements}};
5181     }
5182    
5183     ## Step 2.
5184 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5185     ne $token->{tag_name}) {
5186 wakaba 1.79 !!!cp ('t412');
5187 wakaba 1.122 !!!parse-error (type => 'not closed',
5188 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5189 wakaba 1.122 ->manakai_local_name,
5190     token => $token);
5191 wakaba 1.51 } else {
5192 wakaba 1.89 !!!cp ('t414');
5193 wakaba 1.51 }
5194 wakaba 1.89
5195     ## Step 3.
5196 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5197 wakaba 1.89
5198     ## Step 4.
5199     $clear_up_to_marker->()
5200     if {
5201 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5202 wakaba 1.89 }->{$token->{tag_name}};
5203 wakaba 1.51 }
5204 wakaba 1.52 !!!next-token;
5205 wakaba 1.126 next B;
5206 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
5207 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
5208    
5209 wakaba 1.92 undef $self->{form_element};
5210    
5211 wakaba 1.52 ## has an element in scope
5212 wakaba 1.92 my $i;
5213 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5214     my $node = $self->{open_elements}->[$_];
5215 wakaba 1.206 if ($node->[1] == FORM_EL) {
5216 wakaba 1.79 !!!cp ('t418');
5217 wakaba 1.92 $i = $_;
5218 wakaba 1.52 last INSCOPE;
5219 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5220 wakaba 1.79 !!!cp ('t419');
5221 wakaba 1.52 last INSCOPE;
5222     }
5223     } # INSCOPE
5224 wakaba 1.92
5225     unless (defined $i) { # has an element in scope
5226 wakaba 1.79 !!!cp ('t421');
5227 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5228     text => $token->{tag_name}, token => $token);
5229 wakaba 1.157 ## NOTE: Ignore the token.
5230 wakaba 1.92 } else {
5231     ## Step 1. generate implied end tags
5232 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5233 wakaba 1.92 !!!cp ('t417');
5234     pop @{$self->{open_elements}};
5235     }
5236    
5237     ## Step 2.
5238 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5239     ne $token->{tag_name}) {
5240 wakaba 1.92 !!!cp ('t417.1');
5241 wakaba 1.122 !!!parse-error (type => 'not closed',
5242 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5243 wakaba 1.122 ->manakai_local_name,
5244     token => $token);
5245 wakaba 1.92 } else {
5246     !!!cp ('t420');
5247     }
5248    
5249     ## Step 3.
5250     splice @{$self->{open_elements}}, $i;
5251 wakaba 1.52 }
5252    
5253     !!!next-token;
5254 wakaba 1.126 next B;
5255 wakaba 1.52 } elsif ({
5256 wakaba 1.195 ## NOTE: As normal, except acts as a closer for any ...
5257 wakaba 1.52 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5258     }->{$token->{tag_name}}) {
5259     ## has an element in scope
5260     my $i;
5261     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5262     my $node = $self->{open_elements}->[$_];
5263 wakaba 1.206 if ($node->[1] == HEADING_EL) {
5264 wakaba 1.79 !!!cp ('t423');
5265 wakaba 1.52 $i = $_;
5266     last INSCOPE;
5267 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5268 wakaba 1.79 !!!cp ('t424');
5269 wakaba 1.52 last INSCOPE;
5270 wakaba 1.51 }
5271 wakaba 1.52 } # INSCOPE
5272 wakaba 1.93
5273     unless (defined $i) { # has an element in scope
5274     !!!cp ('t425.1');
5275 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5276     text => $token->{tag_name}, token => $token);
5277 wakaba 1.157 ## NOTE: Ignore the token.
5278 wakaba 1.79 } else {
5279 wakaba 1.93 ## Step 1. generate implied end tags
5280 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5281 wakaba 1.93 !!!cp ('t422');
5282     pop @{$self->{open_elements}};
5283     }
5284    
5285     ## Step 2.
5286 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5287     ne $token->{tag_name}) {
5288 wakaba 1.93 !!!cp ('t425');
5289 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5290     text => $token->{tag_name}, token => $token);
5291 wakaba 1.93 } else {
5292     !!!cp ('t426');
5293     }
5294    
5295     ## Step 3.
5296     splice @{$self->{open_elements}}, $i;
5297 wakaba 1.36 }
5298 wakaba 1.52
5299     !!!next-token;
5300 wakaba 1.126 next B;
5301 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
5302 wakaba 1.195 ## NOTE: As normal, except </p> implies <p> and ...
5303    
5304 wakaba 1.87 ## has an element in scope
5305 wakaba 1.197 my $non_optional;
5306 wakaba 1.87 my $i;
5307     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5308     my $node = $self->{open_elements}->[$_];
5309 wakaba 1.206 if ($node->[1] == P_EL) {
5310 wakaba 1.87 !!!cp ('t410.1');
5311     $i = $_;
5312 wakaba 1.88 last INSCOPE;
5313 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5314 wakaba 1.87 !!!cp ('t411.1');
5315     last INSCOPE;
5316 wakaba 1.197 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5317     ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5318     !!!cp ('t411.2');
5319     #
5320     } else {
5321     !!!cp ('t411.3');
5322     $non_optional ||= $node;
5323     #
5324 wakaba 1.87 }
5325     } # INSCOPE
5326 wakaba 1.91
5327     if (defined $i) {
5328 wakaba 1.197 ## 1. Generate implied end tags
5329     #
5330    
5331     ## 2. If current node != "p", parse error
5332     if ($non_optional) {
5333 wakaba 1.87 !!!cp ('t412.1');
5334 wakaba 1.122 !!!parse-error (type => 'not closed',
5335 wakaba 1.197 text => $non_optional->[0]->manakai_local_name,
5336 wakaba 1.122 token => $token);
5337 wakaba 1.87 } else {
5338 wakaba 1.91 !!!cp ('t414.1');
5339 wakaba 1.87 }
5340 wakaba 1.91
5341 wakaba 1.197 ## 3. Pop
5342 wakaba 1.87 splice @{$self->{open_elements}}, $i;
5343     } else {
5344 wakaba 1.91 !!!cp ('t413.1');
5345 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5346     text => $token->{tag_name}, token => $token);
5347 wakaba 1.91
5348 wakaba 1.87 !!!cp ('t415.1');
5349     ## As if <p>, then reprocess the current token
5350     my $el;
5351 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
5352 wakaba 1.87 $insert->($el);
5353 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
5354 wakaba 1.87 }
5355 wakaba 1.91
5356 wakaba 1.87 !!!next-token;
5357 wakaba 1.126 next B;
5358 wakaba 1.52 } elsif ({
5359     a => 1,
5360     b => 1, big => 1, em => 1, font => 1, i => 1,
5361 wakaba 1.193 nobr => 1, s => 1, small => 1, strike => 1,
5362 wakaba 1.52 strong => 1, tt => 1, u => 1,
5363     }->{$token->{tag_name}}) {
5364 wakaba 1.79 !!!cp ('t427');
5365 wakaba 1.113 $formatting_end_tag->($token);
5366 wakaba 1.126 next B;
5367 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
5368 wakaba 1.79 !!!cp ('t428');
5369 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5370     text => 'br', token => $token);
5371 wakaba 1.52
5372     ## As if <br>
5373     $reconstruct_active_formatting_elements->($insert_to_current);
5374    
5375     my $el;
5376 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
5377 wakaba 1.52 $insert->($el);
5378    
5379     ## Ignore the token.
5380     !!!next-token;
5381 wakaba 1.126 next B;
5382 wakaba 1.52 } else {
5383 wakaba 1.195 if ($token->{tag_name} eq 'sarcasm') {
5384     sleep 0.001; # take a deep breath
5385     }
5386    
5387 wakaba 1.52 ## Step 1
5388     my $node_i = -1;
5389     my $node = $self->{open_elements}->[$node_i];
5390 wakaba 1.51
5391 wakaba 1.52 ## Step 2
5392     S2: {
5393 wakaba 1.200 my $node_tag_name = $node->[0]->manakai_local_name;
5394     $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5395     if ($node_tag_name eq $token->{tag_name}) {
5396 wakaba 1.52 ## Step 1
5397     ## generate implied end tags
5398 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5399 wakaba 1.79 !!!cp ('t430');
5400 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
5401     ## ISSUE: <ruby><rt></rt> will also take this code path,
5402     ## which seems wrong.
5403 wakaba 1.86 pop @{$self->{open_elements}};
5404 wakaba 1.151 $node_i++;
5405 wakaba 1.52 }
5406    
5407     ## Step 2
5408 wakaba 1.200 my $current_tag_name
5409     = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5410     $current_tag_name =~ tr/A-Z/a-z/;
5411     if ($current_tag_name ne $token->{tag_name}) {
5412 wakaba 1.79 !!!cp ('t431');
5413 wakaba 1.58 ## NOTE: <x><y></x>
5414 wakaba 1.122 !!!parse-error (type => 'not closed',
5415 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5416 wakaba 1.122 ->manakai_local_name,
5417     token => $token);
5418 wakaba 1.79 } else {
5419     !!!cp ('t432');
5420 wakaba 1.52 }
5421    
5422     ## Step 3
5423 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5424 wakaba 1.51
5425 wakaba 1.1 !!!next-token;
5426 wakaba 1.52 last S2;
5427 wakaba 1.1 } else {
5428 wakaba 1.52 ## Step 3
5429 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5430 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5431 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5432     $node->[1] & SCOPING_EL)) {
5433 wakaba 1.79 !!!cp ('t433');
5434 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5435     text => $token->{tag_name}, token => $token);
5436 wakaba 1.52 ## Ignore the token
5437     !!!next-token;
5438     last S2;
5439 wakaba 1.193
5440     ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5441     ## 9.27, "a" is a child of <dd> (conforming). In
5442     ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5443     ## "a" is a child of both <body> and <dd>.
5444 wakaba 1.52 }
5445 wakaba 1.193
5446 wakaba 1.79 !!!cp ('t434');
5447 wakaba 1.1 }
5448 wakaba 1.52
5449     ## Step 4
5450     $node_i--;
5451     $node = $self->{open_elements}->[$node_i];
5452    
5453     ## Step 5;
5454     redo S2;
5455     } # S2
5456 wakaba 1.126 next B;
5457 wakaba 1.1 }
5458     }
5459 wakaba 1.126 next B;
5460     } continue { # B
5461     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5462     ## NOTE: The code below is executed in cases where it does not have
5463     ## to be, but it is harmless even in those cases.
5464     ## has an element in scope
5465     INSCOPE: {
5466     for (reverse 0..$#{$self->{open_elements}}) {
5467     my $node = $self->{open_elements}->[$_];
5468     if ($node->[1] & FOREIGN_EL) {
5469     last INSCOPE;
5470     } elsif ($node->[1] & SCOPING_EL) {
5471     last;
5472     }
5473     }
5474    
5475     ## NOTE: No foreign element in scope.
5476     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5477     } # INSCOPE
5478     }
5479 wakaba 1.1 } # B
5480    
5481     ## Stop parsing # MUST
5482    
5483     ## TODO: script stuffs
5484 wakaba 1.3 } # _tree_construct_main
5485    
## XXX: How this method is organized is somewhat out of date, although
## it still does what the current spec documents.
##
## |set_inner_html| - Replace the children of a /context/ node by the
## result of parsing a character string.
##
## Usage: |$class->set_inner_html ($node, $string, $onerror, $get_wrapper)|
##
##   $node        - The /context/ node.  Its |node_type| selects the
##                  behavior: 9 (Document) or 1 (Element).
##   $string      - The character string to parse.  It is accessed
##                  through |$_[0]| and is never copied into a lexical.
##   $onerror     - Optional code reference receiving parse errors as a
##                  list of name/value pairs.  In the Element case, a
##                  default handler that |warn|s is used when omitted;
##                  in the Document case the value is handed to
##                  |parse_char_string| as is.
##   $get_wrapper - Optional code reference that receives the input
##                  handle and returns the handle to read from.
##                  Defaults to the identity function.
##
## Document: all children of |$node| are removed and the string is
## parsed into |$node| by |parse_char_string|.
##
## Element: a temporary HTML document and a fresh parser |$p| are
## created, the string is parsed as a fragment under a synthetic
## |html| root, the children of |$node| are removed, and the children
## of the synthetic root are adopted into |$node|'s document and
## appended to |$node|.
##
## Dies if |$node| is neither a Document nor an Element.
sub set_inner_html ($$$$;$) {
  my $class = shift;
  my $node = shift; # /context/
  #my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  my $nt = $node->node_type;
  if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: Iterate over a copy of the child list, since the list is
    ## modified by |remove_child| while we walk it.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper

    ## F1. Create an HTML document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);

    ## F2. Propagate quirkness flag
    my $node_doc = $node->owner_document;
    $doc->manakai_compat_mode ($node_doc->manakai_compat_mode);

    ## F3. Create an HTML parser
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    ## NOTE(review): |$i| is not referenced again in this method as far
    ## as the visible code shows; kept in case a macro expansion uses it.
    my $i = 0;
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    require Whatpm::Charset::DecodeHandle;
    my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
    $input = $get_wrapper->($input);

    ## |set_nc|: Sets |$self->{nc}| to the code point of the next input
    ## character (-1 at the end of the input), maintaining the
    ## line/column counters.  CR and CRLF are reported as a single LF,
    ## and U+0000 is reported as U+FFFD after raising a parse error.
    $p->{set_nc} = sub {
      my $self = shift;

      my $char = '';
      if (defined $self->{next_nc}) {
        ## A character has been read ahead while handling a CR.
        $char = $self->{next_nc};
        delete $self->{next_nc};
        $self->{nc} = ord $char;
      } else {
        $self->{char_buffer} = '';
        $self->{char_buffer_pos} = 0;

        ## Fast path: buffer a run of characters that are none of NULL,
        ## LF, or CR, i.e. characters that need no special handling.
        my $count = $input->manakai_read_until
            ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
             $self->{char_buffer_pos});
        if ($count) {
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column};
          $self->{column}++;
          $self->{nc}
              = ord substr ($self->{char_buffer},
                            $self->{char_buffer_pos}++, 1);
          return;
        }

        if ($input->read ($char, 1)) {
          $self->{nc} = ord $char;
        } else {
          $self->{nc} = -1; # End of the input
          return;
        }
      }

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{nc} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{nc} == 0x000D) { # CR
        ## TODO: support for abort/streaming
        ## NOTE: Swallow the LF of a CRLF pair; any other character is
        ## kept in |next_nc| for the next invocation.
        my $next = '';
        if ($input->read ($next, 1) and $next ne "\x0A") {
          $self->{next_nc} = $next;
        }
        $self->{nc} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{nc} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## |read_until|: Copies a run of characters that are neither in the
    ## character class given by |$_[1]| nor NULL/LF/CR into the scalar
    ## |$_[0]| at offset |$_[2]|, and returns the number of characters
    ## copied (0 if nothing could be read in this way).
    $p->{read_until} = sub {
      #my ($scalar, $specials_range, $offset) = @_;
      return 0 if defined $p->{next_nc};

      my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
      my $offset = $_[2] || 0;

      if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
        ## Consume from what |set_nc| has already buffered.
        pos ($p->{char_buffer}) = $p->{char_buffer_pos};
        if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
          substr ($_[0], $offset)
              = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
          my $count = $+[0] - $-[0];
          if ($count) {
            $p->{column} += $count;
            $p->{char_buffer_pos} += $count;
            $p->{line_prev} = $p->{line};
            $p->{column_prev} = $p->{column} - 1;
            $p->{nc} = -1;
          }
          return $count;
        } else {
          return 0;
        }
      } else {
        ## The buffer is exhausted; read from the input directly.
        my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
        if ($count) {
          $p->{column} += $count;
          $p->{column_prev} += $count;
          $p->{nc} = -1;
        }
        return $count;
      }
    }; # $p->{read_until}

    ## Default error handler: report by |warn|, preferring the position
    ## recorded in the token (if any) to the current parser position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    ## Errors reported by the input handle are forwarded to the same
    ## handler, marked as |layer => 'encode'|.
    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
      $ponerror->(layer => 'encode',
                  line => $p->{line}, column => $p->{column} + 1,
                  %opt, type => $type);
    }; # $char_onerror
    $input->onerror ($char_onerror);

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## F4. If /context/ is not undef...

    ## F4.1. content model flag
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## F4.2. Root |html| element
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## F4.3.
    $doc->append_child ($root);

    ## F4.4.
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};
    undef $p->{head_element_inserted};

    ## F4.5.
    $p->_reset_insertion_mode;

    ## F4.6.
    ## Walk from /context/ up through its ancestors and use the nearest
    ## HTML |form| element (if any) as the form element pointer.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## F5. Set the input stream.
    ## NOTE: The flag has to be set on the fragment parser |$p|.  No
    ## |$self| is declared in this scope (|$self| only exists within
    ## the |set_nc| closure and the F6 block below).
    $p->{confident} = 1; ## Confident: irrelevant.

    ## F6. Start the parser.
    {
      ## NOTE: |$self| is declared here for the |!!!next-token| macro.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## F7.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Remove self references.  All of these closures capture |$p|, so
    ## leaving any of them in |$p| would keep the parser alive forever.
    delete $p->{set_nc};
    delete $p->{read_until};
    delete $p->{parse_error};
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
5741    
5742     } # tree construction stage
5743 wakaba 1.1
## |Whatpm::HTML::RestartParser| - A subclass of |Error| (the module
## loaded at the top of this file) that adds no methods of its own.
## NOTE(review): Presumably thrown to request that parsing be
## restarted; confirm against the |throw| sites elsewhere in the file.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

## Module files must end with a true value.
1;
5748 wakaba 1.235 # $Date: 2009/09/06 02:20:52 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24