/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.228 - (hide annotations) (download) (as text)
Sun Aug 16 06:47:59 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.227: +11 -11 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	16 Aug 2009 06:43:09 -0000
	* tree-test-1.dat: Added tests for about:legacy-compat and changed
	test results for XSLT-compat (HTML5 revision 2725).

2009-08-16  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	16 Aug 2009 06:44:01 -0000
	* HTML.pm.src: Dropped support for "XSLT-compat" and added
	"about:legacy-compat" (HTML5 revision 2725).

2009-08-16  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.228 our $VERSION=do{my @r=(q$Revision: 1.227 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.208 use Whatpm::HTML::Tokenizer;
7    
8 wakaba 1.182 ## NOTE: This module doesn't check all HTML5 parse errors; character
9     ## encoding related parse errors are expected to be handled by relevant
10     ## modules.
11     ## Parse errors for control characters that are not allowed in HTML5
12     ## documents, for surrogate code points, and for noncharacter code
13     ## points, as well as U+FFFD substitutions for characters whose code point
14     ## is higher than U+10FFFF may be detected by combining the parser with
15     ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16     ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17     ## WebHACC::Language::HTML module in the WebHACC package).
18    
19 wakaba 1.18 ## ISSUE:
20     ## var doc = implementation.createDocument (null, null, null);
21     ## doc.write ('');
22     ## alert (doc.compatMode);
23 wakaba 1.1
24 wakaba 1.139 require IO::Handle;
25    
26 wakaba 1.208 ## Namespace URLs
27    
28 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30     my $SVG_NS = q<http://www.w3.org/2000/svg>;
31     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34    
35 wakaba 1.208 ## Element categories
36    
37 wakaba 1.206 ## Bits 12-15
38     sub SPECIAL_EL () { 0b1_000000000000000 }
39     sub SCOPING_EL () { 0b1_00000000000000 }
40     sub FORMATTING_EL () { 0b1_0000000000000 }
41     sub PHRASING_EL () { 0b1_000000000000 }
42    
43     ## Bits 10-11
44 wakaba 1.208 #sub FOREIGN_EL () { 0b1_00000000000 } # see Whatpm::HTML::Tokenizer
45 wakaba 1.206 sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 }
46    
47     ## Bits 6-9
48     sub TABLE_SCOPING_EL () { 0b1_000000000 }
49     sub TABLE_ROWS_SCOPING_EL () { 0b1_00000000 }
50     sub TABLE_ROW_SCOPING_EL () { 0b1_0000000 }
51     sub TABLE_ROWS_EL () { 0b1_000000 }
52    
53     ## Bit 5
54     sub ADDRESS_DIV_P_EL () { 0b1_00000 }
55    
56     ## NOTE: Used in </body> and EOF algorithms.
57     ## Bit 4
58     sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
59 wakaba 1.123
60 wakaba 1.151 ## NOTE: Used in "generate implied end tags" algorithm.
61 wakaba 1.194 ## NOTE: There is code where a modified version of
62     ## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
63     ## implementation (search for the algorithm name).
64 wakaba 1.206 ## Bit 3
65     sub END_TAG_OPTIONAL_EL () { 0b1_000 }
66    
67     ## Bits 0-2
68    
69     sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
70     sub FORM_EL () { SPECIAL_EL | 0b001 }
71     sub FRAMESET_EL () { SPECIAL_EL | 0b010 }
72     sub HEADING_EL () { SPECIAL_EL | 0b011 }
73     sub SELECT_EL () { SPECIAL_EL | 0b100 }
74     sub SCRIPT_EL () { SPECIAL_EL | 0b101 }
75    
76     sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
77     sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }
78    
79 wakaba 1.207 sub DTDD_EL () {
80 wakaba 1.206 SPECIAL_EL |
81     END_TAG_OPTIONAL_EL |
82     ALL_END_TAG_OPTIONAL_EL |
83     0b010
84     }
85     sub LI_EL () {
86     SPECIAL_EL |
87     END_TAG_OPTIONAL_EL |
88     ALL_END_TAG_OPTIONAL_EL |
89     0b100
90     }
91     sub P_EL () {
92     SPECIAL_EL |
93     ADDRESS_DIV_P_EL |
94     END_TAG_OPTIONAL_EL |
95     ALL_END_TAG_OPTIONAL_EL |
96     0b001
97 wakaba 1.123 }
98    
99 wakaba 1.206 sub TABLE_ROW_EL () {
100     SPECIAL_EL |
101     TABLE_ROWS_EL |
102     TABLE_ROW_SCOPING_EL |
103     ALL_END_TAG_OPTIONAL_EL |
104     0b001
105     }
106     sub TABLE_ROW_GROUP_EL () {
107     SPECIAL_EL |
108     TABLE_ROWS_EL |
109     TABLE_ROWS_SCOPING_EL |
110     ALL_END_TAG_OPTIONAL_EL |
111     0b001
112 wakaba 1.123 }
113    
114 wakaba 1.206 sub MISC_SCOPING_EL () { SCOPING_EL | 0b000 }
115     sub BUTTON_EL () { SCOPING_EL | 0b001 }
116     sub CAPTION_EL () { SCOPING_EL | 0b010 }
117     sub HTML_EL () {
118     SCOPING_EL |
119     TABLE_SCOPING_EL |
120     TABLE_ROWS_SCOPING_EL |
121     TABLE_ROW_SCOPING_EL |
122     ALL_END_TAG_OPTIONAL_EL |
123     0b001
124 wakaba 1.123 }
125 wakaba 1.206 sub TABLE_EL () {
126     SCOPING_EL |
127     TABLE_ROWS_EL |
128     TABLE_SCOPING_EL |
129     0b001
130 wakaba 1.123 }
131 wakaba 1.206 sub TABLE_CELL_EL () {
132     SCOPING_EL |
133     TABLE_ROW_SCOPING_EL |
134     ALL_END_TAG_OPTIONAL_EL |
135     0b001
136 wakaba 1.123 }
137    
138 wakaba 1.206 sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
139     sub A_EL () { FORMATTING_EL | 0b001 }
140     sub NOBR_EL () { FORMATTING_EL | 0b010 }
141    
142     sub RUBY_EL () { PHRASING_EL | 0b001 }
143    
144     ## ISSUE: ALL_END_TAG_OPTIONAL_EL?
145     sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
146     sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
147     sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }
148 wakaba 1.123
149 wakaba 1.206 sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 0b001 }
150 wakaba 1.123
151     my $el_category = {
152 wakaba 1.206 a => A_EL,
153     address => ADDRESS_DIV_EL,
154 wakaba 1.123 applet => MISC_SCOPING_EL,
155     area => MISC_SPECIAL_EL,
156 wakaba 1.193 article => MISC_SPECIAL_EL,
157     aside => MISC_SPECIAL_EL,
158 wakaba 1.123 b => FORMATTING_EL,
159     base => MISC_SPECIAL_EL,
160     basefont => MISC_SPECIAL_EL,
161     bgsound => MISC_SPECIAL_EL,
162     big => FORMATTING_EL,
163     blockquote => MISC_SPECIAL_EL,
164     body => BODY_EL,
165     br => MISC_SPECIAL_EL,
166     button => BUTTON_EL,
167     caption => CAPTION_EL,
168     center => MISC_SPECIAL_EL,
169     col => MISC_SPECIAL_EL,
170     colgroup => MISC_SPECIAL_EL,
171 wakaba 1.193 command => MISC_SPECIAL_EL,
172     datagrid => MISC_SPECIAL_EL,
173 wakaba 1.207 dd => DTDD_EL,
174 wakaba 1.193 details => MISC_SPECIAL_EL,
175     dialog => MISC_SPECIAL_EL,
176 wakaba 1.123 dir => MISC_SPECIAL_EL,
177 wakaba 1.206 div => ADDRESS_DIV_EL,
178 wakaba 1.123 dl => MISC_SPECIAL_EL,
179 wakaba 1.207 dt => DTDD_EL,
180 wakaba 1.123 em => FORMATTING_EL,
181     embed => MISC_SPECIAL_EL,
182 wakaba 1.193 eventsource => MISC_SPECIAL_EL,
183 wakaba 1.123 fieldset => MISC_SPECIAL_EL,
184 wakaba 1.193 figure => MISC_SPECIAL_EL,
185 wakaba 1.123 font => FORMATTING_EL,
186 wakaba 1.193 footer => MISC_SPECIAL_EL,
187 wakaba 1.123 form => FORM_EL,
188     frame => MISC_SPECIAL_EL,
189     frameset => FRAMESET_EL,
190     h1 => HEADING_EL,
191     h2 => HEADING_EL,
192     h3 => HEADING_EL,
193     h4 => HEADING_EL,
194     h5 => HEADING_EL,
195     h6 => HEADING_EL,
196     head => MISC_SPECIAL_EL,
197 wakaba 1.193 header => MISC_SPECIAL_EL,
198 wakaba 1.123 hr => MISC_SPECIAL_EL,
199     html => HTML_EL,
200     i => FORMATTING_EL,
201     iframe => MISC_SPECIAL_EL,
202     img => MISC_SPECIAL_EL,
203 wakaba 1.193 #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
204 wakaba 1.123 input => MISC_SPECIAL_EL,
205     isindex => MISC_SPECIAL_EL,
206     li => LI_EL,
207     link => MISC_SPECIAL_EL,
208     listing => MISC_SPECIAL_EL,
209     marquee => MISC_SCOPING_EL,
210     menu => MISC_SPECIAL_EL,
211     meta => MISC_SPECIAL_EL,
212 wakaba 1.193 nav => MISC_SPECIAL_EL,
213 wakaba 1.206 nobr => NOBR_EL,
214 wakaba 1.123 noembed => MISC_SPECIAL_EL,
215     noframes => MISC_SPECIAL_EL,
216     noscript => MISC_SPECIAL_EL,
217     object => MISC_SCOPING_EL,
218     ol => MISC_SPECIAL_EL,
219     optgroup => OPTGROUP_EL,
220     option => OPTION_EL,
221     p => P_EL,
222     param => MISC_SPECIAL_EL,
223     plaintext => MISC_SPECIAL_EL,
224     pre => MISC_SPECIAL_EL,
225 wakaba 1.151 rp => RUBY_COMPONENT_EL,
226     rt => RUBY_COMPONENT_EL,
227     ruby => RUBY_EL,
228 wakaba 1.123 s => FORMATTING_EL,
229     script => MISC_SPECIAL_EL,
230     select => SELECT_EL,
231 wakaba 1.193 section => MISC_SPECIAL_EL,
232 wakaba 1.123 small => FORMATTING_EL,
233     spacer => MISC_SPECIAL_EL,
234     strike => FORMATTING_EL,
235     strong => FORMATTING_EL,
236     style => MISC_SPECIAL_EL,
237     table => TABLE_EL,
238     tbody => TABLE_ROW_GROUP_EL,
239     td => TABLE_CELL_EL,
240     textarea => MISC_SPECIAL_EL,
241     tfoot => TABLE_ROW_GROUP_EL,
242     th => TABLE_CELL_EL,
243     thead => TABLE_ROW_GROUP_EL,
244     title => MISC_SPECIAL_EL,
245     tr => TABLE_ROW_EL,
246     tt => FORMATTING_EL,
247     u => FORMATTING_EL,
248     ul => MISC_SPECIAL_EL,
249     wbr => MISC_SPECIAL_EL,
250     };
251    
252 wakaba 1.126 my $el_category_f = {
253     $MML_NS => {
254     'annotation-xml' => MML_AXML_EL,
255 wakaba 1.206 mi => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
256     mo => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
257     mn => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
258     ms => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
259     mtext => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
260 wakaba 1.126 },
261     $SVG_NS => {
262 wakaba 1.206 foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
263     desc => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
264     title => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
265 wakaba 1.126 },
266     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
267     };
268    
269 wakaba 1.131 my $svg_attr_name = {
270 wakaba 1.146 attributename => 'attributeName',
271 wakaba 1.131 attributetype => 'attributeType',
272     basefrequency => 'baseFrequency',
273     baseprofile => 'baseProfile',
274     calcmode => 'calcMode',
275     clippathunits => 'clipPathUnits',
276     contentscripttype => 'contentScriptType',
277     contentstyletype => 'contentStyleType',
278     diffuseconstant => 'diffuseConstant',
279     edgemode => 'edgeMode',
280     externalresourcesrequired => 'externalResourcesRequired',
281     filterres => 'filterRes',
282     filterunits => 'filterUnits',
283     glyphref => 'glyphRef',
284     gradienttransform => 'gradientTransform',
285     gradientunits => 'gradientUnits',
286     kernelmatrix => 'kernelMatrix',
287     kernelunitlength => 'kernelUnitLength',
288     keypoints => 'keyPoints',
289     keysplines => 'keySplines',
290     keytimes => 'keyTimes',
291     lengthadjust => 'lengthAdjust',
292     limitingconeangle => 'limitingConeAngle',
293     markerheight => 'markerHeight',
294     markerunits => 'markerUnits',
295     markerwidth => 'markerWidth',
296     maskcontentunits => 'maskContentUnits',
297     maskunits => 'maskUnits',
298     numoctaves => 'numOctaves',
299     pathlength => 'pathLength',
300     patterncontentunits => 'patternContentUnits',
301     patterntransform => 'patternTransform',
302     patternunits => 'patternUnits',
303     pointsatx => 'pointsAtX',
304     pointsaty => 'pointsAtY',
305     pointsatz => 'pointsAtZ',
306     preservealpha => 'preserveAlpha',
307     preserveaspectratio => 'preserveAspectRatio',
308     primitiveunits => 'primitiveUnits',
309     refx => 'refX',
310     refy => 'refY',
311     repeatcount => 'repeatCount',
312     repeatdur => 'repeatDur',
313     requiredextensions => 'requiredExtensions',
314 wakaba 1.146 requiredfeatures => 'requiredFeatures',
315 wakaba 1.131 specularconstant => 'specularConstant',
316     specularexponent => 'specularExponent',
317     spreadmethod => 'spreadMethod',
318     startoffset => 'startOffset',
319     stddeviation => 'stdDeviation',
320     stitchtiles => 'stitchTiles',
321     surfacescale => 'surfaceScale',
322     systemlanguage => 'systemLanguage',
323     tablevalues => 'tableValues',
324     targetx => 'targetX',
325     targety => 'targetY',
326     textlength => 'textLength',
327     viewbox => 'viewBox',
328     viewtarget => 'viewTarget',
329     xchannelselector => 'xChannelSelector',
330     ychannelselector => 'yChannelSelector',
331     zoomandpan => 'zoomAndPan',
332     };
333    
334     my $foreign_attr_xname = {
335     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
336     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
337     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
338     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
339     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
340     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
341     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
342     'xml:base' => [$XML_NS, ['xml', 'base']],
343     'xml:lang' => [$XML_NS, ['xml', 'lang']],
344     'xml:space' => [$XML_NS, ['xml', 'space']],
345     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
346     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
347     };
348    
349     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
350    
351 wakaba 1.192 ## TODO: Invoke the reset algorithm when a resettable element is
352     ## created (cf. HTML5 revision 2259).
353    
## parse_byte_string ($charset_name, $bytes, ...)
##
## Convenience wrapper around |parse_byte_stream|: opens a read handle on
## the byte string (or on the scalar it refers to, if a reference is
## given) and forwards the charset name, the handle, and every remaining
## argument unchanged.  Returns whatever |parse_byte_stream| returns.
##
## NOTE(review): the result of |open| is not checked; if it fails,
## |$input| is passed on undefined.
354 wakaba 1.63 sub parse_byte_string ($$$$;$) {
355 wakaba 1.138 my $self = shift;
356     my $charset_name = shift;
357     open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
358     return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
359     } # parse_byte_string
360    
## parse_byte_stream ($charset_name, $byte_stream, $doc, $onerror, $get_wrapper)
##
## Selects a character encoding for |$byte_stream| (the SNIFFING block
## below), obtains a decode handle for it, wraps that handle with
## |$get_wrapper| and hands it to |parse_char_stream|.  Returns the value
## returned by |parse_char_stream|.
##
## - The invocant may be an object or a class name; in the latter case a
##   parser is created with |new|.
## - |$onerror| defaults to a handler that |warn|s "Parse error (type)".
## - |$get_wrapper| defaults to a function returning its argument as is.
## - If a |Whatpm::HTML::RestartParser| exception is caught, the document
##   is parsed a second time with the decode handle that the
##   |{change_encoding}| callback stored in |$char_stream|.
361 wakaba 1.162 sub parse_byte_stream ($$$$;$$) {
362     # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
363 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
364 wakaba 1.133 my $charset_name = shift;
365 wakaba 1.138 my $byte_stream = $_[0];
366 wakaba 1.133
367 wakaba 1.134 my $onerror = $_[2] || sub {
368     my (%opt) = @_;
369     warn "Parse error ($opt{type})\n";
370     };
371     $self->{parse_error} = $onerror; # updated later by parse_char_string
372    
373 wakaba 1.162 my $get_wrapper = $_[3] || sub ($) {
374     return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
375     };
376    
377 wakaba 1.133 ## HTML5 encoding sniffing algorithm
378     require Message::Charset::Info;
379     my $charset;
380 wakaba 1.136 my $buffer;
381     my ($char_stream, $e_status);
382 wakaba 1.133
383     SNIFFING: {
384 wakaba 1.160 ## NOTE: By setting |allow_fallback| option true when the
385     ## |get_decode_handle| method is invoked, we ignore what the HTML5
386     ## spec requires, i.e. unsupported encoding should be ignored.
387     ## TODO: We should not do this unless the parser is invoked
388     ## in the conformance checking mode, in which this behavior
389     ## would be useful.
390 wakaba 1.133
391     ## Step 1
## The caller-supplied charset name is tried first; when a decode
## handle is obtained the parser is marked confident.
392     if (defined $charset_name) {
393 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
394     ## TODO: Is this ok? Transfer protocol's parameter should be
395     ## interpreted in its semantics?
396 wakaba 1.133
397 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
398     ($byte_stream, allow_error_reporting => 1,
399 wakaba 1.133 allow_fallback => 1);
400 wakaba 1.136 if ($char_stream) {
401 wakaba 1.133 $self->{confident} = 1;
402     last SNIFFING;
403 wakaba 1.136 } else {
404 wakaba 1.190 !!!parse-error (type => 'charset:not supported',
405     layer => 'encode',
406     line => 1, column => 1,
407     value => $charset_name,
408     level => $self->{level}->{uncertain});
409 wakaba 1.133 }
410     }
411    
412     ## Step 2
## Read at most 1024 bytes, one |getc| at a time, for the checks below.
413 wakaba 1.136 my $byte_buffer = '';
414     for (1..1024) {
415     my $char = $byte_stream->getc;
416     last unless defined $char;
417     $byte_buffer .= $char;
418     } ## TODO: timeout
419 wakaba 1.133
420     ## Step 3
## Byte order mark: FE FF, FF FE and EF BB BF select utf-16be,
## utf-16le and utf-8 respectively, with the parser marked confident.
421 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
422 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
423 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
424     ($byte_stream, allow_error_reporting => 1,
425     allow_fallback => 1, byte_buffer => \$byte_buffer);
426 wakaba 1.133 $self->{confident} = 1;
427     last SNIFFING;
428 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
429 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
430 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
431     ($byte_stream, allow_error_reporting => 1,
432     allow_fallback => 1, byte_buffer => \$byte_buffer);
433 wakaba 1.133 $self->{confident} = 1;
434     last SNIFFING;
435 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
436 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
437 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
438     ($byte_stream, allow_error_reporting => 1,
439     allow_fallback => 1, byte_buffer => \$byte_buffer);
440 wakaba 1.133 $self->{confident} = 1;
441     last SNIFFING;
442     }
443    
444     ## Step 4
445     ## TODO: <meta charset>
446    
447     ## Step 5
448     ## TODO: from history
449    
450     ## Step 6
## Guess the encoding from the bytes read in step 2; a successful
## guess is reported at the |info| level and leaves the parser
## not confident.
451 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
452 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
453 wakaba 1.136 ($byte_buffer);
454 wakaba 1.133 if (defined $charset_name) {
455 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
456 wakaba 1.133
457 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
458     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
459     ($byte_stream);
460     ($char_stream, $e_status) = $charset->get_decode_handle
461     ($buffer, allow_error_reporting => 1,
462     allow_fallback => 1, byte_buffer => \$byte_buffer);
463     if ($char_stream) {
464     $buffer->{buffer} = $byte_buffer;
465 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
466     text => $charset_name,
467     level => $self->{level}->{info},
468     layer => 'encode',
469 wakaba 1.134 line => 1, column => 1);
470 wakaba 1.133 $self->{confident} = 0;
471     last SNIFFING;
472     }
473     }
474    
475     ## Step 7: default
476     ## TODO: Make this configurable.
477 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
478 wakaba 1.133 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
479     ## detectable in the step 6.
480 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
481     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
482     ($byte_stream);
483     ($char_stream, $e_status)
484     = $charset->get_decode_handle ($buffer,
485     allow_error_reporting => 1,
486     allow_fallback => 1,
487     byte_buffer => \$byte_buffer);
488     $buffer->{buffer} = $byte_buffer;
489 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
490     text => 'windows-1252',
491     level => $self->{level}->{info},
492     line => 1, column => 1,
493     layer => 'encode');
494 wakaba 1.63 $self->{confident} = 0;
495 wakaba 1.133 } # SNIFFING
496    
## Record the selected encoding name and report, at the |uncertain|
## level, when the decoder is flagged as a fallback implementation or
## lacks the error-reporting flag.
497     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
498 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
499 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
500 wakaba 1.160 #text => $self->{input_encoding},
501 wakaba 1.153 level => $self->{level}->{uncertain},
502     line => 1, column => 1,
503     layer => 'encode');
504 wakaba 1.133 } elsif (not ($e_status &
505 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
506 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
507 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
508     text => $self->{input_encoding},
509     level => $self->{level}->{uncertain},
510     line => 1, column => 1,
511     layer => 'encode');
512 wakaba 1.160 } else {
513     $self->{input_encoding} = $charset->get_iana_name;
514 wakaba 1.63 }
515    
## Callback taking (parser, charset name, token).  It overwrites the
## enclosing |$charset_name|, |$charset|, |$char_stream| and |$e_status|
## so that the |catch| block below can restart with the new decoder.
516     $self->{change_encoding} = sub {
517     my $self = shift;
518 wakaba 1.134 $charset_name = shift;
519 wakaba 1.114 my $token = shift;
520 wakaba 1.63
## NOTE(review): |$buffer| is assigned only in steps 6 and 7 above; if
## sniffing ended at step 1 or 3 it is still undefined when this
## reference is taken - confirm that this is intended.
521 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
522 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
523     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
524     byte_buffer => \ $buffer->{buffer});
525 wakaba 1.134
526 wakaba 1.136 if ($char_stream) { # if supported
527 wakaba 1.134 ## "Change the encoding" algorithm:
528 wakaba 1.215
529     ## Step 1
530     if (defined $self->{input_encoding} and
531     $self->{input_encoding} eq $charset_name) {
532     !!!parse-error (type => 'charset label:matching',
533     text => $charset_name,
534     level => $self->{level}->{info});
535     $self->{confident} = 1;
536     return;
537     }
538 wakaba 1.63
539 wakaba 1.214 ## Step 2 (HTML5 revision 3205)
540     if (defined $self->{input_encoding} and
541     Message::Charset::Info->get_by_html_name ($self->{input_encoding})
542     ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
543     $self->{confident} = 1;
544     return;
545     }
546    
547     ## Step 3
## A requested encoding in the UTF-16 category is replaced by utf-8.
548 wakaba 1.149 if ($charset->{category} &
549     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
550 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
551 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
552     ($byte_stream,
553     byte_buffer => \ $buffer->{buffer});
554 wakaba 1.134 }
555     $charset_name = $charset->get_iana_name;
556 wakaba 1.63
557 wakaba 1.153 !!!parse-error (type => 'charset label detected',
558     text => $self->{input_encoding},
559     value => $charset_name,
560     level => $self->{level}->{warn},
561     token => $token);
562 wakaba 1.134
563 wakaba 1.214 ## Step 4
564 wakaba 1.134 # if (can) {
565     ## change the encoding on the fly.
566     #$self->{confident} = 1;
567     #return;
568     # }
569    
570 wakaba 1.214 ## Step 5
571 wakaba 1.134 throw Whatpm::HTML::RestartParser ();
572 wakaba 1.63 }
573     }; # $self->{change_encoding}
574    
## Error handler installed on the (wrapped) character stream: reports
## the error with layer |encode| and, when an |octets| reference is
## supplied, overwrites the referenced value with U+FFFD.
575 wakaba 1.136 my $char_onerror = sub {
576     my (undef, $type, %opt) = @_;
577 wakaba 1.153 !!!parse-error (layer => 'encode',
578 wakaba 1.174 line => $self->{line}, column => $self->{column} + 1,
579     %opt, type => $type);
580 wakaba 1.136 if ($opt{octets}) {
581     ${$opt{octets}} = "\x{FFFD}"; # replacement character
582     }
583     };
584 wakaba 1.162
585     my $wrapped_char_stream = $get_wrapper->($char_stream);
586     $wrapped_char_stream->onerror ($char_onerror);
587 wakaba 1.136
588 wakaba 1.182 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
589 wakaba 1.63 my $return;
590     try {
591 wakaba 1.162 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
592 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
593 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
594    
595     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
596 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
597 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
598     level => $self->{level}->{uncertain},
599 wakaba 1.160 #text => $self->{input_encoding},
600 wakaba 1.153 line => 1, column => 1,
601     layer => 'encode');
602 wakaba 1.134 } elsif (not ($e_status &
603 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
604 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
605 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
606     text => $self->{input_encoding},
607     level => $self->{level}->{uncertain},
608     line => 1, column => 1,
609     layer => 'encode');
610 wakaba 1.160 } else {
611     $self->{input_encoding} = $charset->get_iana_name;
612 wakaba 1.134 }
613 wakaba 1.63 $self->{confident} = 1;
614 wakaba 1.162
615     $wrapped_char_stream = $get_wrapper->($char_stream);
616     $wrapped_char_stream->onerror ($char_onerror);
617    
618     $return = $self->parse_char_stream ($wrapped_char_stream, @args);
619 wakaba 1.63 };
620     return $return;
621 wakaba 1.138 } # parse_byte_stream
622 wakaba 1.63
623 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
624     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
625     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
626     ## because the core part of our HTML parser expects a string of characters,
627     ## not a string of bytes or code units or anything which might contain a BOM.
628     ## Therefore, any parser interface that accepts a string of bytes,
629     ## such as |parse_byte_string| in this module, must ensure that it does
630     ## strip the BOM and never strip any ZWNBSP.
631    
## parse_char_string ($s, $doc, $onerror, $get_wrapper)
##
## Wraps the character string |$s| (or the scalar it refers to, if a
## reference is given) in a |Whatpm::Charset::DecodeHandle::CharString|
## handle and forwards it, together with the remaining arguments, to
## |parse_char_stream|.  Returns whatever |parse_char_stream| returns.
## Also callable under the older name |parse_string|.
632 wakaba 1.162 sub parse_char_string ($$$;$$) {
633     #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
634 wakaba 1.135 my $self = shift;
635 wakaba 1.139 my $s = ref $_[0] ? $_[0] : \($_[0]);
636 wakaba 1.171 require Whatpm::Charset::DecodeHandle;
637     my $input = Whatpm::Charset::DecodeHandle::CharString->new ($s);
638 wakaba 1.135 return $self->parse_char_stream ($input, @_[1..$#_]);
639     } # parse_char_string
640 wakaba 1.162 *parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
641 wakaba 1.63
## parse_char_stream ($input, $doc, $onerror, $get_wrapper)
##
## Parses the character stream |$input| into the document |$doc| and
## returns that document.
##
## - The invocant may be an object or a class name; in the latter case a
##   parser is created with |new|.
## - All existing child nodes of |$doc| are removed first.
## - |$onerror| defaults to a handler that |warn|s the error type with
##   line and column.
## - If |$get_wrapper| is given, |$input| is replaced by its return value.
##
## The method installs the |{set_nc}|, |{read_until}| and |{parse_error}|
## closures on the parser, then runs the tokenizer and tree constructor
## steps in order.
642 wakaba 1.182 sub parse_char_stream ($$$;$$) {
643 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
644 wakaba 1.135 my $input = $_[0];
645 wakaba 1.1 $self->{document} = $_[1];
646 wakaba 1.63 @{$self->{document}->child_nodes} = ();
647 wakaba 1.1
648 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
649    
650 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
651 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
652     if defined $self->{input_encoding};
653 wakaba 1.178 ## TODO: |{input_encoding}| is needless?
654 wakaba 1.63
655 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
656 wakaba 1.179 $self->{column_prev} = -1;
657     $self->{column} = 0;
## |{set_nc}|: stores the code point of the next input character in
## |{nc}| (-1 when nothing more can be read) and keeps |{line}|,
## |{column}| and their |_prev| counterparts up to date.
658 wakaba 1.183 $self->{set_nc} = sub {
659 wakaba 1.1 my $self = shift;
660 wakaba 1.13
661 wakaba 1.178 my $char = '';
## A character saved by the CR handling below is consumed first.
662 wakaba 1.183 if (defined $self->{next_nc}) {
663     $char = $self->{next_nc};
664     delete $self->{next_nc};
665     $self->{nc} = ord $char;
666 wakaba 1.139 } else {
667 wakaba 1.179 $self->{char_buffer} = '';
668     $self->{char_buffer_pos} = 0;
669    
## Try to fill |{char_buffer}| with characters other than NUL, LF and
## CR; on success the first buffered character becomes |{nc}| and no
## newline/NUL processing is needed.
670     my $count = $input->manakai_read_until
671 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
672 wakaba 1.179 if ($count) {
673     $self->{line_prev} = $self->{line};
674     $self->{column_prev} = $self->{column};
675     $self->{column}++;
676 wakaba 1.183 $self->{nc}
677 wakaba 1.179 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
678     return;
679     }
680    
681 wakaba 1.178 if ($input->read ($char, 1)) {
682 wakaba 1.183 $self->{nc} = ord $char;
683 wakaba 1.178 } else {
684 wakaba 1.183 $self->{nc} = -1;
685 wakaba 1.178 return;
686     }
687 wakaba 1.139 }
688 wakaba 1.112
689     ($self->{line_prev}, $self->{column_prev})
690     = ($self->{line}, $self->{column});
691     $self->{column}++;
692 wakaba 1.1
693 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
694 wakaba 1.132 !!!cp ('j1');
695 wakaba 1.112 $self->{line}++;
696     $self->{column} = 0;
697 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
698 wakaba 1.132 !!!cp ('j2');
699 wakaba 1.170 ## TODO: support for abort/streaming
## CR and CR LF are both reported as a single LF; a character other
## than LF that follows the CR is kept in |{next_nc}| for the next call.
700 wakaba 1.178 my $next = '';
701     if ($input->read ($next, 1) and $next ne "\x0A") {
702 wakaba 1.183 $self->{next_nc} = $next;
703 wakaba 1.135 }
704 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
705 wakaba 1.112 $self->{line}++;
706     $self->{column} = 0;
707 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
708 wakaba 1.132 !!!cp ('j4');
709 wakaba 1.8 !!!parse-error (type => 'NULL');
710 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
711 wakaba 1.1 }
712     };
713    
## |{read_until}|: appends to |$_[0]| (at |$offset|) a run of characters
## that are neither in the caller-supplied character class fragment
## |$_[1]| nor NUL, LF or CR, and returns the number of characters taken.
## Returns 0 without reading while a |{next_nc}| character is pending.
## On success |{nc}| is set to -1 and the column counters are advanced.
714 wakaba 1.172 $self->{read_until} = sub {
715     #my ($scalar, $specials_range, $offset) = @_;
716 wakaba 1.183 return 0 if defined $self->{next_nc};
717 wakaba 1.180
718 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
719 wakaba 1.180 my $offset = $_[2] || 0;
720    
## Characters already buffered by |{set_nc}| are consumed before the
## input handle is read again.
721     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
722     pos ($self->{char_buffer}) = $self->{char_buffer_pos};
723     if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
724     substr ($_[0], $offset)
725     = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
726     my $count = $+[0] - $-[0];
727     if ($count) {
728     $self->{column} += $count;
729     $self->{char_buffer_pos} += $count;
730     $self->{line_prev} = $self->{line};
731     $self->{column_prev} = $self->{column} - 1;
732 wakaba 1.183 $self->{nc} = -1;
733 wakaba 1.180 }
734     return $count;
735     } else {
736     return 0;
737     }
738     } else {
## NOTE(review): this branch passes |$_[2]| rather than |$offset|, so an
## omitted offset reaches |manakai_read_until| as undef - confirm.
739     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
740     if ($count) {
741     $self->{column} += $count;
742     $self->{line_prev} = $self->{line};
743     $self->{column_prev} = $self->{column} - 1;
744 wakaba 1.183 $self->{nc} = -1;
745 wakaba 1.180 }
746     return $count;
747 wakaba 1.172 }
748     }; # $self->{read_until}
749 wakaba 1.171
750 wakaba 1.3 my $onerror = $_[2] || sub {
751     my (%opt) = @_;
752 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
753     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
754     warn "Parse error ($opt{type}) at line $line column $column\n";
755 wakaba 1.3 };
## The current line and column are supplied first, so explicit |line|
## and |column| arguments from the caller take precedence.
756     $self->{parse_error} = sub {
757 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
758 wakaba 1.1 };
759    
760 wakaba 1.182 my $char_onerror = sub {
761     my (undef, $type, %opt) = @_;
762     !!!parse-error (layer => 'encode',
763     line => $self->{line}, column => $self->{column} + 1,
764     %opt, type => $type);
765     }; # $char_onerror
766    
## With a wrapper the error handler is always installed on the wrapped
## handle; without one an already defined handler is left untouched.
767     if ($_[3]) {
768     $input = $_[3]->($input);
769     $input->onerror ($char_onerror);
770     } else {
771     $input->onerror ($char_onerror) unless defined $input->onerror;
772     }
773    
774 wakaba 1.1 $self->_initialize_tokenizer;
775     $self->_initialize_tree_constructor;
776     $self->_construct_tree;
777     $self->_terminate_tree_constructor;
778    
## NOTE(review): |{read_until}| above also refers to the outer |$self|
## and is not removed here - check whether a reference cycle remains.
779 wakaba 1.112 delete $self->{parse_error}; # remove loop
780    
781 wakaba 1.1 return $self->{document};
782 wakaba 1.135 } # parse_char_stream
783 wakaba 1.1
## new ()
##
## Constructor.  Returns a parser object blessed into the invocant class
## with the |{level}| table (error level name => one-letter code) and
## default callbacks: |{set_nc}| sets |{nc}| to -1, while |{parse_error}|,
## |{change_encoding}| and |{application_cache_selection}| do nothing.
##
## NOTE(review): the default |{set_nc}| closure refers to |$self|, which
## is stored in |$self| itself - confirm the resulting cycle is acceptable.
784     sub new ($) {
785     my $class = shift;
786 wakaba 1.134 my $self = bless {
787 wakaba 1.153 level => {must => 'm',
788 wakaba 1.159 should => 's',
789 wakaba 1.153 warn => 'w',
790     info => 'i',
791     uncertain => 'u'},
792 wakaba 1.134 }, $class;
793 wakaba 1.183 $self->{set_nc} = sub {
794     $self->{nc} = -1;
795 wakaba 1.1 };
796     $self->{parse_error} = sub {
797     #
798     };
799 wakaba 1.63 $self->{change_encoding} = sub {
800     # if ($_[0] is a supported encoding) {
801     # run "change the encoding" algorithm;
802     # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
803     # }
804     };
805 wakaba 1.61 $self->{application_cache_selection} = sub {
806     #
807     };
808 wakaba 1.1 return $self;
809     } # new
810    
811 wakaba 1.208 ## Insertion modes
812 wakaba 1.55
## Insertion mode group flags.  Each group occupies its own bit so that
## an insertion mode can be tested against a group with a bit-and.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS () { 1 << 3 }
sub BODY_IMS () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS () { 1 << 6 }
sub ROW_IMS () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS () { 1 << 9 }
sub SELECT_IMS () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 1 << 11 } # see Whatpm::HTML::Tokenizer
  ## NOTE: "in foreign content" insertion mode is special; it is combined
  ## with the secondary insertion mode.  In this parser, they are stored
  ## together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
  ## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
  ## combined with the original insertion mode.  In this parser,
  ## they are stored together in the bit-or'ed form.

## The eleven low-order bits (bits 0 to 10).
sub IM_MASK () { (1 << 11) - 1 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## Individual insertion modes: a group flag (or a combination of group
## flags) bit-or'ed with a small number that distinguishes the modes
## sharing the same flags.
sub IN_HEAD_IM () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM () { HEAD_IMS | 3 }
sub IN_BODY_IM () { BODY_IMS }
sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM () { TABLE_IMS }
sub AFTER_BODY_IM () { BODY_AFTER_IMS }
sub IN_FRAMESET_IM () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }
sub IN_SELECT_IM () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
sub IN_COLUMN_GROUP_IM () { 2 }
857    
## Prepares |$self->{document}| for the tree construction stage:
## turns strict error checking off, marks the document as an HTML
## document, and records line 1, column 1 as its source position.
##
## NOTE: $self->{document} MUST be specified before this method is called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST

  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
868    
## Finishes the tree construction stage by turning strict error
## checking of |$self->{document}| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
874    
875     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
876    
877 wakaba 1.3 { # tree construction stage
878     my $token;
879    
## Runs the whole tree construction stage: fetches the first token,
## resets the per-parse state, and then runs the "initial" insertion
## mode, the "before html" insertion mode, and the main loop in order.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA renders the
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  !!!next-token;

  ## Reset the state of the tree construction stage.
  undef $self->{$_} for qw(form_element head_element head_element_inserted);
  $self->{open_elements} = [];
  undef $self->{$_} for qw(inner_html_node ignore_newline);

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;

  ## NOTE: The "before head" insertion mode and so on.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->_tree_construction_main;
} # _construct_tree
906    
## Implements the "initial" insertion mode.
##
## Consumes leading white space and comments, then either processes a
## DOCTYPE token (appending a document type node to the document and
## choosing the document's compatibility mode from the DOCTYPE name,
## public identifier, and system identifier) or, for any other token,
## reports the missing DOCTYPE and puts the document into quirks mode.
##
## Operates on the file-scoped current |$token|.  Returns once the
## parser should proceed to the "before html" insertion mode; a token
## other than a DOCTYPE is left as the current token so that it is
## reprocessed there.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not
      ## HTML5" error, switch to a conformance checking mode for
      ## another language.  (We don't support such mode switchings; it
      ## is nonsense to do anything different from what browsers do.)
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      my $doctype = $self->{document}->create_document_type_definition
          ($doctype_name);

      $doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive
      if ($doctype_name ne 'html') {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        !!!cp ('t2');
        ## XXX Obsolete permitted DOCTYPEs
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{sysid}) {
        if ($token->{sysid} eq 'about:legacy-compat') {
          !!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat">
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else { ## <!DOCTYPE HTML>
        !!!cp ('t3');
        #
      }

      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};

      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'html') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## Public identifiers are compared ASCII case-insensitively, so
        ## the identifier is uppercased to match the literals below.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/;
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix

        ## Does the public identifier start with one of the prefixes?
        ## The comparison has to be made against the identifier itself,
        ## not against the reference to the prefix list.
        my $match;
        for my $quirks_prefix (@$prefix) {
          if (substr ($pubid, 0, length $quirks_prefix) eq $quirks_prefix) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
          ## For these two public identifiers the mode depends on the
          ## system identifier: limited quirks if there is one, quirks
          ## if it is missing.
          if (defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1116    
## Implements the "before html" insertion mode.
##
## DOCTYPE tokens are reported and dropped, comments are appended to
## the document, and leading white space is skipped.  An |html| start
## tag creates the root element from the token; any other token makes
## the root element be created implicitly, after which that token is
## left as the current token for reprocessing.  In both cases the root
## element is pushed onto the stack of open elements and the
## |application_cache_selection| hook is invoked.
##
## Operates on the file-scoped current |$token|.  Returns when the
## parser should go to the "before head" insertion mode.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  ## NOTE: "before html" insertion mode.

  B: {
    my $token_type = $token->{type};

    if ($token_type == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token and stay in the insertion mode.
      !!!next-token;
      redo B;
    }

    if ($token_type == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    }

    if ($token_type == CHARACTER_TOKEN) {
      ## Leading white space characters are ignored.
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Nothing is left in the token.  Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE(review): The hook is invoked again after the root element
      ## is created below, i.e. twice for a character token.  Confirm
      ## whether this is intended.
      $self->{application_cache_selection}->(undef);
    } elsif ($token_type == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        ## Explicit root element.
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
      }
    } elsif ($token_type == END_TAG_TOKEN or
             $token_type == END_OF_FILE_TOKEN) {
      !!!cp ('t26');
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Implied root element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1209    
## Resets the insertion mode appropriately: walks the stack of open
## elements from the current node downwards and sets
## |$self->{insertion_mode}| according to the first node that
## determines a mode.  When the bottom of the stack is reached,
## |$self->{inner_html_node}| is examined in place of that node; the
## method dies if it is not defined.
sub _reset_insertion_mode ($) {
  my ($self) = @_;

  ## Insertion modes that are selected by the local name of the node.
  my %mode_by_local_name = (
    select => IN_SELECT_IM,
    ## NOTE: |option| and |optgroup| do not set
    ## insertion mode to "in select" by themselves.
    tr => IN_ROW_IM,
    tbody => IN_TABLE_BODY_IM,
    thead => IN_TABLE_BODY_IM,
    tfoot => IN_TABLE_BODY_IM,
    caption => IN_CAPTION_IM,
    colgroup => IN_COLUMN_GROUP_IM,
    table => IN_TABLE_IM,
    head => IN_BODY_IM, # not in head!
    body => IN_BODY_IM,
    frameset => IN_FRAMESET_IM,
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $depth = -1;
  my $node = $self->{open_elements}->[$depth];

  ## Step 3
  while (1) {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $is_last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $node = $self->{inner_html_node};
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] != TABLE_CELL_EL) {
      !!!cp ('t28.4');
      $new_mode = $mode_by_local_name{$node->[0]->manakai_local_name};
    } elsif ($is_last) {
      !!!cp ('t28.2');
      ## A table cell examined as the last node selects no mode here.
    } else {
      !!!cp ('t28.3');
      $new_mode = IN_CELL_IM;
    }
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return if $new_mode;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($is_last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $depth--;
    $node = $self->{open_elements}->[$depth];

    ## Step 18: Return to the step 3.
  } # Step 3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1296    
1297     sub _tree_construction_main ($) {
1298     my $self = shift;
1299    
1300 wakaba 1.1 my $active_formatting_elements = [];
1301    
## Reconstructs the active formatting elements.
##
## Starting from the last entry of the list of active formatting
## elements, rewinds to the entry following the nearest marker or the
## nearest entry that is in the stack of open elements.  From there up
## to the end of the list, each entry is cloned, the clone is inserted
## by the code reference given as the argument, pushed onto the stack
## of open elements, and put into the list in place of the entry.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert_node = shift;

  ## Whether the node is in the stack of open elements.
  my $is_open = sub {
    my $element = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $element eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $index = -1;
  my $entry = $active_formatting_elements->[$index];

  ## Step 2
  return if $entry->[0] eq '#marker';
  if ($is_open->($entry->[0])) {
    !!!cp ('t32');
    return;
  }

  REWIND: {
    ## Step 4
    last REWIND if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $entry = $active_formatting_elements->[--$index];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
    } elsif ($is_open->($entry->[0])) {
      !!!cp ('t33');
      !!!cp ('t34');
    } else {
      ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
      !!!cp ('t35');
      redo REWIND;
    }

    ## Step 7
    $entry = $active_formatting_elements->[++$index];
  } # REWIND

  CREATE: {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$index] = $self->{open_elements}->[-1];

    ## Step 11
    if ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t37');
    } else {
      !!!cp ('t36');
      ## Step 7'
      $entry = $active_formatting_elements->[++$index];

      redo CREATE;
    }
  } # CREATE
}; # $reconstruct_active_formatting_elements
1381    
## Clears the list of active formatting elements up to the last
## marker: the last marker and every entry after it are removed.
## The list is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
1393    
1394 wakaba 1.96 my $insert;
1395    
1396     my $parse_rcdata = sub ($) {
1397     my ($content_model_flag) = @_;
1398 wakaba 1.25
1399     ## Step 1
1400     my $start_tag_name = $token->{tag_name};
1401 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
1402 wakaba 1.25
1403     ## Step 2
1404 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
1405 wakaba 1.13 delete $self->{escape}; # MUST
1406 wakaba 1.25
1407 wakaba 1.205 ## Step 3, 4
1408     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
1409    
1410 wakaba 1.125 !!!nack ('t40.1');
1411 wakaba 1.1 !!!next-token;
1412 wakaba 1.25 }; # $parse_rcdata
1413 wakaba 1.1
## Processes a |script| start tag token: creates the element, inserts
## it by |$insert|, pushes it onto the stack of open elements, and
## switches the tokenizer to the CDATA content model.
my $script_start_tag = sub () {
  ## Step 1: Create an element for the token.
  my $script_element;
  !!!create-element ($script_element, $HTML_NS, 'script', $token->{attributes}, $token);

  ## Step 2
  ## TODO: mark as "parser-inserted"

  ## Step 3
  ## TODO: Mark as "already executed", if ...

  ## Step 4 (HTML5 revision 2702)
  $insert->($script_element);
  push @{$self->{open_elements}},
      [$script_element, $el_category->{script}];

  ## Step 5
  delete $self->{escape}; # MUST
  $self->{content_model} = CDATA_CONTENT_MODEL;

  ## Step 6-7: The original insertion mode is kept in the same value.
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.2');
  !!!next-token;
}; # $script_start_tag
1439    
1440 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1441     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
1442 wakaba 1.202 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1443 wakaba 1.102 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1444    
## Processes an end tag token of a formatting element by the adoption
## agency algorithm (AAA).
##
## The argument is the end tag token.  The algorithm is repeated until
## no matching entry is found in the list of active formatting
## elements after the last marker, or until one of the other exit
## conditions below is met.  Every exit path advances to the next
## token before returning.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    ## Find the last entry after the last marker in the list of active
    ## formatting elements whose local name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $token->{tag_name},
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $token->{tag_name},
                      token => $end_tag_token);
      ## The formatting element is not in the stack of open elements.
      ## Remove its own entry from the list of active formatting
      ## elements.  It is not necessarily the last entry of the list,
      ## so it has to be removed by its index.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    ## Scan the stack of open elements from the current node down to
    ## the formatting element.  The candidate found last, i.e. the one
    ## nearest to the formatting element, is kept.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
        ## NOTE: The topmost (eldest) node.
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    ## No furthest block: pop the stack of open elements up to and
    ## including the formatting element and drop its entry from the
    ## list of active formatting elements.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is represented by the node of the entry preceding
    ## the formatting element in the list of active formatting elements.
    my $bookmark_prev_el
      = $active_formatting_elements->[$formatting_element_i_in_active - 1]
      ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is removed from the stack of open elements and skipped.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    ## If the common ancestor is a table-related element, the last node
    ## is foster parented; otherwise it is appended to the common
    ## ancestor.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
              = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
        unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first, since appending a child to the
    ## clone removes it from the furthest block.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the list of active formatting
    ## elements and insert the clone after the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## Remove the formatting element from the stack of open elements
    ## and insert the clone immediately after the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1680    
## Plain insertion routine: the node given as the first argument is
## appended as the last child of the node held by the last entry of
## |$self->{open_elements}|.  The same closure is also stored in
## |$insert|.  The value returned by |append_child| is passed through.
$insert = my $insert_to_current = sub {
  my ($new_node) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  return $current_node->append_child ($new_node);
}; # $insert_to_current
1684    
## Insertion routine used while foster parenting may be required.
##
## Argument: the node to insert.
##
## If the category of the last entry of |$self->{open_elements}| has
## none of the |TABLE_ROWS_EL| bits, the node is simply appended to
## that entry's node.  Otherwise the node is foster parented:
##
##   - The stack is scanned from the top for the nearest entry whose
##     category equals |TABLE_EL|.
##   - If that table's parent node is defined and has node type 1
##     (ELEMENT_NODE in the DOM), the new node is inserted into the
##     parent immediately before the table.
##   - If the table has no such parent, the new node is appended to
##     the node of the stack entry just below the table.
##   - If the stack holds no table entry at all, the new node is
##     appended to the node of the first stack entry.
##
## After foster parenting, the last entry of |$open_tables| is marked
## as tainted.
my $insert_to_foster = sub {
  my ($new_node) = @_;
  my $stack = $self->{open_elements};

  ## Ordinary case: no foster parenting is necessary.
  unless ($stack->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $stack->[-1]->[0]->append_child ($new_node);
  }

  # MUST
  my $foster_parent;
  my $reference_node; # left undefined => |insert_before| appends
  TABLE: for my $index (reverse 0..$#$stack) {
    next TABLE unless ($stack->[$index]->[1] == TABLE_EL);

    my $table = $stack->[$index]->[0];
    my $table_parent = $table->parent_node;
    if (defined $table_parent and $table_parent->node_type == 1) {
      !!!cp ('t70');
      $foster_parent = $table_parent;
      $reference_node = $table;
    } else {
      !!!cp ('t71');
      $foster_parent = $stack->[$index - 1]->[0];
    }
    last TABLE;
  } # TABLE

  ## No table entry was found in the stack: fall back to the node of
  ## the first stack entry.
  $foster_parent = $stack->[0]->[0] unless defined $foster_parent;

  $foster_parent->insert_before ($new_node, $reference_node);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1716    
1717 wakaba 1.204 ## NOTE: Insert a character (MUST): When a character is inserted, if
1718     ## the last node that was inserted by the parser is a Text node and
1719     ## the character has to be inserted after that node, then the
1720     ## character is appended to the Text node. However, if any other
1721     ## node is inserted by the parser, then a new Text node is created
1722     ## and the character is appended to that new Text node. If I'm not
1723     ## wrong, for a parser with scripting disabled, there are only two
1724     ## cases where this occurs. One is the case where an element node
1725     ## is inserted to the |head| element. This is covered by using the
1726 wakaba 1.202 ## |$self->{head_element_inserted}| flag. Another is the case where
1727     ## an element or comment is inserted into the |table| subtree while
1728     ## foster parenting happens. This is covered by using the [2] flag
1729     ## of the |$open_tables| structure. All other cases are handled
1730     ## simply by calling |manakai_append_text| method.
1731    
1732 wakaba 1.204 ## TODO: |<body><script>document.write("a<br>");
1733     ## document.body.removeChild (document.body.lastChild);
1734     ## document.write ("b")</script>|
1735    
1736 wakaba 1.126 B: while (1) {
1737 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
1738 wakaba 1.79 !!!cp ('t73');
1739 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1740 wakaba 1.52 ## Ignore the token
1741     ## Stay in the phase
1742     !!!next-token;
1743 wakaba 1.126 next B;
1744 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
1745 wakaba 1.52 $token->{tag_name} eq 'html') {
1746 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1747 wakaba 1.79 !!!cp ('t79');
1748 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1749 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
1750     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1751 wakaba 1.79 !!!cp ('t80');
1752 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1753 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1754 wakaba 1.79 } else {
1755     !!!cp ('t81');
1756 wakaba 1.52 }
1757    
1758 wakaba 1.84 !!!cp ('t82');
1759 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
1760 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
1761     for my $attr_name (keys %{$token->{attributes}}) {
1762     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1763 wakaba 1.79 !!!cp ('t84');
1764 wakaba 1.52 $top_el->set_attribute_ns
1765     (undef, [undef, $attr_name],
1766     $token->{attributes}->{$attr_name}->{value});
1767     }
1768     }
1769 wakaba 1.125 !!!nack ('t84.1');
1770 wakaba 1.52 !!!next-token;
1771 wakaba 1.126 next B;
1772 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1773 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
1774 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1775 wakaba 1.79 !!!cp ('t85');
1776 wakaba 1.52 $self->{document}->append_child ($comment);
1777 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1778 wakaba 1.79 !!!cp ('t86');
1779 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
1780     } else {
1781 wakaba 1.79 !!!cp ('t87');
1782 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1783 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1784 wakaba 1.52 }
1785     !!!next-token;
1786 wakaba 1.126 next B;
1787 wakaba 1.205 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1788     if ($token->{type} == CHARACTER_TOKEN) {
1789     $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1790     delete $self->{ignore_newline};
1791    
1792     if (length $token->{data}) {
1793     !!!cp ('t43');
1794     $self->{open_elements}->[-1]->[0]->manakai_append_text
1795     ($token->{data});
1796     } else {
1797     !!!cp ('t43.1');
1798     }
1799     !!!next-token;
1800     next B;
1801     } elsif ($token->{type} == END_TAG_TOKEN) {
1802     delete $self->{ignore_newline};
1803    
1804     if ($token->{tag_name} eq 'script') {
1805     !!!cp ('t50');
1806    
1807     ## Para 1-2
1808     my $script = pop @{$self->{open_elements}};
1809    
1810     ## Para 3
1811     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1812    
1813     ## Para 4
1814     ## TODO: $old_insertion_point = $current_insertion_point;
1815     ## TODO: $current_insertion_point = just before $self->{nc};
1816    
1817     ## Para 5
1818     ## TODO: Run the $script->[0].
1819    
1820     ## Para 6
1821     ## TODO: $current_insertion_point = $old_insertion_point;
1822    
1823     ## Para 7
1824     ## TODO: if ($pending_external_script) {
1825     ## TODO: ...
1826     ## TODO: }
1827    
1828     !!!next-token;
1829     next B;
1830     } else {
1831     !!!cp ('t42');
1832    
1833     pop @{$self->{open_elements}};
1834    
1835     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1836     !!!next-token;
1837     next B;
1838     }
1839     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1840     delete $self->{ignore_newline};
1841    
1842     !!!cp ('t44');
1843     !!!parse-error (type => 'not closed',
1844     text => $self->{open_elements}->[-1]->[0]
1845     ->manakai_local_name,
1846     token => $token);
1847    
1848 wakaba 1.206 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1849 wakaba 1.205 # ## TODO: Mark as "already executed"
1850     #}
1851    
1852     pop @{$self->{open_elements}};
1853    
1854     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1855     ## Reprocess.
1856     next B;
1857     } else {
1858     die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1859     }
1860 wakaba 1.126 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1861     if ($token->{type} == CHARACTER_TOKEN) {
1862     !!!cp ('t87.1');
1863     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1864     !!!next-token;
1865     next B;
1866     } elsif ($token->{type} == START_TAG_TOKEN) {
1867 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1868     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1869 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1870     ($token->{tag_name} eq 'svg' and
1871 wakaba 1.206 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1872 wakaba 1.126 ## NOTE: "using the rules for secondary insertion mode" then "continue"
1873     !!!cp ('t87.2');
1874     #
1875     } elsif ({
1876 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1877 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1878 wakaba 1.223 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1879 wakaba 1.146 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1880     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1881     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1882     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1883     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1884 wakaba 1.223 }->{$token->{tag_name}} or
1885     ($token->{tag_name} eq 'font' and
1886     ($token->{attributes}->{color} or
1887     $token->{attributes}->{face} or
1888     $token->{attributes}->{size}))) {
1889 wakaba 1.126 !!!cp ('t87.2');
1890     !!!parse-error (type => 'not closed',
1891 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1892 wakaba 1.126 ->manakai_local_name,
1893     token => $token);
1894    
1895     pop @{$self->{open_elements}}
1896     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1897    
1898 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1899 wakaba 1.126 ## Reprocess.
1900     next B;
1901     } else {
1902 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1903     my $tag_name = $token->{tag_name};
1904     if ($nsuri eq $SVG_NS) {
1905     $tag_name = {
1906     altglyph => 'altGlyph',
1907     altglyphdef => 'altGlyphDef',
1908     altglyphitem => 'altGlyphItem',
1909     animatecolor => 'animateColor',
1910     animatemotion => 'animateMotion',
1911     animatetransform => 'animateTransform',
1912     clippath => 'clipPath',
1913     feblend => 'feBlend',
1914     fecolormatrix => 'feColorMatrix',
1915     fecomponenttransfer => 'feComponentTransfer',
1916     fecomposite => 'feComposite',
1917     feconvolvematrix => 'feConvolveMatrix',
1918     fediffuselighting => 'feDiffuseLighting',
1919     fedisplacementmap => 'feDisplacementMap',
1920     fedistantlight => 'feDistantLight',
1921     feflood => 'feFlood',
1922     fefunca => 'feFuncA',
1923     fefuncb => 'feFuncB',
1924     fefuncg => 'feFuncG',
1925     fefuncr => 'feFuncR',
1926     fegaussianblur => 'feGaussianBlur',
1927     feimage => 'feImage',
1928     femerge => 'feMerge',
1929     femergenode => 'feMergeNode',
1930     femorphology => 'feMorphology',
1931     feoffset => 'feOffset',
1932     fepointlight => 'fePointLight',
1933     fespecularlighting => 'feSpecularLighting',
1934     fespotlight => 'feSpotLight',
1935     fetile => 'feTile',
1936     feturbulence => 'feTurbulence',
1937     foreignobject => 'foreignObject',
1938     glyphref => 'glyphRef',
1939     lineargradient => 'linearGradient',
1940     radialgradient => 'radialGradient',
1941     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
1942     textpath => 'textPath',
1943     }->{$tag_name} || $tag_name;
1944     }
1945    
1946     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
1947    
1948     ## "adjust foreign attributes" - done in insert-element-f
1949 wakaba 1.126
1950 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
1951 wakaba 1.126
1952     if ($self->{self_closing}) {
1953     pop @{$self->{open_elements}};
1954     !!!ack ('t87.3');
1955     } else {
1956     !!!cp ('t87.4');
1957     }
1958    
1959     !!!next-token;
1960     next B;
1961     }
1962     } elsif ($token->{type} == END_TAG_TOKEN) {
1963     ## NOTE: "using the rules for secondary insertion mode" then "continue"
1964 wakaba 1.219 if ($token->{tag_name} eq 'script') {
1965     !!!cp ('t87.41');
1966     #
1967     ## XXXscript: Execute script here.
1968     } else {
1969     !!!cp ('t87.5');
1970     #
1971     }
1972 wakaba 1.126 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1973     !!!cp ('t87.6');
1974 wakaba 1.146 !!!parse-error (type => 'not closed',
1975 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1976 wakaba 1.146 ->manakai_local_name,
1977     token => $token);
1978    
1979     pop @{$self->{open_elements}}
1980     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1981    
1982 wakaba 1.200 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
1983    
1984 wakaba 1.146 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1985     ## Reprocess.
1986     next B;
1987 wakaba 1.126 } else {
1988     die "$0: $token->{type}: Unknown token type";
1989     }
1990     }
1991    
1992     if ($self->{insertion_mode} & HEAD_IMS) {
1993 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
1994 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
1995 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
1996 wakaba 1.202 if ($self->{head_element_inserted}) {
1997     !!!cp ('t88.3');
1998     $self->{open_elements}->[-1]->[0]->append_child
1999     ($self->{document}->create_text_node ($1));
2000     delete $self->{head_element_inserted};
2001     ## NOTE: |</head> <link> |
2002     #
2003     } else {
2004     !!!cp ('t88.2');
2005     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2006     ## NOTE: |</head> &#x20;|
2007     #
2008     }
2009 wakaba 1.99 } else {
2010     !!!cp ('t88.1');
2011     ## Ignore the token.
2012 wakaba 1.177 #
2013 wakaba 1.99 }
2014 wakaba 1.52 unless (length $token->{data}) {
2015 wakaba 1.79 !!!cp ('t88');
2016 wakaba 1.52 !!!next-token;
2017 wakaba 1.126 next B;
2018 wakaba 1.1 }
2019 wakaba 1.177 ## TODO: set $token->{column} appropriately
2020 wakaba 1.1 }
2021 wakaba 1.52
2022 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2023 wakaba 1.79 !!!cp ('t89');
2024 wakaba 1.52 ## As if <head>
2025 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2026 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2027 wakaba 1.123 push @{$self->{open_elements}},
2028     [$self->{head_element}, $el_category->{head}];
2029 wakaba 1.52
2030     ## Reprocess in the "in head" insertion mode...
2031     pop @{$self->{open_elements}};
2032    
2033     ## Reprocess in the "after head" insertion mode...
2034 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2035 wakaba 1.79 !!!cp ('t90');
2036 wakaba 1.52 ## As if </noscript>
2037     pop @{$self->{open_elements}};
2038 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
2039 wakaba 1.1
2040 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
2041     ## As if </head>
2042     pop @{$self->{open_elements}};
2043    
2044     ## Reprocess in the "after head" insertion mode...
2045 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2046 wakaba 1.79 !!!cp ('t91');
2047 wakaba 1.52 pop @{$self->{open_elements}};
2048    
2049     ## Reprocess in the "after head" insertion mode...
2050 wakaba 1.79 } else {
2051     !!!cp ('t92');
2052 wakaba 1.1 }
2053 wakaba 1.52
2054 wakaba 1.123 ## "after head" insertion mode
2055     ## As if <body>
2056     !!!insert-element ('body',, $token);
2057     $self->{insertion_mode} = IN_BODY_IM;
2058     ## reprocess
2059 wakaba 1.126 next B;
2060 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
2061     if ($token->{tag_name} eq 'head') {
2062     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2063     !!!cp ('t93');
2064 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2065 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
2066     ($self->{head_element});
2067     push @{$self->{open_elements}},
2068     [$self->{head_element}, $el_category->{head}];
2069     $self->{insertion_mode} = IN_HEAD_IM;
2070 wakaba 1.125 !!!nack ('t93.1');
2071 wakaba 1.123 !!!next-token;
2072 wakaba 1.126 next B;
2073 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2074 wakaba 1.139 !!!cp ('t93.2');
2075 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
2076     token => $token);
2077 wakaba 1.139 ## Ignore the token
2078     !!!nack ('t93.3');
2079     !!!next-token;
2080     next B;
2081 wakaba 1.125 } else {
2082     !!!cp ('t95');
2083 wakaba 1.153 !!!parse-error (type => 'in head:head',
2084     token => $token); # or in head noscript
2085 wakaba 1.125 ## Ignore the token
2086     !!!nack ('t95.1');
2087     !!!next-token;
2088 wakaba 1.126 next B;
2089 wakaba 1.125 }
2090     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2091 wakaba 1.126 !!!cp ('t96');
2092     ## As if <head>
2093     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2094     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2095     push @{$self->{open_elements}},
2096     [$self->{head_element}, $el_category->{head}];
2097 wakaba 1.52
2098 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
2099     ## Reprocess in the "in head" insertion mode...
2100     } else {
2101     !!!cp ('t97');
2102     }
2103 wakaba 1.52
2104 wakaba 1.202 if ($token->{tag_name} eq 'base') {
2105     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2106     !!!cp ('t98');
2107     ## As if </noscript>
2108     pop @{$self->{open_elements}};
2109     !!!parse-error (type => 'in noscript', text => 'base',
2110     token => $token);
2111    
2112     $self->{insertion_mode} = IN_HEAD_IM;
2113     ## Reprocess in the "in head" insertion mode...
2114     } else {
2115     !!!cp ('t99');
2116     }
2117 wakaba 1.49
2118 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2119     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2120     !!!cp ('t100');
2121     !!!parse-error (type => 'after head',
2122     text => $token->{tag_name}, token => $token);
2123     push @{$self->{open_elements}},
2124     [$self->{head_element}, $el_category->{head}];
2125     $self->{head_element_inserted} = 1;
2126     } else {
2127     !!!cp ('t101');
2128     }
2129     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2130     pop @{$self->{open_elements}};
2131     pop @{$self->{open_elements}} # <head>
2132     if $self->{insertion_mode} == AFTER_HEAD_IM;
2133     !!!nack ('t101.1');
2134     !!!next-token;
2135     next B;
2136 wakaba 1.194 } elsif ($token->{tag_name} eq 'link') {
2137     ## NOTE: There is a "as if in head" code clone.
2138     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2139     !!!cp ('t102');
2140     !!!parse-error (type => 'after head',
2141     text => $token->{tag_name}, token => $token);
2142     push @{$self->{open_elements}},
2143     [$self->{head_element}, $el_category->{head}];
2144 wakaba 1.202 $self->{head_element_inserted} = 1;
2145 wakaba 1.194 } else {
2146     !!!cp ('t103');
2147     }
2148     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2149     pop @{$self->{open_elements}};
2150     pop @{$self->{open_elements}} # <head>
2151     if $self->{insertion_mode} == AFTER_HEAD_IM;
2152     !!!ack ('t103.1');
2153     !!!next-token;
2154     next B;
2155     } elsif ($token->{tag_name} eq 'command' or
2156     $token->{tag_name} eq 'eventsource') {
2157     if ($self->{insertion_mode} == IN_HEAD_IM) {
2158     ## NOTE: If the insertion mode at the time of the emission
2159     ## of the token was "before head", $self->{insertion_mode}
2160     ## is already changed to |IN_HEAD_IM|.
2161    
2162     ## NOTE: There is a "as if in head" code clone.
2163     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2164     pop @{$self->{open_elements}};
2165     pop @{$self->{open_elements}} # <head>
2166     if $self->{insertion_mode} == AFTER_HEAD_IM;
2167     !!!ack ('t103.2');
2168     !!!next-token;
2169     next B;
2170     } else {
2171     ## NOTE: "in head noscript" or "after head" insertion mode
2172     ## - in these cases, these tags are treated as same as
2173     ## normal in-body tags.
2174     !!!cp ('t103.3');
2175     #
2176     }
2177 wakaba 1.202 } elsif ($token->{tag_name} eq 'meta') {
2178     ## NOTE: There is a "as if in head" code clone.
2179     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2180     !!!cp ('t104');
2181     !!!parse-error (type => 'after head',
2182     text => $token->{tag_name}, token => $token);
2183     push @{$self->{open_elements}},
2184     [$self->{head_element}, $el_category->{head}];
2185     $self->{head_element_inserted} = 1;
2186     } else {
2187     !!!cp ('t105');
2188     }
2189     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2190     my $meta_el = pop @{$self->{open_elements}};
2191 wakaba 1.34
2192     unless ($self->{confident}) {
2193 wakaba 1.134 if ($token->{attributes}->{charset}) {
2194 wakaba 1.79 !!!cp ('t106');
2195 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2196     ## in the {change_encoding} callback.
2197 wakaba 1.63 $self->{change_encoding}
2198 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
2199     $token);
2200 wakaba 1.66
2201     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2202     ->set_user_data (manakai_has_reference =>
2203     $token->{attributes}->{charset}
2204     ->{has_reference});
2205 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
2206     if ($token->{attributes}->{content}->{value}
2207 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2208 wakaba 1.186 [\x09\x0A\x0C\x0D\x20]*=
2209     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2210     ([^"'\x09\x0A\x0C\x0D\x20]
2211     [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2212 wakaba 1.79 !!!cp ('t107');
2213 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2214     ## in the {change_encoding} callback.
2215 wakaba 1.63 $self->{change_encoding}
2216 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2217     $token);
2218 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2219     ->set_user_data (manakai_has_reference =>
2220     $token->{attributes}->{content}
2221     ->{has_reference});
2222 wakaba 1.79 } else {
2223     !!!cp ('t108');
2224 wakaba 1.63 }
2225 wakaba 1.34 }
2226 wakaba 1.66 } else {
2227     if ($token->{attributes}->{charset}) {
2228 wakaba 1.79 !!!cp ('t109');
2229 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2230     ->set_user_data (manakai_has_reference =>
2231     $token->{attributes}->{charset}
2232     ->{has_reference});
2233     }
2234 wakaba 1.68 if ($token->{attributes}->{content}) {
2235 wakaba 1.79 !!!cp ('t110');
2236 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2237     ->set_user_data (manakai_has_reference =>
2238     $token->{attributes}->{content}
2239     ->{has_reference});
2240     }
2241 wakaba 1.34 }
2242    
2243 wakaba 1.100 pop @{$self->{open_elements}} # <head>
2244 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
2245 wakaba 1.125 !!!ack ('t110.1');
2246 wakaba 1.34 !!!next-token;
2247 wakaba 1.126 next B;
2248 wakaba 1.202 } elsif ($token->{tag_name} eq 'title') {
2249     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2250     !!!cp ('t111');
2251     ## As if </noscript>
2252     pop @{$self->{open_elements}};
2253     !!!parse-error (type => 'in noscript', text => 'title',
2254     token => $token);
2255    
2256     $self->{insertion_mode} = IN_HEAD_IM;
2257     ## Reprocess in the "in head" insertion mode...
2258     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2259     !!!cp ('t112');
2260     !!!parse-error (type => 'after head',
2261     text => $token->{tag_name}, token => $token);
2262     push @{$self->{open_elements}},
2263     [$self->{head_element}, $el_category->{head}];
2264     $self->{head_element_inserted} = 1;
2265     } else {
2266     !!!cp ('t113');
2267     }
2268 wakaba 1.49
2269 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2270     $parse_rcdata->(RCDATA_CONTENT_MODEL);
2271 wakaba 1.225
2272     ## NOTE: At this point the stack of open elements contains
2273     ## the |head| element (index == -2) and the |title| element
2274     ## (index == -1). In the "after head" insertion mode the
2275     ## |head| element is inserted only for the purpose of
2276     ## providing the context for the |title| element, and
2277     ## therefore it can, and must, now be removed from
2278     ## the stack.
2279 wakaba 1.205 splice @{$self->{open_elements}}, -2, 1, () # <head>
2280 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2281 wakaba 1.202 next B;
2282     } elsif ($token->{tag_name} eq 'style' or
2283     $token->{tag_name} eq 'noframes') {
2284     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2285     ## insertion mode IN_HEAD_IM)
2286     ## NOTE: There is a "as if in head" code clone.
2287     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2288     !!!cp ('t114');
2289     !!!parse-error (type => 'after head',
2290     text => $token->{tag_name}, token => $token);
2291     push @{$self->{open_elements}},
2292     [$self->{head_element}, $el_category->{head}];
2293     $self->{head_element_inserted} = 1;
2294     } else {
2295     !!!cp ('t115');
2296     }
2297     $parse_rcdata->(CDATA_CONTENT_MODEL);
2298 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2299     splice @{$self->{open_elements}}, -2, 1, () # <head>
2300 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2301 wakaba 1.202 next B;
2302 wakaba 1.205 } elsif ($token->{tag_name} eq 'noscript') {
2303 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
2304 wakaba 1.79 !!!cp ('t116');
2305 wakaba 1.25 ## NOTE: and scripting is disabled
2306 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2307 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2308 wakaba 1.125 !!!nack ('t116.1');
2309 wakaba 1.1 !!!next-token;
2310 wakaba 1.126 next B;
2311 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2312 wakaba 1.79 !!!cp ('t117');
2313 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
2314     token => $token);
2315 wakaba 1.1 ## Ignore the token
2316 wakaba 1.125 !!!nack ('t117.1');
2317 wakaba 1.41 !!!next-token;
2318 wakaba 1.126 next B;
2319 wakaba 1.1 } else {
2320 wakaba 1.79 !!!cp ('t118');
2321 wakaba 1.25 #
2322 wakaba 1.1 }
2323 wakaba 1.202 } elsif ($token->{tag_name} eq 'script') {
2324     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2325     !!!cp ('t119');
2326     ## As if </noscript>
2327     pop @{$self->{open_elements}};
2328     !!!parse-error (type => 'in noscript', text => 'script',
2329     token => $token);
2330    
2331     $self->{insertion_mode} = IN_HEAD_IM;
2332     ## Reprocess in the "in head" insertion mode...
2333     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2334     !!!cp ('t120');
2335     !!!parse-error (type => 'after head',
2336     text => $token->{tag_name}, token => $token);
2337     push @{$self->{open_elements}},
2338     [$self->{head_element}, $el_category->{head}];
2339     $self->{head_element_inserted} = 1;
2340     } else {
2341     !!!cp ('t121');
2342     }
2343 wakaba 1.49
2344 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2345     $script_start_tag->();
2346 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2347     splice @{$self->{open_elements}}, -2, 1 # <head>
2348 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2349 wakaba 1.202 next B;
2350     } elsif ($token->{tag_name} eq 'body' or
2351     $token->{tag_name} eq 'frameset') {
2352 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2353 wakaba 1.79 !!!cp ('t122');
2354 wakaba 1.49 ## As if </noscript>
2355     pop @{$self->{open_elements}};
2356 wakaba 1.153 !!!parse-error (type => 'in noscript',
2357     text => $token->{tag_name}, token => $token);
2358 wakaba 1.49
2359     ## Reprocess in the "in head" insertion mode...
2360     ## As if </head>
2361     pop @{$self->{open_elements}};
2362    
2363     ## Reprocess in the "after head" insertion mode...
2364 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2365 wakaba 1.79 !!!cp ('t124');
2366 wakaba 1.49 pop @{$self->{open_elements}};
2367    
2368     ## Reprocess in the "after head" insertion mode...
2369 wakaba 1.79 } else {
2370     !!!cp ('t125');
2371 wakaba 1.49 }
2372    
2373     ## "after head" insertion mode
2374 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2375 wakaba 1.54 if ($token->{tag_name} eq 'body') {
2376 wakaba 1.79 !!!cp ('t126');
2377 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2378     } elsif ($token->{tag_name} eq 'frameset') {
2379 wakaba 1.79 !!!cp ('t127');
2380 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
2381     } else {
2382     die "$0: tag name: $self->{tag_name}";
2383     }
2384 wakaba 1.125 !!!nack ('t127.1');
2385 wakaba 1.1 !!!next-token;
2386 wakaba 1.126 next B;
2387 wakaba 1.1 } else {
2388 wakaba 1.79 !!!cp ('t128');
2389 wakaba 1.1 #
2390     }
2391 wakaba 1.49
2392 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2393 wakaba 1.79 !!!cp ('t129');
2394 wakaba 1.49 ## As if </noscript>
2395     pop @{$self->{open_elements}};
2396 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2397     text => $token->{tag_name}, token => $token);
2398 wakaba 1.49
2399     ## Reprocess in the "in head" insertion mode...
2400     ## As if </head>
2401 wakaba 1.25 pop @{$self->{open_elements}};
2402 wakaba 1.49
2403     ## Reprocess in the "after head" insertion mode...
2404 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2405 wakaba 1.79 !!!cp ('t130');
2406 wakaba 1.49 ## As if </head>
2407 wakaba 1.25 pop @{$self->{open_elements}};
2408 wakaba 1.49
2409     ## Reprocess in the "after head" insertion mode...
2410 wakaba 1.79 } else {
2411     !!!cp ('t131');
2412 wakaba 1.49 }
2413    
2414     ## "after head" insertion mode
2415     ## As if <body>
2416 wakaba 1.116 !!!insert-element ('body',, $token);
2417 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2418 wakaba 1.49 ## reprocess
2419 wakaba 1.125 !!!ack-later;
2420 wakaba 1.126 next B;
2421 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2422 wakaba 1.49 if ($token->{tag_name} eq 'head') {
2423 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2424 wakaba 1.79 !!!cp ('t132');
2425 wakaba 1.50 ## As if <head>
2426 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2427 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2428 wakaba 1.123 push @{$self->{open_elements}},
2429     [$self->{head_element}, $el_category->{head}];
2430 wakaba 1.50
2431     ## Reprocess in the "in head" insertion mode...
2432     pop @{$self->{open_elements}};
2433 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2434 wakaba 1.50 !!!next-token;
2435 wakaba 1.126 next B;
2436 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2437 wakaba 1.79 !!!cp ('t133');
2438 wakaba 1.49 ## As if </noscript>
2439     pop @{$self->{open_elements}};
2440 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2441     text => 'head', token => $token);
2442 wakaba 1.49
2443     ## Reprocess in the "in head" insertion mode...
2444 wakaba 1.50 pop @{$self->{open_elements}};
2445 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2446 wakaba 1.50 !!!next-token;
2447 wakaba 1.126 next B;
2448 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2449 wakaba 1.79 !!!cp ('t134');
2450 wakaba 1.49 pop @{$self->{open_elements}};
2451 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2452 wakaba 1.49 !!!next-token;
2453 wakaba 1.126 next B;
2454 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2455     !!!cp ('t134.1');
2456 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
2457     token => $token);
2458 wakaba 1.139 ## Ignore the token
2459     !!!next-token;
2460     next B;
2461 wakaba 1.49 } else {
2462 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2463 wakaba 1.49 }
2464     } elsif ($token->{tag_name} eq 'noscript') {
2465 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2466 wakaba 1.79 !!!cp ('t136');
2467 wakaba 1.49 pop @{$self->{open_elements}};
2468 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
2469 wakaba 1.49 !!!next-token;
2470 wakaba 1.126 next B;
2471 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
2472     $self->{insertion_mode} == AFTER_HEAD_IM) {
2473 wakaba 1.79 !!!cp ('t137');
2474 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2475     text => 'noscript', token => $token);
2476 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2477     !!!next-token;
2478 wakaba 1.126 next B;
2479 wakaba 1.49 } else {
2480 wakaba 1.79 !!!cp ('t138');
2481 wakaba 1.49 #
2482     }
2483     } elsif ({
2484 wakaba 1.31 body => 1, html => 1,
2485     }->{$token->{tag_name}}) {
2486 wakaba 1.203 ## TODO: This branch is entirely redundant.
2487     if ($self->{insertion_mode} == BEFORE_HEAD_IM or
2488 wakaba 1.139 $self->{insertion_mode} == IN_HEAD_IM or
2489     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2490 wakaba 1.79 !!!cp ('t140');
2491 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2492     text => $token->{tag_name}, token => $token);
2493 wakaba 1.49 ## Ignore the token
2494     !!!next-token;
2495 wakaba 1.126 next B;
2496 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2497     !!!cp ('t140.1');
2498 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2499     text => $token->{tag_name}, token => $token);
2500 wakaba 1.139 ## Ignore the token
2501     !!!next-token;
2502     next B;
2503 wakaba 1.79 } else {
2504 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2505 wakaba 1.49 }
2506 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
2507     !!!cp ('t142');
2508 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2509     text => $token->{tag_name}, token => $token);
2510 wakaba 1.139 ## Ignore the token
2511     !!!next-token;
2512     next B;
2513 wakaba 1.224 } elsif ($token->{tag_name} eq 'br') {
2514     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2515     !!!cp ('t142.2');
2516     ## (before head) as if <head>, (in head) as if </head>
2517     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2518     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2519     $self->{insertion_mode} = AFTER_HEAD_IM;
2520 wakaba 1.139
2521 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2522     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2523     !!!cp ('t143.2');
2524     ## As if </head>
2525     pop @{$self->{open_elements}};
2526     $self->{insertion_mode} = AFTER_HEAD_IM;
2527 wakaba 1.139
2528 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2529     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2530     !!!cp ('t143.3');
2531     ## NOTE: Two parse errors for <head><noscript></br>
2532     !!!parse-error (type => 'unmatched end tag',
2533     text => 'br', token => $token);
2534     ## As if </noscript>
2535     pop @{$self->{open_elements}};
2536     $self->{insertion_mode} = IN_HEAD_IM;
2537 wakaba 1.50
2538 wakaba 1.224 ## Reprocess in the "in head" insertion mode...
2539     ## As if </head>
2540     pop @{$self->{open_elements}};
2541     $self->{insertion_mode} = AFTER_HEAD_IM;
2542 wakaba 1.139
2543 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2544     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2545     !!!cp ('t143.4');
2546     #
2547     } else {
2548     die "$0: $self->{insertion_mode}: Unknown insertion mode";
2549     }
2550 wakaba 1.50
2551 wakaba 1.224 #
2552     } else { ## Other end tags
2553 wakaba 1.139 !!!cp ('t145');
2554 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2555     text => $token->{tag_name}, token => $token);
2556 wakaba 1.139 ## Ignore the token
2557     !!!next-token;
2558     next B;
2559 wakaba 1.49 }
2560    
2561 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2562 wakaba 1.79 !!!cp ('t146');
2563 wakaba 1.49 ## As if </noscript>
2564     pop @{$self->{open_elements}};
2565 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2566     text => $token->{tag_name}, token => $token);
2567 wakaba 1.49
2568     ## Reprocess in the "in head" insertion mode...
2569     ## As if </head>
2570     pop @{$self->{open_elements}};
2571    
2572     ## Reprocess in the "after head" insertion mode...
2573 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2574 wakaba 1.79 !!!cp ('t147');
2575 wakaba 1.49 ## As if </head>
2576     pop @{$self->{open_elements}};
2577    
2578     ## Reprocess in the "after head" insertion mode...
2579 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2580 wakaba 1.82 ## ISSUE: This case cannot be reached?
2581 wakaba 1.79 !!!cp ('t148');
2582 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2583     text => $token->{tag_name}, token => $token);
2584 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2585     !!!next-token;
2586 wakaba 1.126 next B;
2587 wakaba 1.79 } else {
2588     !!!cp ('t149');
2589 wakaba 1.1 }
2590    
2591 wakaba 1.49 ## "after head" insertion mode
2592     ## As if <body>
2593 wakaba 1.116 !!!insert-element ('body',, $token);
2594 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2595 wakaba 1.52 ## reprocess
2596 wakaba 1.224 next B;
2597 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2598     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2599     !!!cp ('t149.1');
2600    
2601     ## NOTE: As if <head>
2602 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2603 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
2604     ($self->{head_element});
2605 wakaba 1.123 #push @{$self->{open_elements}},
2606     # [$self->{head_element}, $el_category->{head}];
2607 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
2608     ## NOTE: Reprocess.
2609    
2610     ## NOTE: As if </head>
2611     #pop @{$self->{open_elements}};
2612     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2613     ## NOTE: Reprocess.
2614    
2615     #
2616     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2617     !!!cp ('t149.2');
2618    
2619     ## NOTE: As if </head>
2620     pop @{$self->{open_elements}};
2621     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2622     ## NOTE: Reprocess.
2623    
2624     #
2625     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2626     !!!cp ('t149.3');
2627    
2628 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
2629 wakaba 1.104
2630     ## As if </noscript>
2631     pop @{$self->{open_elements}};
2632     #$self->{insertion_mode} = IN_HEAD_IM;
2633     ## NOTE: Reprocess.
2634    
2635     ## NOTE: As if </head>
2636     pop @{$self->{open_elements}};
2637     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2638     ## NOTE: Reprocess.
2639    
2640     #
2641     } else {
2642     !!!cp ('t149.4');
2643     #
2644     }
2645    
2646     ## NOTE: As if <body>
2647 wakaba 1.116 !!!insert-element ('body',, $token);
2648 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
2649     ## NOTE: Reprocess.
2650 wakaba 1.126 next B;
2651 wakaba 1.104 } else {
2652     die "$0: $token->{type}: Unknown token type";
2653     }
2654 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
2655 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2656 wakaba 1.79 !!!cp ('t150');
2657 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
2658     $reconstruct_active_formatting_elements->($insert_to_current);
2659    
2660     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2661    
2662     !!!next-token;
2663 wakaba 1.126 next B;
2664 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
2665 wakaba 1.52 if ({
2666     caption => 1, col => 1, colgroup => 1, tbody => 1,
2667     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2668     }->{$token->{tag_name}}) {
2669 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2670 wakaba 1.52 ## have an element in table scope
2671 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
2672 wakaba 1.52 my $node = $self->{open_elements}->[$_];
2673 wakaba 1.206 if ($node->[1] == TABLE_CELL_EL) {
2674 wakaba 1.79 !!!cp ('t151');
2675 wakaba 1.108
2676     ## Close the cell
2677 wakaba 1.125 !!!back-token; # <x>
2678 wakaba 1.122 $token = {type => END_TAG_TOKEN,
2679     tag_name => $node->[0]->manakai_local_name,
2680 wakaba 1.114 line => $token->{line},
2681     column => $token->{column}};
2682 wakaba 1.126 next B;
2683 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2684 wakaba 1.79 !!!cp ('t152');
2685 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
2686     last;
2687 wakaba 1.52 }
2688 wakaba 1.108 }
2689    
2690     !!!cp ('t153');
2691     !!!parse-error (type => 'start tag not allowed',
2692 wakaba 1.153 text => $token->{tag_name}, token => $token);
2693 wakaba 1.108 ## Ignore the token
2694 wakaba 1.125 !!!nack ('t153.1');
2695 wakaba 1.108 !!!next-token;
2696 wakaba 1.126 next B;
2697 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2698 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2699     token => $token);
2700 wakaba 1.52
2701 wakaba 1.108 ## NOTE: As if </caption>.
2702 wakaba 1.52 ## have a table element in table scope
2703     my $i;
2704 wakaba 1.108 INSCOPE: {
2705     for (reverse 0..$#{$self->{open_elements}}) {
2706     my $node = $self->{open_elements}->[$_];
2707 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2708 wakaba 1.108 !!!cp ('t155');
2709     $i = $_;
2710     last INSCOPE;
2711 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2712 wakaba 1.108 !!!cp ('t156');
2713     last;
2714     }
2715 wakaba 1.52 }
2716 wakaba 1.108
2717     !!!cp ('t157');
2718     !!!parse-error (type => 'start tag not allowed',
2719 wakaba 1.153 text => $token->{tag_name}, token => $token);
2720 wakaba 1.108 ## Ignore the token
2721 wakaba 1.125 !!!nack ('t157.1');
2722 wakaba 1.108 !!!next-token;
2723 wakaba 1.126 next B;
2724 wakaba 1.52 } # INSCOPE
2725    
2726     ## generate implied end tags
2727 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2728     & END_TAG_OPTIONAL_EL) {
2729 wakaba 1.79 !!!cp ('t158');
2730 wakaba 1.86 pop @{$self->{open_elements}};
2731 wakaba 1.52 }
2732    
2733 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2734 wakaba 1.79 !!!cp ('t159');
2735 wakaba 1.122 !!!parse-error (type => 'not closed',
2736 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2737 wakaba 1.122 ->manakai_local_name,
2738     token => $token);
2739 wakaba 1.79 } else {
2740     !!!cp ('t160');
2741 wakaba 1.52 }
2742    
2743     splice @{$self->{open_elements}}, $i;
2744    
2745     $clear_up_to_marker->();
2746    
2747 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2748 wakaba 1.52
2749     ## reprocess
2750 wakaba 1.125 !!!ack-later;
2751 wakaba 1.126 next B;
2752 wakaba 1.52 } else {
2753 wakaba 1.79 !!!cp ('t161');
2754 wakaba 1.52 #
2755     }
2756     } else {
2757 wakaba 1.79 !!!cp ('t162');
2758 wakaba 1.52 #
2759     }
2760 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2761 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2762 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2763 wakaba 1.43 ## have an element in table scope
2764 wakaba 1.52 my $i;
2765 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2766     my $node = $self->{open_elements}->[$_];
2767 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2768 wakaba 1.79 !!!cp ('t163');
2769 wakaba 1.52 $i = $_;
2770 wakaba 1.43 last INSCOPE;
2771 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2772 wakaba 1.79 !!!cp ('t164');
2773 wakaba 1.43 last INSCOPE;
2774     }
2775     } # INSCOPE
2776 wakaba 1.52 unless (defined $i) {
2777 wakaba 1.79 !!!cp ('t165');
2778 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2779     text => $token->{tag_name},
2780     token => $token);
2781 wakaba 1.43 ## Ignore the token
2782     !!!next-token;
2783 wakaba 1.126 next B;
2784 wakaba 1.43 }
2785    
2786 wakaba 1.52 ## generate implied end tags
2787 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2788     & END_TAG_OPTIONAL_EL) {
2789 wakaba 1.79 !!!cp ('t166');
2790 wakaba 1.86 pop @{$self->{open_elements}};
2791 wakaba 1.52 }
2792 wakaba 1.86
2793 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2794     ne $token->{tag_name}) {
2795 wakaba 1.79 !!!cp ('t167');
2796 wakaba 1.122 !!!parse-error (type => 'not closed',
2797 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2798 wakaba 1.122 ->manakai_local_name,
2799     token => $token);
2800 wakaba 1.79 } else {
2801     !!!cp ('t168');
2802 wakaba 1.52 }
2803    
2804     splice @{$self->{open_elements}}, $i;
2805    
2806     $clear_up_to_marker->();
2807    
2808 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
2809 wakaba 1.52
2810     !!!next-token;
2811 wakaba 1.126 next B;
2812 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2813 wakaba 1.79 !!!cp ('t169');
2814 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2815     text => $token->{tag_name}, token => $token);
2816 wakaba 1.52 ## Ignore the token
2817     !!!next-token;
2818 wakaba 1.126 next B;
2819 wakaba 1.52 } else {
2820 wakaba 1.79 !!!cp ('t170');
2821 wakaba 1.52 #
2822     }
2823     } elsif ($token->{tag_name} eq 'caption') {
2824 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2825 wakaba 1.43 ## have a table element in table scope
2826     my $i;
2827 wakaba 1.108 INSCOPE: {
2828     for (reverse 0..$#{$self->{open_elements}}) {
2829     my $node = $self->{open_elements}->[$_];
2830 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2831 wakaba 1.108 !!!cp ('t171');
2832     $i = $_;
2833     last INSCOPE;
2834 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2835 wakaba 1.108 !!!cp ('t172');
2836     last;
2837     }
2838 wakaba 1.43 }
2839 wakaba 1.108
2840     !!!cp ('t173');
2841     !!!parse-error (type => 'unmatched end tag',
2842 wakaba 1.153 text => $token->{tag_name}, token => $token);
2843 wakaba 1.108 ## Ignore the token
2844     !!!next-token;
2845 wakaba 1.126 next B;
2846 wakaba 1.43 } # INSCOPE
2847    
2848     ## generate implied end tags
2849 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2850     & END_TAG_OPTIONAL_EL) {
2851 wakaba 1.79 !!!cp ('t174');
2852 wakaba 1.86 pop @{$self->{open_elements}};
2853 wakaba 1.43 }
2854 wakaba 1.52
2855 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2856 wakaba 1.79 !!!cp ('t175');
2857 wakaba 1.122 !!!parse-error (type => 'not closed',
2858 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2859 wakaba 1.122 ->manakai_local_name,
2860     token => $token);
2861 wakaba 1.79 } else {
2862     !!!cp ('t176');
2863 wakaba 1.52 }
2864    
2865     splice @{$self->{open_elements}}, $i;
2866    
2867     $clear_up_to_marker->();
2868    
2869 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2870 wakaba 1.52
2871     !!!next-token;
2872 wakaba 1.126 next B;
2873 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2874 wakaba 1.79 !!!cp ('t177');
2875 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2876     text => $token->{tag_name}, token => $token);
2877 wakaba 1.52 ## Ignore the token
2878     !!!next-token;
2879 wakaba 1.126 next B;
2880 wakaba 1.52 } else {
2881 wakaba 1.79 !!!cp ('t178');
2882 wakaba 1.52 #
2883     }
2884     } elsif ({
2885     table => 1, tbody => 1, tfoot => 1,
2886     thead => 1, tr => 1,
2887     }->{$token->{tag_name}} and
2888 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2889 wakaba 1.52 ## have an element in table scope
2890     my $i;
2891     my $tn;
2892 wakaba 1.108 INSCOPE: {
2893     for (reverse 0..$#{$self->{open_elements}}) {
2894     my $node = $self->{open_elements}->[$_];
2895 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2896 wakaba 1.108 !!!cp ('t179');
2897     $i = $_;
2898    
2899     ## Close the cell
2900 wakaba 1.125 !!!back-token; # </x>
2901 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2902     line => $token->{line},
2903     column => $token->{column}};
2904 wakaba 1.126 next B;
2905 wakaba 1.206 } elsif ($node->[1] == TABLE_CELL_EL) {
2906 wakaba 1.108 !!!cp ('t180');
2907 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
2908 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
2909     ## in scope in the stack of open elements by definition.
2910 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2911 wakaba 1.108 ## ISSUE: Can this be reached?
2912     !!!cp ('t181');
2913     last;
2914     }
2915 wakaba 1.52 }
2916 wakaba 1.108
2917 wakaba 1.79 !!!cp ('t182');
2918 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
2919 wakaba 1.153 text => $token->{tag_name}, token => $token);
2920 wakaba 1.52 ## Ignore the token
2921     !!!next-token;
2922 wakaba 1.126 next B;
2923 wakaba 1.108 } # INSCOPE
2924 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
2925 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2926 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2927     token => $token);
2928 wakaba 1.52
2929     ## As if </caption>
2930     ## have a table element in table scope
2931     my $i;
2932     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2933     my $node = $self->{open_elements}->[$_];
2934 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2935 wakaba 1.79 !!!cp ('t184');
2936 wakaba 1.52 $i = $_;
2937     last INSCOPE;
2938 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2939 wakaba 1.79 !!!cp ('t185');
2940 wakaba 1.52 last INSCOPE;
2941     }
2942     } # INSCOPE
2943     unless (defined $i) {
2944 wakaba 1.79 !!!cp ('t186');
2945 wakaba 1.209 ## TODO: Wrong error type?
2946 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2947     text => 'caption', token => $token);
2948 wakaba 1.52 ## Ignore the token
2949     !!!next-token;
2950 wakaba 1.126 next B;
2951 wakaba 1.52 }
2952    
2953     ## generate implied end tags
2954 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
2955 wakaba 1.79 !!!cp ('t187');
2956 wakaba 1.86 pop @{$self->{open_elements}};
2957 wakaba 1.52 }
2958    
2959 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2960 wakaba 1.79 !!!cp ('t188');
2961 wakaba 1.122 !!!parse-error (type => 'not closed',
2962 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2963 wakaba 1.122 ->manakai_local_name,
2964     token => $token);
2965 wakaba 1.79 } else {
2966     !!!cp ('t189');
2967 wakaba 1.52 }
2968    
2969     splice @{$self->{open_elements}}, $i;
2970    
2971     $clear_up_to_marker->();
2972    
2973 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2974 wakaba 1.52
2975     ## reprocess
2976 wakaba 1.126 next B;
2977 wakaba 1.52 } elsif ({
2978     body => 1, col => 1, colgroup => 1, html => 1,
2979     }->{$token->{tag_name}}) {
2980 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
2981 wakaba 1.79 !!!cp ('t190');
2982 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2983     text => $token->{tag_name}, token => $token);
2984 wakaba 1.52 ## Ignore the token
2985     !!!next-token;
2986 wakaba 1.126 next B;
2987 wakaba 1.52 } else {
2988 wakaba 1.79 !!!cp ('t191');
2989 wakaba 1.52 #
2990     }
2991 wakaba 1.210 } elsif ({
2992     tbody => 1, tfoot => 1,
2993     thead => 1, tr => 1,
2994     }->{$token->{tag_name}} and
2995     ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2996     !!!cp ('t192');
2997     !!!parse-error (type => 'unmatched end tag',
2998     text => $token->{tag_name}, token => $token);
2999     ## Ignore the token
3000     !!!next-token;
3001     next B;
3002     } else {
3003     !!!cp ('t193');
3004     #
3005     }
3006 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3007     for my $entry (@{$self->{open_elements}}) {
3008 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
3009 wakaba 1.104 !!!cp ('t75');
3010 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3011 wakaba 1.104 last;
3012     }
3013     }
3014    
3015     ## Stop parsing.
3016     last B;
3017 wakaba 1.52 } else {
3018     die "$0: $token->{type}: Unknown token type";
3019     }
3020    
3021     $insert = $insert_to_current;
3022     #
3023 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3024 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3025 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
3026 wakaba 1.188 $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3027 wakaba 1.95 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3028 wakaba 1.52
3029 wakaba 1.95 unless (length $token->{data}) {
3030     !!!cp ('t194');
3031     !!!next-token;
3032 wakaba 1.126 next B;
3033 wakaba 1.95 } else {
3034     !!!cp ('t195');
3035     }
3036     }
3037 wakaba 1.52
3038 wakaba 1.153 !!!parse-error (type => 'in table:#text', token => $token);
3039 wakaba 1.52
3040 wakaba 1.202 ## NOTE: As if in body, but insert into the foster parent element.
3041     $reconstruct_active_formatting_elements->($insert_to_foster);
3042 wakaba 1.52
3043 wakaba 1.202 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
3044     # MUST
3045     my $foster_parent_element;
3046     my $next_sibling;
3047     my $prev_sibling;
3048     OE: for (reverse 0..$#{$self->{open_elements}}) {
3049 wakaba 1.206 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
3050 wakaba 1.202 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3051     if (defined $parent and $parent->node_type == 1) {
3052     $foster_parent_element = $parent;
3053     !!!cp ('t196');
3054     $next_sibling = $self->{open_elements}->[$_]->[0];
3055     $prev_sibling = $next_sibling->previous_sibling;
3056     #
3057 wakaba 1.52 } else {
3058 wakaba 1.202 !!!cp ('t197');
3059     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3060     $prev_sibling = $foster_parent_element->last_child;
3061     #
3062 wakaba 1.52 }
3063 wakaba 1.202 last OE;
3064     }
3065     } # OE
3066     $foster_parent_element = $self->{open_elements}->[0]->[0] and
3067     $prev_sibling = $foster_parent_element->last_child
3068     unless defined $foster_parent_element;
3069     undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted
3070     if (defined $prev_sibling and
3071     $prev_sibling->node_type == 3) {
3072     !!!cp ('t198');
3073     $prev_sibling->manakai_append_text ($token->{data});
3074     } else {
3075     !!!cp ('t199');
3076     $foster_parent_element->insert_before
3077     ($self->{document}->create_text_node ($token->{data}),
3078     $next_sibling);
3079     }
3080 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
3081 wakaba 1.202 $open_tables->[-1]->[2] = 1; # ~node inserted
3082 wakaba 1.95 } else {
3083 wakaba 1.202 ## NOTE: Fragment case or in a foster parent'ed element
3084     ## (e.g. |<table><span>a|). In fragment case, whether the
3085     ## character is appended to existing node or a new node is
3086     ## created is irrelevant, since the foster parent'ed nodes
3087     ## are discarded and fragment parsing does not invoke any
3088     ## script.
3089 wakaba 1.95 !!!cp ('t200');
3090 wakaba 1.202 $self->{open_elements}->[-1]->[0]->manakai_append_text
3091     ($token->{data});
3092 wakaba 1.95 }
3093 wakaba 1.52
3094 wakaba 1.95 !!!next-token;
3095 wakaba 1.126 next B;
3096 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3097 wakaba 1.153 if ({
3098 wakaba 1.210 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3099 wakaba 1.153 th => 1, td => 1,
3100     }->{$token->{tag_name}}) {
3101 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3102 wakaba 1.153 ## Clear back to table context
3103     while (not ($self->{open_elements}->[-1]->[1]
3104     & TABLE_SCOPING_EL)) {
3105     !!!cp ('t201');
3106     pop @{$self->{open_elements}};
3107     }
3108    
3109     !!!insert-element ('tbody',, $token);
3110     $self->{insertion_mode} = IN_TABLE_BODY_IM;
3111     ## reprocess in the "in table body" insertion mode...
3112     }
3113    
3114 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3115 wakaba 1.153 unless ($token->{tag_name} eq 'tr') {
3116     !!!cp ('t202');
3117     !!!parse-error (type => 'missing start tag:tr', token => $token);
3118     }
3119 wakaba 1.43
3120 wakaba 1.153 ## Clear back to table body context
3121     while (not ($self->{open_elements}->[-1]->[1]
3122     & TABLE_ROWS_SCOPING_EL)) {
3123     !!!cp ('t203');
3124     ## ISSUE: Can this case be reached?
3125     pop @{$self->{open_elements}};
3126     }
3127 wakaba 1.43
3128 wakaba 1.202 $self->{insertion_mode} = IN_ROW_IM;
3129     if ($token->{tag_name} eq 'tr') {
3130     !!!cp ('t204');
3131     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3132     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3133     !!!nack ('t204');
3134     !!!next-token;
3135     next B;
3136     } else {
3137     !!!cp ('t205');
3138     !!!insert-element ('tr',, $token);
3139     ## reprocess in the "in row" insertion mode
3140     }
3141     } else {
3142     !!!cp ('t206');
3143     }
3144 wakaba 1.52
3145     ## Clear back to table row context
3146 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3147     & TABLE_ROW_SCOPING_EL)) {
3148 wakaba 1.79 !!!cp ('t207');
3149 wakaba 1.52 pop @{$self->{open_elements}};
3150 wakaba 1.43 }
3151 wakaba 1.52
3152 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3153     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3154     $self->{insertion_mode} = IN_CELL_IM;
3155 wakaba 1.52
3156 wakaba 1.202 push @$active_formatting_elements, ['#marker', ''];
3157 wakaba 1.52
3158 wakaba 1.202 !!!nack ('t207.1');
3159     !!!next-token;
3160     next B;
3161     } elsif ({
3162     caption => 1, col => 1, colgroup => 1,
3163     tbody => 1, tfoot => 1, thead => 1,
3164     tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3165     }->{$token->{tag_name}}) {
3166 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3167 wakaba 1.202 ## As if </tr>
3168     ## have an element in table scope
3169     my $i;
3170     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3171     my $node = $self->{open_elements}->[$_];
3172 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3173 wakaba 1.202 !!!cp ('t208');
3174     $i = $_;
3175     last INSCOPE;
3176     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3177     !!!cp ('t209');
3178     last INSCOPE;
3179     }
3180     } # INSCOPE
3181     unless (defined $i) {
3182     !!!cp ('t210');
3183     ## TODO: This type is wrong.
3184     !!!parse-error (type => 'unmacthed end tag',
3185     text => $token->{tag_name}, token => $token);
3186     ## Ignore the token
3187     !!!nack ('t210.1');
3188 wakaba 1.52 !!!next-token;
3189 wakaba 1.126 next B;
3190 wakaba 1.202 }
3191 wakaba 1.43
3192 wakaba 1.52 ## Clear back to table row context
3193 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3194     & TABLE_ROW_SCOPING_EL)) {
3195 wakaba 1.79 !!!cp ('t211');
3196 wakaba 1.83 ## ISSUE: Can this case be reached?
3197 wakaba 1.52 pop @{$self->{open_elements}};
3198 wakaba 1.1 }
3199 wakaba 1.43
3200 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3201 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3202 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
3203 wakaba 1.79 !!!cp ('t212');
3204 wakaba 1.52 ## reprocess
3205 wakaba 1.125 !!!ack-later;
3206 wakaba 1.126 next B;
3207 wakaba 1.52 } else {
3208 wakaba 1.79 !!!cp ('t213');
3209 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3210     }
3211 wakaba 1.1 }
3212 wakaba 1.52
3213 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3214 wakaba 1.52 ## have an element in table scope
3215 wakaba 1.43 my $i;
3216     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3217     my $node = $self->{open_elements}->[$_];
3218 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3219 wakaba 1.79 !!!cp ('t214');
3220 wakaba 1.43 $i = $_;
3221     last INSCOPE;
3222 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3223 wakaba 1.79 !!!cp ('t215');
3224 wakaba 1.43 last INSCOPE;
3225     }
3226     } # INSCOPE
3227 wakaba 1.52 unless (defined $i) {
3228 wakaba 1.79 !!!cp ('t216');
3229 wakaba 1.153 ## TODO: This error type is wrong.
3230     !!!parse-error (type => 'unmatched end tag',
3231     text => $token->{tag_name}, token => $token);
3232 wakaba 1.52 ## Ignore the token
3233 wakaba 1.125 !!!nack ('t216.1');
3234 wakaba 1.52 !!!next-token;
3235 wakaba 1.126 next B;
3236 wakaba 1.43 }
3237 wakaba 1.52
3238     ## Clear back to table body context
3239 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3240     & TABLE_ROWS_SCOPING_EL)) {
3241 wakaba 1.79 !!!cp ('t217');
3242 wakaba 1.83 ## ISSUE: Can this state be reached?
3243 wakaba 1.52 pop @{$self->{open_elements}};
3244 wakaba 1.43 }
3245    
3246 wakaba 1.52 ## As if <{current node}>
3247     ## have an element in table scope
3248     ## true by definition
3249 wakaba 1.43
3250 wakaba 1.52 ## Clear back to table body context
3251     ## nop by definition
3252 wakaba 1.43
3253 wakaba 1.52 pop @{$self->{open_elements}};
3254 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3255 wakaba 1.52 ## reprocess in "in table" insertion mode...
3256 wakaba 1.79 } else {
3257     !!!cp ('t218');
3258 wakaba 1.52 }
3259    
3260 wakaba 1.202 if ($token->{tag_name} eq 'col') {
3261     ## Clear back to table context
3262     while (not ($self->{open_elements}->[-1]->[1]
3263     & TABLE_SCOPING_EL)) {
3264     !!!cp ('t219');
3265     ## ISSUE: Can this state be reached?
3266     pop @{$self->{open_elements}};
3267     }
3268    
3269     !!!insert-element ('colgroup',, $token);
3270     $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3271     ## reprocess
3272     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3273     !!!ack-later;
3274     next B;
3275     } elsif ({
3276     caption => 1,
3277     colgroup => 1,
3278     tbody => 1, tfoot => 1, thead => 1,
3279     }->{$token->{tag_name}}) {
3280     ## Clear back to table context
3281 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3282     & TABLE_SCOPING_EL)) {
3283 wakaba 1.79 !!!cp ('t220');
3284 wakaba 1.83 ## ISSUE: Can this state be reached?
3285 wakaba 1.52 pop @{$self->{open_elements}};
3286 wakaba 1.1 }
3287 wakaba 1.52
3288 wakaba 1.202 push @$active_formatting_elements, ['#marker', '']
3289     if $token->{tag_name} eq 'caption';
3290 wakaba 1.52
3291 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3292     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3293     $self->{insertion_mode} = {
3294     caption => IN_CAPTION_IM,
3295     colgroup => IN_COLUMN_GROUP_IM,
3296     tbody => IN_TABLE_BODY_IM,
3297     tfoot => IN_TABLE_BODY_IM,
3298     thead => IN_TABLE_BODY_IM,
3299     }->{$token->{tag_name}};
3300     !!!next-token;
3301     !!!nack ('t220.1');
3302     next B;
3303     } else {
3304     die "$0: in table: <>: $token->{tag_name}";
3305     }
3306 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3307 wakaba 1.122 !!!parse-error (type => 'not closed',
3308 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3309 wakaba 1.122 ->manakai_local_name,
3310     token => $token);
3311 wakaba 1.1
3312 wakaba 1.52 ## As if </table>
3313 wakaba 1.1 ## have a table element in table scope
3314     my $i;
3315 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3316     my $node = $self->{open_elements}->[$_];
3317 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3318 wakaba 1.79 !!!cp ('t221');
3319 wakaba 1.1 $i = $_;
3320     last INSCOPE;
3321 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3322 wakaba 1.79 !!!cp ('t222');
3323 wakaba 1.1 last INSCOPE;
3324     }
3325     } # INSCOPE
3326     unless (defined $i) {
3327 wakaba 1.79 !!!cp ('t223');
3328 wakaba 1.83 ## TODO: The following is wrong, maybe.
3329 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
3330     token => $token);
3331 wakaba 1.52 ## Ignore tokens </table><table>
3332 wakaba 1.125 !!!nack ('t223.1');
3333 wakaba 1.1 !!!next-token;
3334 wakaba 1.126 next B;
3335 wakaba 1.1 }
3336    
3337 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
3338 wakaba 1.1 ## generate implied end tags
3339 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3340 wakaba 1.79 !!!cp ('t224');
3341 wakaba 1.86 pop @{$self->{open_elements}};
3342 wakaba 1.1 }
3343    
3344 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3345 wakaba 1.79 !!!cp ('t225');
3346 wakaba 1.122 ## NOTE: |<table><tr><table>|
3347     !!!parse-error (type => 'not closed',
3348 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3349 wakaba 1.122 ->manakai_local_name,
3350     token => $token);
3351 wakaba 1.79 } else {
3352     !!!cp ('t226');
3353 wakaba 1.1 }
3354    
3355 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3356 wakaba 1.95 pop @{$open_tables};
3357 wakaba 1.1
3358 wakaba 1.52 $self->_reset_insertion_mode;
3359 wakaba 1.1
3360 wakaba 1.125 ## reprocess
3361     !!!ack-later;
3362 wakaba 1.126 next B;
3363 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
3364     if (not $open_tables->[-1]->[1]) { # tainted
3365     !!!cp ('t227.8');
3366     ## NOTE: This is an "as if in head" code clone.
3367     $parse_rcdata->(CDATA_CONTENT_MODEL);
3368 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3369 wakaba 1.126 next B;
3370 wakaba 1.100 } else {
3371     !!!cp ('t227.7');
3372     #
3373     }
3374     } elsif ($token->{tag_name} eq 'script') {
3375     if (not $open_tables->[-1]->[1]) { # tainted
3376     !!!cp ('t227.6');
3377     ## NOTE: This is an "as if in head" code clone.
3378     $script_start_tag->();
3379 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3380 wakaba 1.126 next B;
3381 wakaba 1.100 } else {
3382     !!!cp ('t227.5');
3383     #
3384     }
3385 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
3386     if (not $open_tables->[-1]->[1]) { # tainted
3387     if ($token->{attributes}->{type}) { ## TODO: case
3388     my $type = lc $token->{attributes}->{type}->{value};
3389     if ($type eq 'hidden') {
3390     !!!cp ('t227.3');
3391 wakaba 1.153 !!!parse-error (type => 'in table',
3392     text => $token->{tag_name}, token => $token);
3393 wakaba 1.98
3394 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3395 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3396 wakaba 1.98
3397     ## TODO: form element pointer
3398    
3399     pop @{$self->{open_elements}};
3400    
3401     !!!next-token;
3402 wakaba 1.125 !!!ack ('t227.2.1');
3403 wakaba 1.126 next B;
3404 wakaba 1.98 } else {
3405     !!!cp ('t227.2');
3406     #
3407     }
3408     } else {
3409     !!!cp ('t227.1');
3410     #
3411     }
3412     } else {
3413     !!!cp ('t227.4');
3414     #
3415     }
3416 wakaba 1.58 } else {
3417 wakaba 1.79 !!!cp ('t227');
3418 wakaba 1.58 #
3419     }
3420 wakaba 1.98
3421 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
3422     token => $token);
3423 wakaba 1.98
3424     $insert = $insert_to_foster;
3425     #
3426 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
3427 wakaba 1.210 if ($token->{tag_name} eq 'tr' and
3428     ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3429     ## have an element in table scope
3430 wakaba 1.52 my $i;
3431     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3432     my $node = $self->{open_elements}->[$_];
3433 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3434 wakaba 1.79 !!!cp ('t228');
3435 wakaba 1.52 $i = $_;
3436     last INSCOPE;
3437 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3438 wakaba 1.79 !!!cp ('t229');
3439 wakaba 1.52 last INSCOPE;
3440     }
3441     } # INSCOPE
3442     unless (defined $i) {
3443 wakaba 1.79 !!!cp ('t230');
3444 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3445     text => $token->{tag_name}, token => $token);
3446 wakaba 1.52 ## Ignore the token
3447 wakaba 1.125 !!!nack ('t230.1');
3448 wakaba 1.42 !!!next-token;
3449 wakaba 1.126 next B;
3450 wakaba 1.79 } else {
3451     !!!cp ('t232');
3452 wakaba 1.42 }
3453    
3454 wakaba 1.52 ## Clear back to table row context
3455 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3456     & TABLE_ROW_SCOPING_EL)) {
3457 wakaba 1.79 !!!cp ('t231');
3458 wakaba 1.83 ## ISSUE: Can this state be reached?
3459 wakaba 1.52 pop @{$self->{open_elements}};
3460     }
3461 wakaba 1.42
3462 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3463 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3464 wakaba 1.52 !!!next-token;
3465 wakaba 1.125 !!!nack ('t231.1');
3466 wakaba 1.126 next B;
3467 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3468 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3469 wakaba 1.52 ## As if </tr>
3470     ## have an element in table scope
3471     my $i;
3472     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3473     my $node = $self->{open_elements}->[$_];
3474 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3475 wakaba 1.79 !!!cp ('t233');
3476 wakaba 1.52 $i = $_;
3477     last INSCOPE;
3478 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3479 wakaba 1.79 !!!cp ('t234');
3480 wakaba 1.52 last INSCOPE;
3481 wakaba 1.42 }
3482 wakaba 1.52 } # INSCOPE
3483     unless (defined $i) {
3484 wakaba 1.79 !!!cp ('t235');
3485 wakaba 1.83 ## TODO: The following is wrong.
3486 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3487     text => $token->{type}, token => $token);
3488 wakaba 1.52 ## Ignore the token
3489 wakaba 1.125 !!!nack ('t236.1');
3490 wakaba 1.52 !!!next-token;
3491 wakaba 1.126 next B;
3492 wakaba 1.42 }
3493 wakaba 1.52
3494     ## Clear back to table row context
3495 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3496     & TABLE_ROW_SCOPING_EL)) {
3497 wakaba 1.79 !!!cp ('t236');
3498 wakaba 1.83 ## ISSUE: Can this state be reached?
3499 wakaba 1.46 pop @{$self->{open_elements}};
3500 wakaba 1.1 }
3501 wakaba 1.46
3502 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3503 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3504 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
3505 wakaba 1.1 }
3506    
3507 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3508 wakaba 1.52 ## have an element in table scope
3509     my $i;
3510     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3511     my $node = $self->{open_elements}->[$_];
3512 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3513 wakaba 1.79 !!!cp ('t237');
3514 wakaba 1.52 $i = $_;
3515     last INSCOPE;
3516 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3517 wakaba 1.79 !!!cp ('t238');
3518 wakaba 1.52 last INSCOPE;
3519     }
3520     } # INSCOPE
3521     unless (defined $i) {
3522 wakaba 1.79 !!!cp ('t239');
3523 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3524     text => $token->{tag_name}, token => $token);
3525 wakaba 1.52 ## Ignore the token
3526 wakaba 1.125 !!!nack ('t239.1');
3527 wakaba 1.52 !!!next-token;
3528 wakaba 1.126 next B;
3529 wakaba 1.47 }
3530    
3531     ## Clear back to table body context
3532 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3533     & TABLE_ROWS_SCOPING_EL)) {
3534 wakaba 1.79 !!!cp ('t240');
3535 wakaba 1.47 pop @{$self->{open_elements}};
3536     }
3537    
3538 wakaba 1.52 ## As if <{current node}>
3539     ## have an element in table scope
3540     ## true by definition
3541    
3542     ## Clear back to table body context
3543     ## nop by definition
3544    
3545     pop @{$self->{open_elements}};
3546 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3547 wakaba 1.52 ## reprocess in the "in table" insertion mode...
3548     }
3549    
3550 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
3551     ## When you edit the code fragment below, please ensure that
3552     ## the code for <table> in the "in table" insertion mode
3553     ## is synced with it.
3554    
3555 wakaba 1.52 ## have a table element in table scope
3556     my $i;
3557     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3558     my $node = $self->{open_elements}->[$_];
3559 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3560 wakaba 1.79 !!!cp ('t241');
3561 wakaba 1.52 $i = $_;
3562     last INSCOPE;
3563 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3564 wakaba 1.79 !!!cp ('t242');
3565 wakaba 1.52 last INSCOPE;
3566 wakaba 1.47 }
3567 wakaba 1.52 } # INSCOPE
3568     unless (defined $i) {
3569 wakaba 1.79 !!!cp ('t243');
3570 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3571     text => $token->{tag_name}, token => $token);
3572 wakaba 1.52 ## Ignore the token
3573 wakaba 1.125 !!!nack ('t243.1');
3574 wakaba 1.52 !!!next-token;
3575 wakaba 1.126 next B;
3576 wakaba 1.3 }
3577 wakaba 1.52
3578     splice @{$self->{open_elements}}, $i;
3579 wakaba 1.95 pop @{$open_tables};
3580 wakaba 1.1
3581 wakaba 1.52 $self->_reset_insertion_mode;
3582 wakaba 1.47
3583     !!!next-token;
3584 wakaba 1.126 next B;
3585 wakaba 1.47 } elsif ({
3586 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
3587 wakaba 1.52 }->{$token->{tag_name}} and
3588 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
3589 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3590 wakaba 1.52 ## have an element in table scope
3591     my $i;
3592     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3593     my $node = $self->{open_elements}->[$_];
3594 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3595 wakaba 1.79 !!!cp ('t247');
3596 wakaba 1.52 $i = $_;
3597     last INSCOPE;
3598 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3599 wakaba 1.79 !!!cp ('t248');
3600 wakaba 1.52 last INSCOPE;
3601     }
3602     } # INSCOPE
3603     unless (defined $i) {
3604 wakaba 1.79 !!!cp ('t249');
3605 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3606     text => $token->{tag_name}, token => $token);
3607 wakaba 1.52 ## Ignore the token
3608 wakaba 1.125 !!!nack ('t249.1');
3609 wakaba 1.52 !!!next-token;
3610 wakaba 1.126 next B;
3611 wakaba 1.52 }
3612    
3613 wakaba 1.48 ## As if </tr>
3614     ## have an element in table scope
3615     my $i;
3616     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3617     my $node = $self->{open_elements}->[$_];
3618 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3619 wakaba 1.79 !!!cp ('t250');
3620 wakaba 1.48 $i = $_;
3621     last INSCOPE;
3622 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3623 wakaba 1.79 !!!cp ('t251');
3624 wakaba 1.48 last INSCOPE;
3625     }
3626     } # INSCOPE
3627 wakaba 1.52 unless (defined $i) {
3628 wakaba 1.79 !!!cp ('t252');
3629 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3630     text => 'tr', token => $token);
3631 wakaba 1.52 ## Ignore the token
3632 wakaba 1.125 !!!nack ('t252.1');
3633 wakaba 1.52 !!!next-token;
3634 wakaba 1.126 next B;
3635 wakaba 1.52 }
3636 wakaba 1.48
3637     ## Clear back to table row context
3638 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3639     & TABLE_ROW_SCOPING_EL)) {
3640 wakaba 1.79 !!!cp ('t253');
3641 wakaba 1.83 ## ISSUE: Can this case be reached?
3642 wakaba 1.48 pop @{$self->{open_elements}};
3643     }
3644    
3645     pop @{$self->{open_elements}}; # tr
3646 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3647 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3648     }
3649    
3650     ## have an element in table scope
3651     my $i;
3652     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3653     my $node = $self->{open_elements}->[$_];
3654 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3655 wakaba 1.79 !!!cp ('t254');
3656 wakaba 1.52 $i = $_;
3657     last INSCOPE;
3658 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3659 wakaba 1.79 !!!cp ('t255');
3660 wakaba 1.52 last INSCOPE;
3661     }
3662     } # INSCOPE
3663     unless (defined $i) {
3664 wakaba 1.79 !!!cp ('t256');
3665 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3666     text => $token->{tag_name}, token => $token);
3667 wakaba 1.52 ## Ignore the token
3668 wakaba 1.125 !!!nack ('t256.1');
3669 wakaba 1.52 !!!next-token;
3670 wakaba 1.126 next B;
3671 wakaba 1.52 }
3672    
3673     ## Clear back to table body context
3674 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3675     & TABLE_ROWS_SCOPING_EL)) {
3676 wakaba 1.79 !!!cp ('t257');
3677 wakaba 1.83 ## ISSUE: Can this case be reached?
3678 wakaba 1.52 pop @{$self->{open_elements}};
3679     }
3680    
3681     pop @{$self->{open_elements}};
3682 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3683 wakaba 1.125 !!!nack ('t257.1');
3684 wakaba 1.52 !!!next-token;
3685 wakaba 1.126 next B;
3686 wakaba 1.52 } elsif ({
3687     body => 1, caption => 1, col => 1, colgroup => 1,
3688     html => 1, td => 1, th => 1,
3689 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3690     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3691 wakaba 1.52 }->{$token->{tag_name}}) {
3692 wakaba 1.125 !!!cp ('t258');
3693 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3694     text => $token->{tag_name}, token => $token);
3695 wakaba 1.125 ## Ignore the token
3696     !!!nack ('t258.1');
3697     !!!next-token;
3698 wakaba 1.126 next B;
3699 wakaba 1.58 } else {
3700 wakaba 1.79 !!!cp ('t259');
3701 wakaba 1.153 !!!parse-error (type => 'in table:/',
3702     text => $token->{tag_name}, token => $token);
3703 wakaba 1.52
3704 wakaba 1.58 $insert = $insert_to_foster;
3705     #
3706     }
3707 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3708 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3709 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3710 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3711 wakaba 1.104 !!!cp ('t259.1');
3712 wakaba 1.105 #
3713 wakaba 1.104 } else {
3714     !!!cp ('t259.2');
3715 wakaba 1.105 #
3716 wakaba 1.104 }
3717    
3718     ## Stop parsing
3719     last B;
3720 wakaba 1.58 } else {
3721     die "$0: $token->{type}: Unknown token type";
3722     }
3723 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3724 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3725 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3726 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3727     unless (length $token->{data}) {
3728 wakaba 1.79 !!!cp ('t260');
3729 wakaba 1.52 !!!next-token;
3730 wakaba 1.126 next B;
3731 wakaba 1.52 }
3732     }
3733    
3734 wakaba 1.79 !!!cp ('t261');
3735 wakaba 1.52 #
3736 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3737 wakaba 1.52 if ($token->{tag_name} eq 'col') {
3738 wakaba 1.79 !!!cp ('t262');
3739 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3740 wakaba 1.52 pop @{$self->{open_elements}};
3741 wakaba 1.125 !!!ack ('t262.1');
3742 wakaba 1.52 !!!next-token;
3743 wakaba 1.126 next B;
3744 wakaba 1.52 } else {
3745 wakaba 1.79 !!!cp ('t263');
3746 wakaba 1.52 #
3747     }
3748 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3749 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
3750 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3751 wakaba 1.79 !!!cp ('t264');
3752 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3753     text => 'colgroup', token => $token);
3754 wakaba 1.52 ## Ignore the token
3755     !!!next-token;
3756 wakaba 1.126 next B;
3757 wakaba 1.52 } else {
3758 wakaba 1.79 !!!cp ('t265');
3759 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3760 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3761 wakaba 1.52 !!!next-token;
3762 wakaba 1.126 next B;
3763 wakaba 1.52 }
3764     } elsif ($token->{tag_name} eq 'col') {
3765 wakaba 1.79 !!!cp ('t266');
3766 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3767     text => 'col', token => $token);
3768 wakaba 1.52 ## Ignore the token
3769     !!!next-token;
3770 wakaba 1.126 next B;
3771 wakaba 1.52 } else {
3772 wakaba 1.79 !!!cp ('t267');
3773 wakaba 1.52 #
3774     }
3775 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3776 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3777 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3778     !!!cp ('t270.2');
3779     ## Stop parsing.
3780     last B;
3781     } else {
3782     ## NOTE: As if </colgroup>.
3783     !!!cp ('t270.1');
3784     pop @{$self->{open_elements}}; # colgroup
3785     $self->{insertion_mode} = IN_TABLE_IM;
3786     ## Reprocess.
3787 wakaba 1.126 next B;
3788 wakaba 1.104 }
3789     } else {
3790     die "$0: $token->{type}: Unknown token type";
3791     }
3792 wakaba 1.52
3793     ## As if </colgroup>
3794 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3795 wakaba 1.79 !!!cp ('t269');
3796 wakaba 1.104 ## TODO: Wrong error type?
3797 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3798     text => 'colgroup', token => $token);
3799 wakaba 1.52 ## Ignore the token
3800 wakaba 1.125 !!!nack ('t269.1');
3801 wakaba 1.52 !!!next-token;
3802 wakaba 1.126 next B;
3803 wakaba 1.52 } else {
3804 wakaba 1.79 !!!cp ('t270');
3805 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3806 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3807 wakaba 1.125 !!!ack-later;
3808 wakaba 1.52 ## reprocess
3809 wakaba 1.126 next B;
3810 wakaba 1.52 }
3811 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3812 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3813 wakaba 1.79 !!!cp ('t271');
3814 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3815     !!!next-token;
3816 wakaba 1.126 next B;
3817 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3818 wakaba 1.123 if ($token->{tag_name} eq 'option') {
3819 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3820 wakaba 1.123 !!!cp ('t272');
3821     ## As if </option>
3822     pop @{$self->{open_elements}};
3823     } else {
3824     !!!cp ('t273');
3825     }
3826 wakaba 1.52
3827 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3828 wakaba 1.125 !!!nack ('t273.1');
3829 wakaba 1.123 !!!next-token;
3830 wakaba 1.126 next B;
3831 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
3832 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3833 wakaba 1.123 !!!cp ('t274');
3834     ## As if </option>
3835     pop @{$self->{open_elements}};
3836     } else {
3837     !!!cp ('t275');
3838     }
3839 wakaba 1.52
3840 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3841 wakaba 1.123 !!!cp ('t276');
3842     ## As if </optgroup>
3843     pop @{$self->{open_elements}};
3844     } else {
3845     !!!cp ('t277');
3846     }
3847 wakaba 1.52
3848 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3849 wakaba 1.125 !!!nack ('t277.1');
3850 wakaba 1.123 !!!next-token;
3851 wakaba 1.126 next B;
3852 wakaba 1.146 } elsif ({
3853 wakaba 1.216 select => 1, input => 1, textarea => 1, keygen => 1,
3854 wakaba 1.146 }->{$token->{tag_name}} or
3855 wakaba 1.210 (($self->{insertion_mode} & IM_MASK)
3856     == IN_SELECT_IN_TABLE_IM and
3857 wakaba 1.101 {
3858     caption => 1, table => 1,
3859     tbody => 1, tfoot => 1, thead => 1,
3860     tr => 1, td => 1, th => 1,
3861     }->{$token->{tag_name}})) {
3862 wakaba 1.222
3863     ## 1. Parse error.
3864     if ($token->{tag_name} eq 'select') {
3865     !!!parse-error (type => 'select in select', ## XXX: documentation
3866     token => $token);
3867     } else {
3868     !!!parse-error (type => 'not closed', text => 'select',
3869     token => $token);
3870     }
3871    
3872     ## 2./<select>-1. Unless "have an element in table scope" (select):
3873 wakaba 1.123 my $i;
3874     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3875     my $node = $self->{open_elements}->[$_];
3876 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3877 wakaba 1.123 !!!cp ('t278');
3878     $i = $_;
3879     last INSCOPE;
3880     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3881     !!!cp ('t279');
3882     last INSCOPE;
3883     }
3884     } # INSCOPE
3885     unless (defined $i) {
3886     !!!cp ('t280');
3887 wakaba 1.222 if ($token->{tag_name} eq 'select') {
3888     ## NOTE: This error would be raised when
3889     ## |select.innerHTML = '<select>'| is executed; in this
3890     ## case two errors, "select in select" and "unmatched
3891     ## end tags" are reported to the user, the latter might
3892     ## be confusing but this is what the spec requires.
3893     !!!parse-error (type => 'unmatched end tag',
3894     text => 'select',
3895     token => $token);
3896     }
3897     ## Ignore the token.
3898 wakaba 1.125 !!!nack ('t280.1');
3899 wakaba 1.123 !!!next-token;
3900 wakaba 1.126 next B;
3901 wakaba 1.123 }
3902 wakaba 1.222
3903     ## 3. Otherwise, as if there were <select>:
3904 wakaba 1.52
3905 wakaba 1.123 !!!cp ('t281');
3906     splice @{$self->{open_elements}}, $i;
3907 wakaba 1.52
3908 wakaba 1.123 $self->_reset_insertion_mode;
3909 wakaba 1.47
3910 wakaba 1.101 if ($token->{tag_name} eq 'select') {
3911 wakaba 1.125 !!!nack ('t281.2');
3912 wakaba 1.101 !!!next-token;
3913 wakaba 1.126 next B;
3914 wakaba 1.101 } else {
3915     !!!cp ('t281.1');
3916 wakaba 1.125 !!!ack-later;
3917 wakaba 1.101 ## Reprocess the token.
3918 wakaba 1.126 next B;
3919 wakaba 1.101 }
3920 wakaba 1.226 } elsif ($token->{tag_name} eq 'script') {
3921     !!!cp ('t281.3');
3922     ## NOTE: This is an "as if in head" code clone
3923     $script_start_tag->();
3924     next B;
3925 wakaba 1.58 } else {
3926 wakaba 1.79 !!!cp ('t282');
3927 wakaba 1.153 !!!parse-error (type => 'in select',
3928     text => $token->{tag_name}, token => $token);
3929 wakaba 1.58 ## Ignore the token
3930 wakaba 1.125 !!!nack ('t282.1');
3931 wakaba 1.58 !!!next-token;
3932 wakaba 1.126 next B;
3933 wakaba 1.58 }
3934     } elsif ($token->{type} == END_TAG_TOKEN) {
3935 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
3936 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3937     $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3938 wakaba 1.123 !!!cp ('t283');
3939     ## As if </option>
3940     splice @{$self->{open_elements}}, -2;
3941 wakaba 1.206 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3942 wakaba 1.123 !!!cp ('t284');
3943     pop @{$self->{open_elements}};
3944     } else {
3945     !!!cp ('t285');
3946 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3947     text => $token->{tag_name}, token => $token);
3948 wakaba 1.123 ## Ignore the token
3949     }
3950 wakaba 1.125 !!!nack ('t285.1');
3951 wakaba 1.123 !!!next-token;
3952 wakaba 1.126 next B;
3953 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
3954 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3955 wakaba 1.123 !!!cp ('t286');
3956     pop @{$self->{open_elements}};
3957     } else {
3958     !!!cp ('t287');
3959 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3960     text => $token->{tag_name}, token => $token);
3961 wakaba 1.123 ## Ignore the token
3962     }
3963 wakaba 1.125 !!!nack ('t287.1');
3964 wakaba 1.123 !!!next-token;
3965 wakaba 1.126 next B;
3966 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
3967     ## have an element in table scope
3968     my $i;
3969     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3970     my $node = $self->{open_elements}->[$_];
3971 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3972 wakaba 1.123 !!!cp ('t288');
3973     $i = $_;
3974     last INSCOPE;
3975     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3976     !!!cp ('t289');
3977     last INSCOPE;
3978     }
3979     } # INSCOPE
3980     unless (defined $i) {
3981     !!!cp ('t290');
3982 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3983     text => $token->{tag_name}, token => $token);
3984 wakaba 1.123 ## Ignore the token
3985 wakaba 1.125 !!!nack ('t290.1');
3986 wakaba 1.123 !!!next-token;
3987 wakaba 1.126 next B;
3988 wakaba 1.123 }
3989 wakaba 1.52
3990 wakaba 1.123 !!!cp ('t291');
3991     splice @{$self->{open_elements}}, $i;
3992 wakaba 1.52
3993 wakaba 1.123 $self->_reset_insertion_mode;
3994 wakaba 1.52
3995 wakaba 1.125 !!!nack ('t291.1');
3996 wakaba 1.123 !!!next-token;
3997 wakaba 1.126 next B;
3998 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK)
3999     == IN_SELECT_IN_TABLE_IM and
4000 wakaba 1.101 {
4001     caption => 1, table => 1, tbody => 1,
4002     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4003     }->{$token->{tag_name}}) {
4004 wakaba 1.83 ## TODO: The following is wrong?
4005 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4006     text => $token->{tag_name}, token => $token);
4007 wakaba 1.52
4008 wakaba 1.123 ## have an element in table scope
4009     my $i;
4010     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4011     my $node = $self->{open_elements}->[$_];
4012     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4013     !!!cp ('t292');
4014     $i = $_;
4015     last INSCOPE;
4016     } elsif ($node->[1] & TABLE_SCOPING_EL) {
4017     !!!cp ('t293');
4018     last INSCOPE;
4019     }
4020     } # INSCOPE
4021     unless (defined $i) {
4022     !!!cp ('t294');
4023     ## Ignore the token
4024 wakaba 1.125 !!!nack ('t294.1');
4025 wakaba 1.123 !!!next-token;
4026 wakaba 1.126 next B;
4027 wakaba 1.123 }
4028 wakaba 1.52
4029 wakaba 1.123 ## As if </select>
4030     ## have an element in table scope
4031     undef $i;
4032     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4033     my $node = $self->{open_elements}->[$_];
4034 wakaba 1.206 if ($node->[1] == SELECT_EL) {
4035 wakaba 1.123 !!!cp ('t295');
4036     $i = $_;
4037     last INSCOPE;
4038     } elsif ($node->[1] & TABLE_SCOPING_EL) {
4039 wakaba 1.83 ## ISSUE: Can this state be reached?
4040 wakaba 1.123 !!!cp ('t296');
4041     last INSCOPE;
4042     }
4043     } # INSCOPE
4044     unless (defined $i) {
4045     !!!cp ('t297');
4046 wakaba 1.83 ## TODO: The following error type is correct?
4047 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4048     text => 'select', token => $token);
4049 wakaba 1.123 ## Ignore the </select> token
4050 wakaba 1.125 !!!nack ('t297.1');
4051 wakaba 1.123 !!!next-token; ## TODO: ok?
4052 wakaba 1.126 next B;
4053 wakaba 1.123 }
4054 wakaba 1.52
4055 wakaba 1.123 !!!cp ('t298');
4056     splice @{$self->{open_elements}}, $i;
4057 wakaba 1.52
4058 wakaba 1.123 $self->_reset_insertion_mode;
4059 wakaba 1.52
4060 wakaba 1.125 !!!ack-later;
4061 wakaba 1.123 ## reprocess
4062 wakaba 1.126 next B;
4063 wakaba 1.58 } else {
4064 wakaba 1.79 !!!cp ('t299');
4065 wakaba 1.153 !!!parse-error (type => 'in select:/',
4066     text => $token->{tag_name}, token => $token);
4067 wakaba 1.52 ## Ignore the token
4068 wakaba 1.125 !!!nack ('t299.3');
4069 wakaba 1.52 !!!next-token;
4070 wakaba 1.126 next B;
4071 wakaba 1.58 }
4072 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4073 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4074 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4075     !!!cp ('t299.1');
4076 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4077 wakaba 1.104 } else {
4078     !!!cp ('t299.2');
4079     }
4080    
4081     ## Stop parsing.
4082     last B;
4083 wakaba 1.58 } else {
4084     die "$0: $token->{type}: Unknown token type";
4085     }
4086 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4087 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4088 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4089 wakaba 1.52 my $data = $1;
4090     ## As if in body
4091     $reconstruct_active_formatting_elements->($insert_to_current);
4092    
4093     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4094    
4095     unless (length $token->{data}) {
4096 wakaba 1.79 !!!cp ('t300');
4097 wakaba 1.52 !!!next-token;
4098 wakaba 1.126 next B;
4099 wakaba 1.52 }
4100     }
4101    
4102 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4103 wakaba 1.79 !!!cp ('t301');
4104 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4105 wakaba 1.188 #
4106 wakaba 1.79 } else {
4107     !!!cp ('t302');
4108 wakaba 1.188 ## "after body" insertion mode
4109     !!!parse-error (type => 'after body:#text', token => $token);
4110     #
4111 wakaba 1.52 }
4112    
4113 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4114 wakaba 1.52 ## reprocess
4115 wakaba 1.126 next B;
4116 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4117 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4118 wakaba 1.79 !!!cp ('t303');
4119 wakaba 1.153 !!!parse-error (type => 'after html',
4120     text => $token->{tag_name}, token => $token);
4121 wakaba 1.188 #
4122 wakaba 1.79 } else {
4123     !!!cp ('t304');
4124 wakaba 1.188 ## "after body" insertion mode
4125     !!!parse-error (type => 'after body',
4126     text => $token->{tag_name}, token => $token);
4127     #
4128 wakaba 1.52 }
4129    
4130 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4131 wakaba 1.125 !!!ack-later;
4132 wakaba 1.52 ## reprocess
4133 wakaba 1.126 next B;
4134 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4135 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4136 wakaba 1.79 !!!cp ('t305');
4137 wakaba 1.153 !!!parse-error (type => 'after html:/',
4138     text => $token->{tag_name}, token => $token);
4139 wakaba 1.52
4140 wakaba 1.188 $self->{insertion_mode} = IN_BODY_IM;
4141     ## Reprocess.
4142     next B;
4143 wakaba 1.79 } else {
4144     !!!cp ('t306');
4145 wakaba 1.52 }
4146    
4147     ## "after body" insertion mode
4148     if ($token->{tag_name} eq 'html') {
4149     if (defined $self->{inner_html_node}) {
4150 wakaba 1.79 !!!cp ('t307');
4151 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4152     text => 'html', token => $token);
4153 wakaba 1.52 ## Ignore the token
4154     !!!next-token;
4155 wakaba 1.126 next B;
4156 wakaba 1.52 } else {
4157 wakaba 1.79 !!!cp ('t308');
4158 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4159 wakaba 1.52 !!!next-token;
4160 wakaba 1.126 next B;
4161 wakaba 1.52 }
4162     } else {
4163 wakaba 1.79 !!!cp ('t309');
4164 wakaba 1.153 !!!parse-error (type => 'after body:/',
4165     text => $token->{tag_name}, token => $token);
4166 wakaba 1.52
4167 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4168 wakaba 1.52 ## reprocess
4169 wakaba 1.126 next B;
4170 wakaba 1.52 }
4171 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4172     !!!cp ('t309.2');
4173     ## Stop parsing
4174     last B;
4175 wakaba 1.52 } else {
4176     die "$0: $token->{type}: Unknown token type";
4177     }
4178 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4179 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4180 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4181 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4182    
4183     unless (length $token->{data}) {
4184 wakaba 1.79 !!!cp ('t310');
4185 wakaba 1.52 !!!next-token;
4186 wakaba 1.126 next B;
4187 wakaba 1.52 }
4188     }
4189    
4190 wakaba 1.188 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4191 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4192 wakaba 1.79 !!!cp ('t311');
4193 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
4194 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4195 wakaba 1.79 !!!cp ('t312');
4196 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
4197 wakaba 1.158 } else { # "after after frameset"
4198 wakaba 1.79 !!!cp ('t313');
4199 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4200 wakaba 1.52 }
4201    
4202     ## Ignore the token.
4203     if (length $token->{data}) {
4204 wakaba 1.79 !!!cp ('t314');
4205 wakaba 1.52 ## reprocess the rest of characters
4206     } else {
4207 wakaba 1.79 !!!cp ('t315');
4208 wakaba 1.52 !!!next-token;
4209     }
4210 wakaba 1.126 next B;
4211 wakaba 1.52 }
4212    
4213     die qq[$0: Character "$token->{data}"];
4214 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4215 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4216 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4217 wakaba 1.79 !!!cp ('t318');
4218 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4219 wakaba 1.125 !!!nack ('t318.1');
4220 wakaba 1.52 !!!next-token;
4221 wakaba 1.126 next B;
4222 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
4223 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4224 wakaba 1.79 !!!cp ('t319');
4225 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4226 wakaba 1.52 pop @{$self->{open_elements}};
4227 wakaba 1.125 !!!ack ('t319.1');
4228 wakaba 1.52 !!!next-token;
4229 wakaba 1.126 next B;
4230 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
4231 wakaba 1.79 !!!cp ('t320');
4232 wakaba 1.148 ## NOTE: As if in head.
4233 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4234 wakaba 1.126 next B;
4235 wakaba 1.158
4236     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4237     ## has no parse error.
4238 wakaba 1.52 } else {
4239 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4240 wakaba 1.79 !!!cp ('t321');
4241 wakaba 1.153 !!!parse-error (type => 'in frameset',
4242     text => $token->{tag_name}, token => $token);
4243 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4244 wakaba 1.79 !!!cp ('t322');
4245 wakaba 1.153 !!!parse-error (type => 'after frameset',
4246     text => $token->{tag_name}, token => $token);
4247 wakaba 1.158 } else { # "after after frameset"
4248     !!!cp ('t322.2');
4249     !!!parse-error (type => 'after after frameset',
4250     text => $token->{tag_name}, token => $token);
4251 wakaba 1.52 }
4252     ## Ignore the token
4253 wakaba 1.125 !!!nack ('t322.1');
4254 wakaba 1.52 !!!next-token;
4255 wakaba 1.126 next B;
4256 wakaba 1.52 }
4257 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4258 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4259 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4260 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4261 wakaba 1.52 @{$self->{open_elements}} == 1) {
4262 wakaba 1.79 !!!cp ('t325');
4263 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4264     text => $token->{tag_name}, token => $token);
4265 wakaba 1.52 ## Ignore the token
4266     !!!next-token;
4267     } else {
4268 wakaba 1.79 !!!cp ('t326');
4269 wakaba 1.52 pop @{$self->{open_elements}};
4270     !!!next-token;
4271     }
4272 wakaba 1.47
4273 wakaba 1.52 if (not defined $self->{inner_html_node} and
4274 wakaba 1.206 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4275 wakaba 1.79 !!!cp ('t327');
4276 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4277 wakaba 1.79 } else {
4278     !!!cp ('t328');
4279 wakaba 1.52 }
4280 wakaba 1.126 next B;
4281 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
4282 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4283 wakaba 1.79 !!!cp ('t329');
4284 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4285 wakaba 1.52 !!!next-token;
4286 wakaba 1.126 next B;
4287 wakaba 1.52 } else {
4288 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4289 wakaba 1.79 !!!cp ('t330');
4290 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
4291     text => $token->{tag_name}, token => $token);
4292 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4293     !!!cp ('t330.1');
4294     !!!parse-error (type => 'after frameset:/',
4295     text => $token->{tag_name}, token => $token);
4296     } else { # "after after frameset"
4297 wakaba 1.79 !!!cp ('t331');
4298 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
4299 wakaba 1.153 text => $token->{tag_name}, token => $token);
4300 wakaba 1.52 }
4301     ## Ignore the token
4302     !!!next-token;
4303 wakaba 1.126 next B;
4304 wakaba 1.52 }
4305 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4306 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4307 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4308     !!!cp ('t331.1');
4309 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4310 wakaba 1.104 } else {
4311     !!!cp ('t331.2');
4312     }
4313    
4314     ## Stop parsing
4315     last B;
4316 wakaba 1.52 } else {
4317     die "$0: $token->{type}: Unknown token type";
4318     }
4319     } else {
4320     die "$0: $self->{insertion_mode}: Unknown insertion mode";
4321     }
4322 wakaba 1.47
4323 wakaba 1.52 ## "in body" insertion mode
4324 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
4325 wakaba 1.52 if ($token->{tag_name} eq 'script') {
4326 wakaba 1.79 !!!cp ('t332');
4327 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4328 wakaba 1.100 $script_start_tag->();
4329 wakaba 1.126 next B;
4330 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
4331 wakaba 1.79 !!!cp ('t333');
4332 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4333 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4334 wakaba 1.126 next B;
4335 wakaba 1.52 } elsif ({
4336 wakaba 1.194 base => 1, command => 1, eventsource => 1, link => 1,
4337 wakaba 1.52 }->{$token->{tag_name}}) {
4338 wakaba 1.79 !!!cp ('t334');
4339 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4340 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4341 wakaba 1.194 pop @{$self->{open_elements}};
4342 wakaba 1.125 !!!ack ('t334.1');
4343 wakaba 1.52 !!!next-token;
4344 wakaba 1.126 next B;
4345 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
4346     ## NOTE: This is an "as if in head" code clone, only "-t" differs
4347 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4348 wakaba 1.194 my $meta_el = pop @{$self->{open_elements}};
4349 wakaba 1.46
4350 wakaba 1.52 unless ($self->{confident}) {
4351 wakaba 1.134 if ($token->{attributes}->{charset}) {
4352 wakaba 1.79 !!!cp ('t335');
4353 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4354     ## in the {change_encoding} callback.
4355 wakaba 1.63 $self->{change_encoding}
4356 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
4357 wakaba 1.66
4358     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4359     ->set_user_data (manakai_has_reference =>
4360     $token->{attributes}->{charset}
4361     ->{has_reference});
4362 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4363     if ($token->{attributes}->{content}->{value}
4364 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4365 wakaba 1.189 [\x09\x0A\x0C\x0D\x20]*=
4366     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4367     ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4368     /x) {
4369 wakaba 1.79 !!!cp ('t336');
4370 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4371     ## in the {change_encoding} callback.
4372 wakaba 1.63 $self->{change_encoding}
4373 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4374 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4375     ->set_user_data (manakai_has_reference =>
4376     $token->{attributes}->{content}
4377     ->{has_reference});
4378 wakaba 1.63 }
4379 wakaba 1.52 }
4380 wakaba 1.66 } else {
4381     if ($token->{attributes}->{charset}) {
4382 wakaba 1.79 !!!cp ('t337');
4383 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4384     ->set_user_data (manakai_has_reference =>
4385     $token->{attributes}->{charset}
4386     ->{has_reference});
4387     }
4388 wakaba 1.68 if ($token->{attributes}->{content}) {
4389 wakaba 1.79 !!!cp ('t338');
4390 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4391     ->set_user_data (manakai_has_reference =>
4392     $token->{attributes}->{content}
4393     ->{has_reference});
4394     }
4395 wakaba 1.52 }
4396 wakaba 1.1
4397 wakaba 1.125 !!!ack ('t338.1');
4398 wakaba 1.52 !!!next-token;
4399 wakaba 1.126 next B;
4400 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
4401 wakaba 1.79 !!!cp ('t341');
4402 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4403 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4404 wakaba 1.126 next B;
4405 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
4406 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
4407 wakaba 1.46
4408 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
4409 wakaba 1.206 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4410 wakaba 1.79 !!!cp ('t342');
4411 wakaba 1.52 ## Ignore the token
4412     } else {
4413     my $body_el = $self->{open_elements}->[1]->[0];
4414     for my $attr_name (keys %{$token->{attributes}}) {
4415     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4416 wakaba 1.79 !!!cp ('t343');
4417 wakaba 1.52 $body_el->set_attribute_ns
4418     (undef, [undef, $attr_name],
4419     $token->{attributes}->{$attr_name}->{value});
4420     }
4421     }
4422     }
4423 wakaba 1.125 !!!nack ('t343.1');
4424 wakaba 1.52 !!!next-token;
4425 wakaba 1.126 next B;
4426 wakaba 1.52 } elsif ({
4427 wakaba 1.195 ## NOTE: Start tags for non-phrasing flow content elements
4428    
4429     ## NOTE: The normal one
4430     address => 1, article => 1, aside => 1, blockquote => 1,
4431     center => 1, datagrid => 1, details => 1, dialog => 1,
4432     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4433     footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4434     h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
4435     section => 1, ul => 1,
4436     ## NOTE: As normal, but drops leading newline
4437 wakaba 1.97 pre => 1, listing => 1,
4438 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
4439 wakaba 1.109 form => 1,
4440 wakaba 1.195
4441 wakaba 1.109 table => 1,
4442     hr => 1,
4443 wakaba 1.52 }->{$token->{tag_name}}) {
4444 wakaba 1.225
4445     ## 1. When there is an opening |form| element:
4446 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4447     !!!cp ('t350');
4448 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
4449 wakaba 1.109 ## Ignore the token
4450 wakaba 1.125 !!!nack ('t350.1');
4451 wakaba 1.109 !!!next-token;
4452 wakaba 1.126 next B;
4453 wakaba 1.109 }
4454    
4455 wakaba 1.225 ## 2. Close the |p| element, if any.
4456 wakaba 1.217 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4457     $self->{document}->manakai_compat_mode ne 'quirks') {
4458     ## has a p element in scope
4459     INSCOPE: for (reverse @{$self->{open_elements}}) {
4460     if ($_->[1] == P_EL) {
4461     !!!cp ('t344');
4462     !!!back-token; # <form>
4463     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4464     line => $token->{line}, column => $token->{column}};
4465     next B;
4466     } elsif ($_->[1] & SCOPING_EL) {
4467     !!!cp ('t345');
4468     last INSCOPE;
4469     }
4470     } # INSCOPE
4471     }
4472 wakaba 1.225
4473     ## 3. Close the opening <hn> element, if any.
4474     if ({h1 => 1, h2 => 1, h3 => 1,
4475     h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) {
4476     if ($self->{open_elements}->[-1]->[1] == HEADING_EL) {
4477     !!!parse-error (type => 'not closed',
4478     text => $self->{open_elements}->[-1]->[0]->manakai_local_name,
4479     token => $token);
4480     pop @{$self->{open_elements}};
4481     }
4482     }
4483    
4484     ## 4. Insertion.
4485 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4486 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4487 wakaba 1.125 !!!nack ('t346.1');
4488 wakaba 1.52 !!!next-token;
4489 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4490 wakaba 1.52 $token->{data} =~ s/^\x0A//;
4491     unless (length $token->{data}) {
4492 wakaba 1.79 !!!cp ('t346');
4493 wakaba 1.1 !!!next-token;
4494 wakaba 1.79 } else {
4495     !!!cp ('t349');
4496 wakaba 1.52 }
4497 wakaba 1.79 } else {
4498     !!!cp ('t348');
4499 wakaba 1.52 }
4500 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
4501     !!!cp ('t347.1');
4502     $self->{form_element} = $self->{open_elements}->[-1]->[0];
4503    
4504 wakaba 1.125 !!!nack ('t347.2');
4505 wakaba 1.109 !!!next-token;
4506     } elsif ($token->{tag_name} eq 'table') {
4507     !!!cp ('t382');
4508     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4509    
4510     $self->{insertion_mode} = IN_TABLE_IM;
4511    
4512 wakaba 1.125 !!!nack ('t382.1');
4513 wakaba 1.109 !!!next-token;
4514     } elsif ($token->{tag_name} eq 'hr') {
4515     !!!cp ('t386');
4516     pop @{$self->{open_elements}};
4517    
4518 wakaba 1.125 !!!nack ('t386.1');
4519 wakaba 1.109 !!!next-token;
4520 wakaba 1.52 } else {
4521 wakaba 1.125 !!!nack ('t347.1');
4522 wakaba 1.52 !!!next-token;
4523     }
4524 wakaba 1.126 next B;
4525 wakaba 1.196 } elsif ($token->{tag_name} eq 'li') {
4526     ## NOTE: As normal, but imply </li> when there's another <li> ...
4527 wakaba 1.193
4528 wakaba 1.225 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)::
4529     ## Interpreted as <li><foo/></li><li/> (non-conforming):
4530 wakaba 1.193 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4531     ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4532     ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4533     ## object (Fx)
4534 wakaba 1.225 ## Generate non-tree (non-conforming):
4535 wakaba 1.193 ## basefont (IE7 (where basefont is non-void)), center (IE),
4536     ## form (IE), hn (IE)
4537 wakaba 1.225 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)::
4538     ## Interpreted as <li><foo><li/></foo></li> (non-conforming):
4539 wakaba 1.193 ## div (Fx, S)
4540 wakaba 1.196
4541     my $non_optional;
4542 wakaba 1.52 my $i = -1;
4543 wakaba 1.196
4544     ## 1.
4545     for my $node (reverse @{$self->{open_elements}}) {
4546 wakaba 1.206 if ($node->[1] == LI_EL) {
4547 wakaba 1.196 ## 2. (a) As if </li>
4548     {
4549     ## If no </li> - not applied
4550     #
4551    
4552     ## Otherwise
4553    
4554     ## 1. generate implied end tags, except for </li>
4555     #
4556    
4557     ## 2. If current node != "li", parse error
4558     if ($non_optional) {
4559     !!!parse-error (type => 'not closed',
4560     text => $non_optional->[0]->manakai_local_name,
4561     token => $token);
4562     !!!cp ('t355');
4563     } else {
4564     !!!cp ('t356');
4565     }
4566    
4567     ## 3. Pop
4568     splice @{$self->{open_elements}}, $i;
4569 wakaba 1.52 }
4570 wakaba 1.196
4571     last; ## 2. (b) goto 5.
4572     } elsif (
4573     ## NOTE: not "formatting" and not "phrasing"
4574     ($node->[1] & SPECIAL_EL or
4575     $node->[1] & SCOPING_EL) and
4576     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4577 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4578     ) {
4579 wakaba 1.196 ## 3.
4580 wakaba 1.79 !!!cp ('t357');
4581 wakaba 1.196 last; ## goto 5.
4582     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4583 wakaba 1.79 !!!cp ('t358');
4584 wakaba 1.196 #
4585     } else {
4586     !!!cp ('t359');
4587     $non_optional ||= $node;
4588     #
4589 wakaba 1.52 }
4590 wakaba 1.196 ## 4.
4591     ## goto 2.
4592 wakaba 1.52 $i--;
4593 wakaba 1.196 }
4594    
4595     ## 5. (a) has a |p| element in scope
4596     INSCOPE: for (reverse @{$self->{open_elements}}) {
4597 wakaba 1.206 if ($_->[1] == P_EL) {
4598 wakaba 1.196 !!!cp ('t353');
4599 wakaba 1.198
4600     ## NOTE: |<p><li>|, for example.
4601    
4602 wakaba 1.196 !!!back-token; # <x>
4603     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4604     line => $token->{line}, column => $token->{column}};
4605     next B;
4606     } elsif ($_->[1] & SCOPING_EL) {
4607     !!!cp ('t354');
4608     last INSCOPE;
4609     }
4610     } # INSCOPE
4611    
4612     ## 5. (b) insert
4613 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4614 wakaba 1.125 !!!nack ('t359.1');
4615 wakaba 1.52 !!!next-token;
4616 wakaba 1.126 next B;
4617 wakaba 1.196 } elsif ($token->{tag_name} eq 'dt' or
4618     $token->{tag_name} eq 'dd') {
4619     ## NOTE: As normal, but imply </dt> or </dd> when ...
4620    
4621     my $non_optional;
4622     my $i = -1;
4623    
4624     ## 1.
4625     for my $node (reverse @{$self->{open_elements}}) {
4626 wakaba 1.207 if ($node->[1] == DTDD_EL) {
4627 wakaba 1.196 ## 2. (a) As if </dt> or </dd>
4628     {
4629     ## If no </dt> or </dd> - not applied
4630     #
4631    
4632     ## Otherwise
4633    
4634     ## 1. generate implied end tags, except for </dt> or </dd>
4635     #
4636    
4637     ## 2. If current node != "dt"|"dd", parse error
4638     if ($non_optional) {
4639     !!!parse-error (type => 'not closed',
4640     text => $non_optional->[0]->manakai_local_name,
4641     token => $token);
4642     !!!cp ('t355.1');
4643     } else {
4644     !!!cp ('t356.1');
4645     }
4646    
4647     ## 3. Pop
4648     splice @{$self->{open_elements}}, $i;
4649     }
4650    
4651     last; ## 2. (b) goto 5.
4652     } elsif (
4653     ## NOTE: not "formatting" and not "phrasing"
4654     ($node->[1] & SPECIAL_EL or
4655     $node->[1] & SCOPING_EL) and
4656     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4657    
4658 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4659     ) {
4660 wakaba 1.196 ## 3.
4661     !!!cp ('t357.1');
4662     last; ## goto 5.
4663     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4664     !!!cp ('t358.1');
4665     #
4666     } else {
4667     !!!cp ('t359.1');
4668     $non_optional ||= $node;
4669     #
4670     }
4671     ## 4.
4672     ## goto 2.
4673     $i--;
4674     }
4675    
4676     ## 5. (a) has a |p| element in scope
4677     INSCOPE: for (reverse @{$self->{open_elements}}) {
4678 wakaba 1.206 if ($_->[1] == P_EL) {
4679 wakaba 1.196 !!!cp ('t353.1');
4680     !!!back-token; # <x>
4681     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4682     line => $token->{line}, column => $token->{column}};
4683     next B;
4684     } elsif ($_->[1] & SCOPING_EL) {
4685     !!!cp ('t354.1');
4686     last INSCOPE;
4687     }
4688     } # INSCOPE
4689    
4690     ## 5. (b) insert
4691     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4692     !!!nack ('t359.2');
4693     !!!next-token;
4694     next B;
4695 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
4696 wakaba 1.195 ## NOTE: As normal, but effectively ends parsing
4697    
4698 wakaba 1.52 ## has a p element in scope
4699     INSCOPE: for (reverse @{$self->{open_elements}}) {
4700 wakaba 1.206 if ($_->[1] == P_EL) {
4701 wakaba 1.79 !!!cp ('t367');
4702 wakaba 1.125 !!!back-token; # <plaintext>
4703 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4704     line => $token->{line}, column => $token->{column}};
4705 wakaba 1.126 next B;
4706 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
4707 wakaba 1.79 !!!cp ('t368');
4708 wakaba 1.52 last INSCOPE;
4709 wakaba 1.46 }
4710 wakaba 1.52 } # INSCOPE
4711    
4712 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4713 wakaba 1.52
4714     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4715    
4716 wakaba 1.125 !!!nack ('t368.1');
4717 wakaba 1.52 !!!next-token;
4718 wakaba 1.126 next B;
4719 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
4720     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4721     my $node = $active_formatting_elements->[$i];
4722 wakaba 1.206 if ($node->[1] == A_EL) {
4723 wakaba 1.79 !!!cp ('t371');
4724 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
4725 wakaba 1.52
4726 wakaba 1.125 !!!back-token; # <a>
4727 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4728     line => $token->{line}, column => $token->{column}};
4729 wakaba 1.113 $formatting_end_tag->($token);
4730 wakaba 1.52
4731     AFE2: for (reverse 0..$#$active_formatting_elements) {
4732     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4733 wakaba 1.79 !!!cp ('t372');
4734 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
4735     last AFE2;
4736 wakaba 1.1 }
4737 wakaba 1.52 } # AFE2
4738     OE: for (reverse 0..$#{$self->{open_elements}}) {
4739     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4740 wakaba 1.79 !!!cp ('t373');
4741 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
4742     last OE;
4743 wakaba 1.1 }
4744 wakaba 1.52 } # OE
4745     last AFE;
4746     } elsif ($node->[0] eq '#marker') {
4747 wakaba 1.79 !!!cp ('t374');
4748 wakaba 1.52 last AFE;
4749     }
4750     } # AFE
4751    
4752     $reconstruct_active_formatting_elements->($insert_to_current);
4753 wakaba 1.1
4754 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4755 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4756 wakaba 1.1
4757 wakaba 1.125 !!!nack ('t374.1');
4758 wakaba 1.52 !!!next-token;
4759 wakaba 1.126 next B;
4760 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
4761     $reconstruct_active_formatting_elements->($insert_to_current);
4762 wakaba 1.1
4763 wakaba 1.52 ## has a |nobr| element in scope
4764     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4765     my $node = $self->{open_elements}->[$_];
4766 wakaba 1.206 if ($node->[1] == NOBR_EL) {
4767 wakaba 1.79 !!!cp ('t376');
4768 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
4769 wakaba 1.125 !!!back-token; # <nobr>
4770 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4771     line => $token->{line}, column => $token->{column}};
4772 wakaba 1.126 next B;
4773 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4774 wakaba 1.79 !!!cp ('t377');
4775 wakaba 1.52 last INSCOPE;
4776     }
4777     } # INSCOPE
4778    
4779 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4780 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4781    
4782 wakaba 1.125 !!!nack ('t377.1');
4783 wakaba 1.52 !!!next-token;
4784 wakaba 1.126 next B;
4785 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
4786     ## has a button element in scope
4787     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4788     my $node = $self->{open_elements}->[$_];
4789 wakaba 1.206 if ($node->[1] == BUTTON_EL) {
4790 wakaba 1.79 !!!cp ('t378');
4791 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
4792 wakaba 1.125 !!!back-token; # <button>
4793 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4794     line => $token->{line}, column => $token->{column}};
4795 wakaba 1.126 next B;
4796 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4797 wakaba 1.79 !!!cp ('t379');
4798 wakaba 1.52 last INSCOPE;
4799     }
4800     } # INSCOPE
4801    
4802     $reconstruct_active_formatting_elements->($insert_to_current);
4803    
4804 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4805 wakaba 1.85
4806     ## TODO: associate with $self->{form_element} if defined
4807    
4808 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
4809 wakaba 1.1
4810 wakaba 1.125 !!!nack ('t379.1');
4811 wakaba 1.52 !!!next-token;
4812 wakaba 1.126 next B;
4813 wakaba 1.103 } elsif ({
4814 wakaba 1.109 xmp => 1,
4815     iframe => 1,
4816     noembed => 1,
4817 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4818 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
4819 wakaba 1.103 }->{$token->{tag_name}}) {
4820 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
4821     !!!cp ('t381');
4822     $reconstruct_active_formatting_elements->($insert_to_current);
4823     } else {
4824     !!!cp ('t399');
4825     }
4826     ## NOTE: There is an "as if in body" code clone.
4827 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4828 wakaba 1.126 next B;
4829 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
4830 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
4831 wakaba 1.52
4832     if (defined $self->{form_element}) {
4833 wakaba 1.79 !!!cp ('t389');
4834 wakaba 1.52 ## Ignore the token
4835 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
4836 wakaba 1.52 !!!next-token;
4837 wakaba 1.126 next B;
4838 wakaba 1.52 } else {
4839 wakaba 1.147 !!!ack ('t391.1');
4840    
4841 wakaba 1.52 my $at = $token->{attributes};
4842     my $form_attrs;
4843     $form_attrs->{action} = $at->{action} if $at->{action};
4844     my $prompt_attr = $at->{prompt};
4845     $at->{name} = {name => 'name', value => 'isindex'};
4846     delete $at->{action};
4847     delete $at->{prompt};
4848     my @tokens = (
4849 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
4850 wakaba 1.114 attributes => $form_attrs,
4851     line => $token->{line}, column => $token->{column}},
4852     {type => START_TAG_TOKEN, tag_name => 'hr',
4853     line => $token->{line}, column => $token->{column}},
4854     {type => START_TAG_TOKEN, tag_name => 'label',
4855     line => $token->{line}, column => $token->{column}},
4856 wakaba 1.52 );
4857     if ($prompt_attr) {
4858 wakaba 1.79 !!!cp ('t390');
4859 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4860 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4861     };
4862 wakaba 1.1 } else {
4863 wakaba 1.79 !!!cp ('t391');
4864 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
4865 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
4866 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4867     }; # SHOULD
4868 wakaba 1.52 ## TODO: make this configurable
4869 wakaba 1.1 }
4870 wakaba 1.52 push @tokens,
4871 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4872     line => $token->{line}, column => $token->{column}},
4873 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4874 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
4875     line => $token->{line}, column => $token->{column}},
4876     {type => START_TAG_TOKEN, tag_name => 'hr',
4877     line => $token->{line}, column => $token->{column}},
4878     {type => END_TAG_TOKEN, tag_name => 'form',
4879     line => $token->{line}, column => $token->{column}};
4880 wakaba 1.52 !!!back-token (@tokens);
4881 wakaba 1.125 !!!next-token;
4882 wakaba 1.126 next B;
4883 wakaba 1.52 }
4884     } elsif ($token->{tag_name} eq 'textarea') {
4885 wakaba 1.224 ## 1. Insert
4886 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4887 wakaba 1.52
4888 wakaba 1.224 ## Step 2 # XXX
4889 wakaba 1.52 ## TODO: $self->{form_element} if defined
4890 wakaba 1.205
4891 wakaba 1.224 ## 2. Drop U+000A LINE FEED
4892 wakaba 1.205 $self->{ignore_newline} = 1;
4893    
4894 wakaba 1.224 ## 3. RCDATA
4895 wakaba 1.52 $self->{content_model} = RCDATA_CONTENT_MODEL;
4896     delete $self->{escape}; # MUST
4897 wakaba 1.205
4898 wakaba 1.224 ## 4., 6. Insertion mode
4899 wakaba 1.205 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4900    
4901 wakaba 1.224 ## XXX: 5. frameset-ok flag
4902    
4903 wakaba 1.125 !!!nack ('t392.1');
4904 wakaba 1.52 !!!next-token;
4905 wakaba 1.126 next B;
4906 wakaba 1.201 } elsif ($token->{tag_name} eq 'optgroup' or
4907     $token->{tag_name} eq 'option') {
4908     ## has an |option| element in scope
4909     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4910     my $node = $self->{open_elements}->[$_];
4911 wakaba 1.206 if ($node->[1] == OPTION_EL) {
4912 wakaba 1.201 !!!cp ('t397.1');
4913     ## NOTE: As if </option>
4914     !!!back-token; # <option> or <optgroup>
4915     $token = {type => END_TAG_TOKEN, tag_name => 'option',
4916     line => $token->{line}, column => $token->{column}};
4917     next B;
4918     } elsif ($node->[1] & SCOPING_EL) {
4919     !!!cp ('t397.2');
4920     last INSCOPE;
4921     }
4922     } # INSCOPE
4923    
4924     $reconstruct_active_formatting_elements->($insert_to_current);
4925    
4926     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4927    
4928     !!!nack ('t397.3');
4929     !!!next-token;
4930     redo B;
4931 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
4932     $token->{tag_name} eq 'rp') {
4933     ## has a |ruby| element in scope
4934     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4935     my $node = $self->{open_elements}->[$_];
4936 wakaba 1.206 if ($node->[1] == RUBY_EL) {
4937 wakaba 1.151 !!!cp ('t398.1');
4938     ## generate implied end tags
4939     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4940     !!!cp ('t398.2');
4941     pop @{$self->{open_elements}};
4942     }
4943 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4944 wakaba 1.151 !!!cp ('t398.3');
4945     !!!parse-error (type => 'not closed',
4946 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4947 wakaba 1.151 ->manakai_local_name,
4948     token => $token);
4949     pop @{$self->{open_elements}}
4950 wakaba 1.206 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4951 wakaba 1.151 }
4952     last INSCOPE;
4953     } elsif ($node->[1] & SCOPING_EL) {
4954     !!!cp ('t398.4');
4955     last INSCOPE;
4956     }
4957     } # INSCOPE
4958 wakaba 1.212
4959     ## TODO: <non-ruby><rt> is not allowed.
4960 wakaba 1.151
4961     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4962    
4963     !!!nack ('t398.5');
4964     !!!next-token;
4965     redo B;
4966 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
4967     $token->{tag_name} eq 'svg') {
4968     $reconstruct_active_formatting_elements->($insert_to_current);
4969 wakaba 1.131
4970 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4971    
4972 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4973    
4974     ## "adjust foreign attributes" - done in insert-element-f
4975 wakaba 1.126
4976 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4977 wakaba 1.126
4978     if ($self->{self_closing}) {
4979     pop @{$self->{open_elements}};
4980 wakaba 1.201 !!!ack ('t398.6');
4981 wakaba 1.126 } else {
4982 wakaba 1.201 !!!cp ('t398.7');
4983 wakaba 1.126 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4984     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4985     ## mode, "in body" (not "in foreign content") secondary insertion
4986     ## mode, maybe.
4987     }
4988    
4989     !!!next-token;
4990     next B;
4991 wakaba 1.52 } elsif ({
4992     caption => 1, col => 1, colgroup => 1, frame => 1,
4993 wakaba 1.201 frameset => 1, head => 1,
4994 wakaba 1.52 tbody => 1, td => 1, tfoot => 1, th => 1,
4995     thead => 1, tr => 1,
4996     }->{$token->{tag_name}}) {
4997 wakaba 1.79 !!!cp ('t401');
4998 wakaba 1.153 !!!parse-error (type => 'in body',
4999     text => $token->{tag_name}, token => $token);
5000 wakaba 1.52 ## Ignore the token
5001 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
5002 wakaba 1.52 !!!next-token;
5003 wakaba 1.126 next B;
5004 wakaba 1.198 } elsif ($token->{tag_name} eq 'param' or
5005     $token->{tag_name} eq 'source') {
5006     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5007     pop @{$self->{open_elements}};
5008    
5009     !!!ack ('t398.5');
5010     !!!next-token;
5011     redo B;
5012 wakaba 1.52 } else {
5013 wakaba 1.110 if ($token->{tag_name} eq 'image') {
5014     !!!cp ('t384');
5015 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
5016 wakaba 1.110 $token->{tag_name} = 'img';
5017     } else {
5018     !!!cp ('t385');
5019     }
5020    
5021     ## NOTE: There is an "as if <br>" code clone.
5022 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5023    
5024 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5025 wakaba 1.109
5026 wakaba 1.110 if ({
5027     applet => 1, marquee => 1, object => 1,
5028     }->{$token->{tag_name}}) {
5029     !!!cp ('t380');
5030     push @$active_formatting_elements, ['#marker', ''];
5031 wakaba 1.125 !!!nack ('t380.1');
5032 wakaba 1.110 } elsif ({
5033     b => 1, big => 1, em => 1, font => 1, i => 1,
5034 wakaba 1.193 s => 1, small => 1, strike => 1,
5035 wakaba 1.110 strong => 1, tt => 1, u => 1,
5036     }->{$token->{tag_name}}) {
5037     !!!cp ('t375');
5038     push @$active_formatting_elements, $self->{open_elements}->[-1];
5039 wakaba 1.125 !!!nack ('t375.1');
5040 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
5041     !!!cp ('t388');
5042     ## TODO: associate with $self->{form_element} if defined
5043     pop @{$self->{open_elements}};
5044 wakaba 1.125 !!!ack ('t388.2');
5045 wakaba 1.110 } elsif ({
5046     area => 1, basefont => 1, bgsound => 1, br => 1,
5047 wakaba 1.198 embed => 1, img => 1, spacer => 1, wbr => 1,
5048 wakaba 1.110 }->{$token->{tag_name}}) {
5049     !!!cp ('t388.1');
5050     pop @{$self->{open_elements}};
5051 wakaba 1.125 !!!ack ('t388.3');
5052 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
5053 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
5054    
5055     if ($self->{insertion_mode} & TABLE_IMS or
5056     $self->{insertion_mode} & BODY_TABLE_IMS or
5057 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5058 wakaba 1.109 !!!cp ('t400.1');
5059     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5060     } else {
5061     !!!cp ('t400.2');
5062     $self->{insertion_mode} = IN_SELECT_IM;
5063     }
5064 wakaba 1.125 !!!nack ('t400.3');
5065 wakaba 1.110 } else {
5066 wakaba 1.125 !!!nack ('t402');
5067 wakaba 1.109 }
5068 wakaba 1.51
5069 wakaba 1.52 !!!next-token;
5070 wakaba 1.126 next B;
5071 wakaba 1.52 }
5072 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5073 wakaba 1.52 if ($token->{tag_name} eq 'body') {
5074 wakaba 1.225
5075     ## 1. If not "have an element in scope":
5076     ## "has a |body| element in scope"
5077 wakaba 1.107 my $i;
5078 wakaba 1.111 INSCOPE: {
5079     for (reverse @{$self->{open_elements}}) {
5080 wakaba 1.206 if ($_->[1] == BODY_EL) {
5081 wakaba 1.111 !!!cp ('t405');
5082     $i = $_;
5083     last INSCOPE;
5084 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5085 wakaba 1.111 !!!cp ('t405.1');
5086     last;
5087     }
5088 wakaba 1.52 }
5089 wakaba 1.111
5090 wakaba 1.200 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5091    
5092     !!!parse-error (type => 'unmatched end tag',
5093 wakaba 1.153 text => $token->{tag_name}, token => $token);
5094 wakaba 1.107 ## NOTE: Ignore the token.
5095 wakaba 1.52 !!!next-token;
5096 wakaba 1.126 next B;
5097 wakaba 1.111 } # INSCOPE
5098 wakaba 1.107
5099 wakaba 1.225 ## 2. If unclosed elements:
5100 wakaba 1.107 for (@{$self->{open_elements}}) {
5101 wakaba 1.220 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5102     $_->[1] == OPTGROUP_EL ||
5103     $_->[1] == OPTION_EL ||
5104     $_->[1] == RUBY_COMPONENT_EL) {
5105 wakaba 1.107 !!!cp ('t403');
5106 wakaba 1.122 !!!parse-error (type => 'not closed',
5107 wakaba 1.153 text => $_->[0]->manakai_local_name,
5108 wakaba 1.122 token => $token);
5109 wakaba 1.107 last;
5110     } else {
5111     !!!cp ('t404');
5112     }
5113     }
5114    
5115 wakaba 1.225 ## 3. Switch the insertion mode.
5116 wakaba 1.107 $self->{insertion_mode} = AFTER_BODY_IM;
5117     !!!next-token;
5118 wakaba 1.126 next B;
5119 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
5120 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
5121     ## up-to-date, though it has same effect as speced.
5122 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
5123 wakaba 1.206 $self->{open_elements}->[1]->[1] == BODY_EL) {
5124     unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5125 wakaba 1.79 !!!cp ('t406');
5126 wakaba 1.122 !!!parse-error (type => 'not closed',
5127 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
5128 wakaba 1.122 ->manakai_local_name,
5129     token => $token);
5130 wakaba 1.79 } else {
5131     !!!cp ('t407');
5132 wakaba 1.1 }
5133 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5134 wakaba 1.52 ## reprocess
5135 wakaba 1.126 next B;
5136 wakaba 1.51 } else {
5137 wakaba 1.79 !!!cp ('t408');
5138 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5139     text => $token->{tag_name}, token => $token);
5140 wakaba 1.52 ## Ignore the token
5141     !!!next-token;
5142 wakaba 1.126 next B;
5143 wakaba 1.51 }
5144 wakaba 1.52 } elsif ({
5145 wakaba 1.195 ## NOTE: End tags for non-phrasing flow content elements
5146    
5147     ## NOTE: The normal ones
5148     address => 1, article => 1, aside => 1, blockquote => 1,
5149     center => 1, datagrid => 1, details => 1, dialog => 1,
5150     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5151     footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
5152     ol => 1, pre => 1, section => 1, ul => 1,
5153    
5154     ## NOTE: As normal, but ... optional tags
5155 wakaba 1.52 dd => 1, dt => 1, li => 1,
5156 wakaba 1.195
5157 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5158 wakaba 1.52 }->{$token->{tag_name}}) {
5159 wakaba 1.197 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5160     ## Code for <dt> or <dd> start tags includes "as if </dt> or
5161     ## </dd>" code.
5162    
5163 wakaba 1.52 ## has an element in scope
5164     my $i;
5165     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5166     my $node = $self->{open_elements}->[$_];
5167 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5168 wakaba 1.79 !!!cp ('t410');
5169 wakaba 1.52 $i = $_;
5170 wakaba 1.87 last INSCOPE;
5171 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5172 wakaba 1.79 !!!cp ('t411');
5173 wakaba 1.52 last INSCOPE;
5174 wakaba 1.51 }
5175 wakaba 1.52 } # INSCOPE
5176 wakaba 1.89
5177     unless (defined $i) { # has an element in scope
5178     !!!cp ('t413');
5179 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5180     text => $token->{tag_name}, token => $token);
5181 wakaba 1.157 ## NOTE: Ignore the token.
5182 wakaba 1.89 } else {
5183     ## Step 1. generate implied end tags
5184     while ({
5185 wakaba 1.151 ## END_TAG_OPTIONAL_EL
5186 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
5187     dt => ($token->{tag_name} ne 'dt'),
5188     li => ($token->{tag_name} ne 'li'),
5189 wakaba 1.194 option => 1,
5190     optgroup => 1,
5191 wakaba 1.89 p => 1,
5192 wakaba 1.151 rt => 1,
5193     rp => 1,
5194 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5195 wakaba 1.89 !!!cp ('t409');
5196     pop @{$self->{open_elements}};
5197     }
5198    
5199     ## Step 2.
5200 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5201     ne $token->{tag_name}) {
5202 wakaba 1.79 !!!cp ('t412');
5203 wakaba 1.122 !!!parse-error (type => 'not closed',
5204 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5205 wakaba 1.122 ->manakai_local_name,
5206     token => $token);
5207 wakaba 1.51 } else {
5208 wakaba 1.89 !!!cp ('t414');
5209 wakaba 1.51 }
5210 wakaba 1.89
5211     ## Step 3.
5212 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5213 wakaba 1.89
5214     ## Step 4.
5215     $clear_up_to_marker->()
5216     if {
5217 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5218 wakaba 1.89 }->{$token->{tag_name}};
5219 wakaba 1.51 }
5220 wakaba 1.52 !!!next-token;
5221 wakaba 1.126 next B;
5222 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
5223 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
5224    
5225 wakaba 1.92 undef $self->{form_element};
5226    
5227 wakaba 1.52 ## has an element in scope
5228 wakaba 1.92 my $i;
5229 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5230     my $node = $self->{open_elements}->[$_];
5231 wakaba 1.206 if ($node->[1] == FORM_EL) {
5232 wakaba 1.79 !!!cp ('t418');
5233 wakaba 1.92 $i = $_;
5234 wakaba 1.52 last INSCOPE;
5235 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5236 wakaba 1.79 !!!cp ('t419');
5237 wakaba 1.52 last INSCOPE;
5238     }
5239     } # INSCOPE
5240 wakaba 1.92
5241     unless (defined $i) { # has an element in scope
5242 wakaba 1.79 !!!cp ('t421');
5243 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5244     text => $token->{tag_name}, token => $token);
5245 wakaba 1.157 ## NOTE: Ignore the token.
5246 wakaba 1.92 } else {
5247     ## Step 1. generate implied end tags
5248 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5249 wakaba 1.92 !!!cp ('t417');
5250     pop @{$self->{open_elements}};
5251     }
5252    
5253     ## Step 2.
5254 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5255     ne $token->{tag_name}) {
5256 wakaba 1.92 !!!cp ('t417.1');
5257 wakaba 1.122 !!!parse-error (type => 'not closed',
5258 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5259 wakaba 1.122 ->manakai_local_name,
5260     token => $token);
5261 wakaba 1.92 } else {
5262     !!!cp ('t420');
5263     }
5264    
5265     ## Step 3.
5266     splice @{$self->{open_elements}}, $i;
5267 wakaba 1.52 }
5268    
5269     !!!next-token;
5270 wakaba 1.126 next B;
5271 wakaba 1.52 } elsif ({
5272 wakaba 1.195 ## NOTE: As normal, except acts as a closer for any ...
5273 wakaba 1.52 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5274     }->{$token->{tag_name}}) {
5275     ## has an element in scope
5276     my $i;
5277     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5278     my $node = $self->{open_elements}->[$_];
5279 wakaba 1.206 if ($node->[1] == HEADING_EL) {
5280 wakaba 1.79 !!!cp ('t423');
5281 wakaba 1.52 $i = $_;
5282     last INSCOPE;
5283 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5284 wakaba 1.79 !!!cp ('t424');
5285 wakaba 1.52 last INSCOPE;
5286 wakaba 1.51 }
5287 wakaba 1.52 } # INSCOPE
5288 wakaba 1.93
5289     unless (defined $i) { # has an element in scope
5290     !!!cp ('t425.1');
5291 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5292     text => $token->{tag_name}, token => $token);
5293 wakaba 1.157 ## NOTE: Ignore the token.
5294 wakaba 1.79 } else {
5295 wakaba 1.93 ## Step 1. generate implied end tags
5296 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5297 wakaba 1.93 !!!cp ('t422');
5298     pop @{$self->{open_elements}};
5299     }
5300    
5301     ## Step 2.
5302 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5303     ne $token->{tag_name}) {
5304 wakaba 1.93 !!!cp ('t425');
5305 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5306     text => $token->{tag_name}, token => $token);
5307 wakaba 1.93 } else {
5308     !!!cp ('t426');
5309     }
5310    
5311     ## Step 3.
5312     splice @{$self->{open_elements}}, $i;
5313 wakaba 1.36 }
5314 wakaba 1.52
5315     !!!next-token;
5316 wakaba 1.126 next B;
5317 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
5318 wakaba 1.195 ## NOTE: As normal, except </p> implies <p> and ...
5319    
5320 wakaba 1.87 ## has an element in scope
5321 wakaba 1.197 my $non_optional;
5322 wakaba 1.87 my $i;
5323     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5324     my $node = $self->{open_elements}->[$_];
5325 wakaba 1.206 if ($node->[1] == P_EL) {
5326 wakaba 1.87 !!!cp ('t410.1');
5327     $i = $_;
5328 wakaba 1.88 last INSCOPE;
5329 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5330 wakaba 1.87 !!!cp ('t411.1');
5331     last INSCOPE;
5332 wakaba 1.197 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5333     ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5334     !!!cp ('t411.2');
5335     #
5336     } else {
5337     !!!cp ('t411.3');
5338     $non_optional ||= $node;
5339     #
5340 wakaba 1.87 }
5341     } # INSCOPE
5342 wakaba 1.91
5343     if (defined $i) {
5344 wakaba 1.197 ## 1. Generate implied end tags
5345     #
5346    
5347     ## 2. If current node != "p", parse error
5348     if ($non_optional) {
5349 wakaba 1.87 !!!cp ('t412.1');
5350 wakaba 1.122 !!!parse-error (type => 'not closed',
5351 wakaba 1.197 text => $non_optional->[0]->manakai_local_name,
5352 wakaba 1.122 token => $token);
5353 wakaba 1.87 } else {
5354 wakaba 1.91 !!!cp ('t414.1');
5355 wakaba 1.87 }
5356 wakaba 1.91
5357 wakaba 1.197 ## 3. Pop
5358 wakaba 1.87 splice @{$self->{open_elements}}, $i;
5359     } else {
5360 wakaba 1.91 !!!cp ('t413.1');
5361 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5362     text => $token->{tag_name}, token => $token);
5363 wakaba 1.91
5364 wakaba 1.87 !!!cp ('t415.1');
5365     ## As if <p>, then reprocess the current token
5366     my $el;
5367 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
5368 wakaba 1.87 $insert->($el);
5369 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
5370 wakaba 1.87 }
5371 wakaba 1.91
5372 wakaba 1.87 !!!next-token;
5373 wakaba 1.126 next B;
5374 wakaba 1.52 } elsif ({
5375     a => 1,
5376     b => 1, big => 1, em => 1, font => 1, i => 1,
5377 wakaba 1.193 nobr => 1, s => 1, small => 1, strike => 1,
5378 wakaba 1.52 strong => 1, tt => 1, u => 1,
5379     }->{$token->{tag_name}}) {
5380 wakaba 1.79 !!!cp ('t427');
5381 wakaba 1.113 $formatting_end_tag->($token);
5382 wakaba 1.126 next B;
5383 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
5384 wakaba 1.79 !!!cp ('t428');
5385 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5386     text => 'br', token => $token);
5387 wakaba 1.52
5388     ## As if <br>
5389     $reconstruct_active_formatting_elements->($insert_to_current);
5390    
5391     my $el;
5392 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
5393 wakaba 1.52 $insert->($el);
5394    
5395     ## Ignore the token.
5396     !!!next-token;
5397 wakaba 1.126 next B;
5398 wakaba 1.52 } else {
5399 wakaba 1.195 if ($token->{tag_name} eq 'sarcasm') {
5400     sleep 0.001; # take a deep breath
5401     }
5402    
5403 wakaba 1.52 ## Step 1
5404     my $node_i = -1;
5405     my $node = $self->{open_elements}->[$node_i];
5406 wakaba 1.51
5407 wakaba 1.52 ## Step 2
5408     S2: {
5409 wakaba 1.200 my $node_tag_name = $node->[0]->manakai_local_name;
5410     $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5411     if ($node_tag_name eq $token->{tag_name}) {
5412 wakaba 1.52 ## Step 1
5413     ## generate implied end tags
5414 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5415 wakaba 1.79 !!!cp ('t430');
5416 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
5417     ## ISSUE: <ruby><rt></rt> will also take this code path,
5418     ## which seems wrong.
5419 wakaba 1.86 pop @{$self->{open_elements}};
5420 wakaba 1.151 $node_i++;
5421 wakaba 1.52 }
5422    
5423     ## Step 2
5424 wakaba 1.200 my $current_tag_name
5425     = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5426     $current_tag_name =~ tr/A-Z/a-z/;
5427     if ($current_tag_name ne $token->{tag_name}) {
5428 wakaba 1.79 !!!cp ('t431');
5429 wakaba 1.58 ## NOTE: <x><y></x>
5430 wakaba 1.122 !!!parse-error (type => 'not closed',
5431 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5432 wakaba 1.122 ->manakai_local_name,
5433     token => $token);
5434 wakaba 1.79 } else {
5435     !!!cp ('t432');
5436 wakaba 1.52 }
5437    
5438     ## Step 3
5439 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5440 wakaba 1.51
5441 wakaba 1.1 !!!next-token;
5442 wakaba 1.52 last S2;
5443 wakaba 1.1 } else {
5444 wakaba 1.52 ## Step 3
5445 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5446 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5447 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5448     $node->[1] & SCOPING_EL)) {
5449 wakaba 1.79 !!!cp ('t433');
5450 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5451     text => $token->{tag_name}, token => $token);
5452 wakaba 1.52 ## Ignore the token
5453     !!!next-token;
5454     last S2;
5455 wakaba 1.193
5456     ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5457     ## 9.27, "a" is a child of <dd> (conforming). In
5458     ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5459     ## "a" is a child of both <body> and <dd>.
5460 wakaba 1.52 }
5461 wakaba 1.193
5462 wakaba 1.79 !!!cp ('t434');
5463 wakaba 1.1 }
5464 wakaba 1.52
5465     ## Step 4
5466     $node_i--;
5467     $node = $self->{open_elements}->[$node_i];
5468    
5469     ## Step 5;
5470     redo S2;
5471     } # S2
5472 wakaba 1.126 next B;
5473 wakaba 1.1 }
5474     }
5475 wakaba 1.126 next B;
5476     } continue { # B
5477     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5478     ## NOTE: The code below is executed in cases where it does not have
5479     ## to be, but it is harmless even in those cases.
5480     ## has an element in scope
5481     INSCOPE: {
5482     for (reverse 0..$#{$self->{open_elements}}) {
5483     my $node = $self->{open_elements}->[$_];
5484     if ($node->[1] & FOREIGN_EL) {
5485     last INSCOPE;
5486     } elsif ($node->[1] & SCOPING_EL) {
5487     last;
5488     }
5489     }
5490    
5491     ## NOTE: No foreign element in scope.
5492     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5493     } # INSCOPE
5494     }
5495 wakaba 1.1 } # B
5496    
5497     ## Stop parsing # MUST
5498    
5499     ## TODO: script stuffs
5500 wakaba 1.3 } # _tree_construct_main
5501    
5502 wakaba 1.218 ## XXX: How this method is organized is somewhat out of date, although
5503     ## it still does what the current spec documents.
## Replaces the children of |$node| by the result of parsing a string
## as HTML (the |innerHTML| setter).
##
## Invoked as a class method; after the invocant the arguments are:
##
##   $node        - The /context/ node.  A Document (|node_type| 9) is
##                  emptied and then filled by |parse_char_string|; an
##                  Element (|node_type| 1) is filled by running a
##                  fragment parser on a scratch document.  Any other
##                  node type makes the method |die|.
##   string       - The markup to parse.  It is not copied into a
##                  lexical: it is read as |$_[0]| once the first two
##                  arguments have been shifted off.
##   $onerror     - Optional error handler, invoked with name/value
##                  pairs (|type|, |line|, |column|, ...).  In the
##                  Element case the default handler |warn|s.
##   $get_wrapper - Optional code reference that receives the input
##                  handle and returns the handle to read from; the
##                  default returns its argument unchanged.
##
## There is no explicit |return|; callers should not rely on the value.
sub set_inner_html ($$$$;$) {
  my $class = shift;
  my $node = shift; # /context/
  #my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: The child list is copied before any child is removed
    ## (presumably because |child_nodes| is a live list - TODO confirm).
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper

    ## F1. Create an HTML document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);

    ## F2. Propagate quirkness flag
    $doc->manakai_compat_mode ($this_doc->manakai_compat_mode);

    ## F3. Create an HTML parser
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    ## Set up the input stream: position counters, the character handle
    ## for the string, and the two reader callbacks used by the
    ## tokenizer.
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    require Whatpm::Charset::DecodeHandle;
    my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
    $input = $get_wrapper->($input);

    ## Sets |$self->{nc}| to the code point of the next input character,
    ## or to -1 when no more character can be read.  CR and CRLF are
    ## reported as a single LF; U+0000 is reported as U+FFFD together
    ## with a parse error.  Line and column counters are updated.
    $p->{set_nc} = sub {
      my $self = shift;

      my $char = '';
      if (defined $self->{next_nc}) {
        ## A character was read ahead while looking for LF after CR.
        $char = $self->{next_nc};
        delete $self->{next_nc};
        $self->{nc} = ord $char;
      } else {
        $self->{char_buffer} = '';
        $self->{char_buffer_pos} = 0;

        ## Fast path: characters other than U+0000, LF, and CR need no
        ## special processing, so the first buffered one is returned
        ## immediately.
        my $count = $input->manakai_read_until
            ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
             $self->{char_buffer_pos});
        if ($count) {
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column};
          $self->{column}++;
          $self->{nc}
              = ord substr ($self->{char_buffer},
                            $self->{char_buffer_pos}++, 1);
          return;
        }

        if ($input->read ($char, 1)) {
          $self->{nc} = ord $char;
        } else {
          $self->{nc} = -1;
          return;
        }
      }

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{nc} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{nc} == 0x000D) { # CR
        ## TODO: support for abort/streaming
        ## Swallow an LF that follows the CR; any other character is
        ## kept in |next_nc| for the next invocation.
        my $next = '';
        if ($input->read ($next, 1) and $next ne "\x0A") {
          $self->{next_nc} = $next;
        }
        $self->{nc} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{nc} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Copies a run of characters that are neither in the character
    ## class body given as |$_[1]| nor U+0000, LF, or CR into |$_[0]|
    ## at offset |$_[2]|, and returns the number of characters copied
    ## (zero if none, or if a read-ahead character is pending).
    $p->{read_until} = sub {
      #my ($scalar, $specials_range, $offset) = @_;
      return 0 if defined $p->{next_nc};

      my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
      my $offset = $_[2] || 0;

      if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
        ## Characters buffered by |set_nc| have to be consumed first.
        pos ($p->{char_buffer}) = $p->{char_buffer_pos};
        if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
          substr ($_[0], $offset)
              = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
          my $count = $+[0] - $-[0];
          if ($count) {
            $p->{column} += $count;
            $p->{char_buffer_pos} += $count;
            $p->{line_prev} = $p->{line};
            $p->{column_prev} = $p->{column} - 1;
            $p->{nc} = -1;
          }
          return $count;
        } else {
          return 0;
        }
      } else {
        my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
        if ($count) {
          $p->{column} += $count;
          $p->{column_prev} += $count;
          $p->{nc} = -1;
        }
        return $count;
      }
    }; # $p->{read_until}

    ## Error reporting.  The position carried by the token, if any,
    ## takes precedence over the position given by the caller.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    ## Errors raised by the input handle are forwarded to the same
    ## handler, marked with |layer => 'encode'|.
    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
      $ponerror->(layer => 'encode',
                  line => $p->{line}, column => $p->{column} + 1,
                  %opt, type => $type);
    }; # $char_onerror
    $input->onerror ($char_onerror);

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## F4. If /context/ is not undef...

    ## F4.1. content model flag
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## F4.2. Root |html| element
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## F4.3.
    $doc->append_child ($root);

    ## F4.4.
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};
    undef $p->{head_element_inserted};

    ## F4.5.
    $p->_reset_insertion_mode;

    ## F4.6.
    ## The form element pointer is set to the nearest HTML |form|
    ## element among /context/ and its ancestors, if there is one.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## F6. Start the parser.
    {
      ## NOTE: |!!!next-token| refers to |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## F7.
    ## Remove the current children of /context/ ...
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    ## ... and move the parsed nodes from the scratch document into it.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## The |set_nc|, |read_until|, and |parse_error| closures all refer
    ## to |$p| while being stored in |$p| itself.  They have to be
    ## removed, or the reference cycles would keep the parser and the
    ## scratch document alive after this method returns.
    ## |parse_error| is deleted last so that the value of the final
    ## statement is the same as it has always been.
    delete $p->{set_nc};
    delete $p->{read_until};
    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
5756    
5757     } # tree construction stage
5758 wakaba 1.1
## A subclass of |Error| (the module loaded by |use Error qw(:try)| at
## the top of this file).  NOTE(review): judging from its name it is
## the exception used to request that parsing be restarted - confirm
## against the place where it is thrown.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

1;
5763 wakaba 1.228 # $Date: 2009/08/16 06:31:20 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24