/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.244 - (hide annotations) (download) (as text)
Sun Sep 6 23:32:06 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.243: +31 -15 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	6 Sep 2009 23:31:19 -0000
2009-09-07  Wakaba  <wakaba@suika.fam.cx>

	* tree-test-1.dat: Added new test data on obsolete permitted
	DOCTYPEs (HTML5 revision 3378).

++ whatpm/Whatpm/ChangeLog	6 Sep 2009 23:31:49 -0000
2009-09-07  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src (_tree_construction_initial): Implemented "obsolete
	permitted DOCTYPEs" (HTML5 revision 3378).

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.244 our $VERSION=do{my @r=(q$Revision: 1.243 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.208 use Whatpm::HTML::Tokenizer;
7    
8 wakaba 1.182 ## NOTE: This module doesn't check all HTML5 parse errors; character
9     ## encoding related parse errors are expected to be handled by relevant
10     ## modules.
11     ## Parse errors for control characters that are not allowed in HTML5
12     ## documents, for surrogate code points, and for noncharacter code
13     ## points, as well as U+FFFD substitutions for characters whose code point
14     ## is higher than U+10FFFF may be detected by combining the parser with
15     ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16     ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17     ## WebHACC::Language::HTML module in the WebHACC package).
18    
19 wakaba 1.18 ## ISSUE:
20     ## var doc = implementation.createDocument (null, null, null);
21     ## doc.write ('');
22     ## alert (doc.compatMode);
23 wakaba 1.1
24 wakaba 1.139 require IO::Handle;
25    
26 wakaba 1.208 ## Namespace URLs
27    
28 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30     my $SVG_NS = q<http://www.w3.org/2000/svg>;
31     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34    
35 wakaba 1.208 ## Element categories
36    
37 wakaba 1.206 ## Bits 12-15
38     sub SPECIAL_EL () { 0b1_000000000000000 }
39     sub SCOPING_EL () { 0b1_00000000000000 }
40     sub FORMATTING_EL () { 0b1_0000000000000 }
41     sub PHRASING_EL () { 0b1_000000000000 }
42    
43     ## Bits 10-11
44 wakaba 1.208 #sub FOREIGN_EL () { 0b1_00000000000 } # see Whatpm::HTML::Tokenizer
45 wakaba 1.206 sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 }
46    
47     ## Bits 6-9
48     sub TABLE_SCOPING_EL () { 0b1_000000000 }
49     sub TABLE_ROWS_SCOPING_EL () { 0b1_00000000 }
50     sub TABLE_ROW_SCOPING_EL () { 0b1_0000000 }
51     sub TABLE_ROWS_EL () { 0b1_000000 }
52    
53     ## Bit 5
54     sub ADDRESS_DIV_P_EL () { 0b1_00000 }
55    
56     ## NOTE: Used in </body> and EOF algorithms.
57     ## Bit 4
58     sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
59 wakaba 1.123
60 wakaba 1.151 ## NOTE: Used in "generate implied end tags" algorithm.
61 wakaba 1.194 ## NOTE: There is a code where a modified version of
62     ## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
63     ## implementation (search for the algorithm name).
64 wakaba 1.206 ## Bit 3
65     sub END_TAG_OPTIONAL_EL () { 0b1_000 }
66    
67     ## Bits 0-2
68    
69     sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
70     sub FORM_EL () { SPECIAL_EL | 0b001 }
71     sub FRAMESET_EL () { SPECIAL_EL | 0b010 }
72     sub HEADING_EL () { SPECIAL_EL | 0b011 }
73     sub SELECT_EL () { SPECIAL_EL | 0b100 }
74     sub SCRIPT_EL () { SPECIAL_EL | 0b101 }
75    
76     sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
77     sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }
78    
79 wakaba 1.207 sub DTDD_EL () {
80 wakaba 1.206 SPECIAL_EL |
81     END_TAG_OPTIONAL_EL |
82     ALL_END_TAG_OPTIONAL_EL |
83     0b010
84     }
85     sub LI_EL () {
86     SPECIAL_EL |
87     END_TAG_OPTIONAL_EL |
88     ALL_END_TAG_OPTIONAL_EL |
89     0b100
90     }
91     sub P_EL () {
92     SPECIAL_EL |
93     ADDRESS_DIV_P_EL |
94     END_TAG_OPTIONAL_EL |
95     ALL_END_TAG_OPTIONAL_EL |
96     0b001
97 wakaba 1.123 }
98    
99 wakaba 1.206 sub TABLE_ROW_EL () {
100     SPECIAL_EL |
101     TABLE_ROWS_EL |
102     TABLE_ROW_SCOPING_EL |
103     ALL_END_TAG_OPTIONAL_EL |
104     0b001
105     }
106     sub TABLE_ROW_GROUP_EL () {
107     SPECIAL_EL |
108     TABLE_ROWS_EL |
109     TABLE_ROWS_SCOPING_EL |
110     ALL_END_TAG_OPTIONAL_EL |
111     0b001
112 wakaba 1.123 }
113    
114 wakaba 1.206 sub MISC_SCOPING_EL () { SCOPING_EL | 0b000 }
115     sub BUTTON_EL () { SCOPING_EL | 0b001 }
116     sub CAPTION_EL () { SCOPING_EL | 0b010 }
117     sub HTML_EL () {
118     SCOPING_EL |
119     TABLE_SCOPING_EL |
120     TABLE_ROWS_SCOPING_EL |
121     TABLE_ROW_SCOPING_EL |
122     ALL_END_TAG_OPTIONAL_EL |
123     0b001
124 wakaba 1.123 }
125 wakaba 1.206 sub TABLE_EL () {
126     SCOPING_EL |
127     TABLE_ROWS_EL |
128     TABLE_SCOPING_EL |
129     0b001
130 wakaba 1.123 }
131 wakaba 1.206 sub TABLE_CELL_EL () {
132     SCOPING_EL |
133     TABLE_ROW_SCOPING_EL |
134     ALL_END_TAG_OPTIONAL_EL |
135     0b001
136 wakaba 1.123 }
137    
138 wakaba 1.206 sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
139     sub A_EL () { FORMATTING_EL | 0b001 }
140     sub NOBR_EL () { FORMATTING_EL | 0b010 }
141    
142     sub RUBY_EL () { PHRASING_EL | 0b001 }
143    
144     ## ISSUE: ALL_END_TAG_OPTIONAL_EL?
145     sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
146     sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
147     sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }
148 wakaba 1.123
149 wakaba 1.206 sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 0b001 }
150 wakaba 1.123
151     my $el_category = {
152 wakaba 1.206 a => A_EL,
153     address => ADDRESS_DIV_EL,
154 wakaba 1.123 applet => MISC_SCOPING_EL,
155     area => MISC_SPECIAL_EL,
156 wakaba 1.193 article => MISC_SPECIAL_EL,
157     aside => MISC_SPECIAL_EL,
158 wakaba 1.123 b => FORMATTING_EL,
159     base => MISC_SPECIAL_EL,
160     basefont => MISC_SPECIAL_EL,
161     bgsound => MISC_SPECIAL_EL,
162     big => FORMATTING_EL,
163     blockquote => MISC_SPECIAL_EL,
164     body => BODY_EL,
165     br => MISC_SPECIAL_EL,
166     button => BUTTON_EL,
167     caption => CAPTION_EL,
168     center => MISC_SPECIAL_EL,
169     col => MISC_SPECIAL_EL,
170     colgroup => MISC_SPECIAL_EL,
171 wakaba 1.193 command => MISC_SPECIAL_EL,
172     datagrid => MISC_SPECIAL_EL,
173 wakaba 1.207 dd => DTDD_EL,
174 wakaba 1.193 details => MISC_SPECIAL_EL,
175     dialog => MISC_SPECIAL_EL,
176 wakaba 1.123 dir => MISC_SPECIAL_EL,
177 wakaba 1.206 div => ADDRESS_DIV_EL,
178 wakaba 1.123 dl => MISC_SPECIAL_EL,
179 wakaba 1.207 dt => DTDD_EL,
180 wakaba 1.123 em => FORMATTING_EL,
181     embed => MISC_SPECIAL_EL,
182     fieldset => MISC_SPECIAL_EL,
183 wakaba 1.193 figure => MISC_SPECIAL_EL,
184 wakaba 1.123 font => FORMATTING_EL,
185 wakaba 1.193 footer => MISC_SPECIAL_EL,
186 wakaba 1.123 form => FORM_EL,
187     frame => MISC_SPECIAL_EL,
188     frameset => FRAMESET_EL,
189     h1 => HEADING_EL,
190     h2 => HEADING_EL,
191     h3 => HEADING_EL,
192     h4 => HEADING_EL,
193     h5 => HEADING_EL,
194     h6 => HEADING_EL,
195     head => MISC_SPECIAL_EL,
196 wakaba 1.193 header => MISC_SPECIAL_EL,
197 wakaba 1.237 hgroup => MISC_SPECIAL_EL,
198 wakaba 1.123 hr => MISC_SPECIAL_EL,
199     html => HTML_EL,
200     i => FORMATTING_EL,
201     iframe => MISC_SPECIAL_EL,
202     img => MISC_SPECIAL_EL,
203 wakaba 1.193 #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
204 wakaba 1.123 input => MISC_SPECIAL_EL,
205     isindex => MISC_SPECIAL_EL,
206 wakaba 1.232 ## XXX keygen? (Whether a void element is in Special or not does not
207     ## affect the processing, however.)
208 wakaba 1.123 li => LI_EL,
209     link => MISC_SPECIAL_EL,
210     listing => MISC_SPECIAL_EL,
211     marquee => MISC_SCOPING_EL,
212     menu => MISC_SPECIAL_EL,
213     meta => MISC_SPECIAL_EL,
214 wakaba 1.193 nav => MISC_SPECIAL_EL,
215 wakaba 1.206 nobr => NOBR_EL,
216 wakaba 1.123 noembed => MISC_SPECIAL_EL,
217     noframes => MISC_SPECIAL_EL,
218     noscript => MISC_SPECIAL_EL,
219     object => MISC_SCOPING_EL,
220     ol => MISC_SPECIAL_EL,
221     optgroup => OPTGROUP_EL,
222     option => OPTION_EL,
223     p => P_EL,
224     param => MISC_SPECIAL_EL,
225     plaintext => MISC_SPECIAL_EL,
226     pre => MISC_SPECIAL_EL,
227 wakaba 1.151 rp => RUBY_COMPONENT_EL,
228     rt => RUBY_COMPONENT_EL,
229     ruby => RUBY_EL,
230 wakaba 1.123 s => FORMATTING_EL,
231     script => MISC_SPECIAL_EL,
232     select => SELECT_EL,
233 wakaba 1.193 section => MISC_SPECIAL_EL,
234 wakaba 1.123 small => FORMATTING_EL,
235     spacer => MISC_SPECIAL_EL,
236     strike => FORMATTING_EL,
237     strong => FORMATTING_EL,
238     style => MISC_SPECIAL_EL,
239     table => TABLE_EL,
240     tbody => TABLE_ROW_GROUP_EL,
241     td => TABLE_CELL_EL,
242     textarea => MISC_SPECIAL_EL,
243     tfoot => TABLE_ROW_GROUP_EL,
244     th => TABLE_CELL_EL,
245     thead => TABLE_ROW_GROUP_EL,
246     title => MISC_SPECIAL_EL,
247     tr => TABLE_ROW_EL,
248     tt => FORMATTING_EL,
249     u => FORMATTING_EL,
250     ul => MISC_SPECIAL_EL,
251     wbr => MISC_SPECIAL_EL,
252 wakaba 1.236 xmp => MISC_SPECIAL_EL,
253 wakaba 1.123 };
254    
255 wakaba 1.126 my $el_category_f = {
256     $MML_NS => {
257     'annotation-xml' => MML_AXML_EL,
258 wakaba 1.206 mi => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
259     mo => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
260     mn => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
261     ms => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
262     mtext => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
263 wakaba 1.126 },
264     $SVG_NS => {
265 wakaba 1.206 foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
266     desc => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
267     title => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
268 wakaba 1.126 },
269     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
270     };
271    
272 wakaba 1.131 my $svg_attr_name = {
273 wakaba 1.146 attributename => 'attributeName',
274 wakaba 1.131 attributetype => 'attributeType',
275     basefrequency => 'baseFrequency',
276     baseprofile => 'baseProfile',
277     calcmode => 'calcMode',
278     clippathunits => 'clipPathUnits',
279     contentscripttype => 'contentScriptType',
280     contentstyletype => 'contentStyleType',
281     diffuseconstant => 'diffuseConstant',
282     edgemode => 'edgeMode',
283     externalresourcesrequired => 'externalResourcesRequired',
284     filterres => 'filterRes',
285     filterunits => 'filterUnits',
286     glyphref => 'glyphRef',
287     gradienttransform => 'gradientTransform',
288     gradientunits => 'gradientUnits',
289     kernelmatrix => 'kernelMatrix',
290     kernelunitlength => 'kernelUnitLength',
291     keypoints => 'keyPoints',
292     keysplines => 'keySplines',
293     keytimes => 'keyTimes',
294     lengthadjust => 'lengthAdjust',
295     limitingconeangle => 'limitingConeAngle',
296     markerheight => 'markerHeight',
297     markerunits => 'markerUnits',
298     markerwidth => 'markerWidth',
299     maskcontentunits => 'maskContentUnits',
300     maskunits => 'maskUnits',
301     numoctaves => 'numOctaves',
302     pathlength => 'pathLength',
303     patterncontentunits => 'patternContentUnits',
304     patterntransform => 'patternTransform',
305     patternunits => 'patternUnits',
306     pointsatx => 'pointsAtX',
307     pointsaty => 'pointsAtY',
308     pointsatz => 'pointsAtZ',
309     preservealpha => 'preserveAlpha',
310     preserveaspectratio => 'preserveAspectRatio',
311     primitiveunits => 'primitiveUnits',
312     refx => 'refX',
313     refy => 'refY',
314     repeatcount => 'repeatCount',
315     repeatdur => 'repeatDur',
316     requiredextensions => 'requiredExtensions',
317 wakaba 1.146 requiredfeatures => 'requiredFeatures',
318 wakaba 1.131 specularconstant => 'specularConstant',
319     specularexponent => 'specularExponent',
320     spreadmethod => 'spreadMethod',
321     startoffset => 'startOffset',
322     stddeviation => 'stdDeviation',
323     stitchtiles => 'stitchTiles',
324     surfacescale => 'surfaceScale',
325     systemlanguage => 'systemLanguage',
326     tablevalues => 'tableValues',
327     targetx => 'targetX',
328     targety => 'targetY',
329     textlength => 'textLength',
330     viewbox => 'viewBox',
331     viewtarget => 'viewTarget',
332     xchannelselector => 'xChannelSelector',
333     ychannelselector => 'yChannelSelector',
334     zoomandpan => 'zoomAndPan',
335     };
336    
337     my $foreign_attr_xname = {
338     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
339     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
340     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
341     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
342     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
343     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
344     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
345     'xml:base' => [$XML_NS, ['xml', 'base']],
346     'xml:lang' => [$XML_NS, ['xml', 'lang']],
347     'xml:space' => [$XML_NS, ['xml', 'space']],
348     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
349     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
350     };
351    
352     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
353    
354 wakaba 1.192 ## TODO: Invoke the reset algorithm when a resettable element is
355     ## created (cf. HTML5 revision 2259).
356    
## parse_byte_string: Parse an HTML document given as a string of bytes.
##
## Arguments: the invocant, the charset name (passed through to
## |parse_byte_stream|), the byte string or a reference to it, followed
## by any further arguments, which are forwarded unchanged to
## |parse_byte_stream|.
##
## The string is opened as an in-memory read handle and the actual work
## is delegated to |parse_byte_stream|, whose return value is returned.
##
## NOTE(review): The result of |open| is not checked here; confirm
## whether a failure to open the in-memory handle needs to be reported.
357 wakaba 1.63 sub parse_byte_string ($$$$;$) {
358 wakaba 1.138 my $self = shift;
359     my $charset_name = shift;
## Accept either a scalar reference or a plain string (referenced in place).
360     open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
361     return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
362     } # parse_byte_string
363    
## parse_byte_stream: Parse an HTML document read from a byte stream handle.
##
## Arguments (see the commented-out signature below): the parser object
## or class name (a new parser is created by |new| when the invocant is
## not a reference), the charset name (may be undef), the byte stream
## handle, the document object, an optional error handler, and an
## optional wrapper factory applied to the character stream.
##
## The method first chooses a character encoding (the "SNIFFING" block),
## obtains a decoding character stream for it, and then hands the
## character stream to |parse_char_stream|.  If the |{change_encoding}|
## callback throws |Whatpm::HTML::RestartParser|, the document is parsed
## once more using the character stream prepared by that callback.
##
## Returns whatever |parse_char_stream| returns.
364 wakaba 1.162 sub parse_byte_stream ($$$$;$$) {
365     # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
366 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
367 wakaba 1.133 my $charset_name = shift;
368 wakaba 1.138 my $byte_stream = $_[0];
369 wakaba 1.133
## Default error handler: emit a warning that names the error type.
370 wakaba 1.134 my $onerror = $_[2] || sub {
371     my (%opt) = @_;
372     warn "Parse error ($opt{type})\n";
373     };
374     $self->{parse_error} = $onerror; # updated later by parse_char_string
375    
## Default wrapper: use the character stream as is.
376 wakaba 1.162 my $get_wrapper = $_[3] || sub ($) {
377     return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
378     };
379    
380 wakaba 1.133 ## HTML5 encoding sniffing algorithm
381     require Message::Charset::Info;
382     my $charset;
383 wakaba 1.136 my $buffer;
384     my ($char_stream, $e_status);
385 wakaba 1.133
386     SNIFFING: {
387 wakaba 1.160 ## NOTE: By setting |allow_fallback| option true when the
388     ## |get_decode_handle| method is invoked, we ignore what the HTML5
389     ## spec requires, i.e. unsupported encoding should be ignored.
390     ## TODO: We should not do this unless the parser is invoked
391     ## in the conformance checking mode, in which this behavior
392     ## would be useful.
393 wakaba 1.133
394     ## Step 1
## An explicitly specified charset name is used with confidence if a
## decode handle can be obtained for it.
395     if (defined $charset_name) {
396 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
397     ## TODO: Is this ok? Transfer protocol's parameter should be
398     ## interpreted in its semantics?
399 wakaba 1.133
400 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
401     ($byte_stream, allow_error_reporting => 1,
402 wakaba 1.133 allow_fallback => 1);
403 wakaba 1.136 if ($char_stream) {
404 wakaba 1.133 $self->{confident} = 1;
405     last SNIFFING;
406 wakaba 1.136 } else {
407 wakaba 1.190 !!!parse-error (type => 'charset:not supported',
408     layer => 'encode',
409     line => 1, column => 1,
410     value => $charset_name,
411     level => $self->{level}->{uncertain});
412 wakaba 1.133 }
413     }
414    
415     ## Step 2
## Read at most 1024 bytes from the stream for use in the later steps.
416 wakaba 1.136 my $byte_buffer = '';
417     for (1..1024) {
418     my $char = $byte_stream->getc;
419     last unless defined $char;
420     $byte_buffer .= $char;
421     } ## TODO: timeout
422 wakaba 1.133
423     ## Step 3
## A leading byte order mark selects UTF-16BE, UTF-16LE, or UTF-8
## with confidence.
424 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
425 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
426 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
427     ($byte_stream, allow_error_reporting => 1,
428     allow_fallback => 1, byte_buffer => \$byte_buffer);
429 wakaba 1.133 $self->{confident} = 1;
430     last SNIFFING;
431 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
432 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
433 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
434     ($byte_stream, allow_error_reporting => 1,
435     allow_fallback => 1, byte_buffer => \$byte_buffer);
436 wakaba 1.133 $self->{confident} = 1;
437     last SNIFFING;
438 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
439 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
440 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
441     ($byte_stream, allow_error_reporting => 1,
442     allow_fallback => 1, byte_buffer => \$byte_buffer);
443 wakaba 1.133 $self->{confident} = 1;
444     last SNIFFING;
445     }
446    
447     ## Step 4
448     ## TODO: <meta charset>
449    
450     ## Step 5
451     ## TODO: from history
452    
453     ## Step 6
## Ask the universal charset detector to guess from the buffered bytes;
## a successful guess is used tentatively (not confident).
454 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
455 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
456 wakaba 1.136 ($byte_buffer);
457 wakaba 1.133 if (defined $charset_name) {
458 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
459 wakaba 1.133
460 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
461     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
462     ($byte_stream);
463     ($char_stream, $e_status) = $charset->get_decode_handle
464     ($buffer, allow_error_reporting => 1,
465     allow_fallback => 1, byte_buffer => \$byte_buffer);
466     if ($char_stream) {
467     $buffer->{buffer} = $byte_buffer;
468 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
469     text => $charset_name,
470     level => $self->{level}->{info},
471     layer => 'encode',
472 wakaba 1.134 line => 1, column => 1);
473 wakaba 1.133 $self->{confident} = 0;
474     last SNIFFING;
475     }
476     }
477    
478     ## Step 7: default
479     ## TODO: Make this configurable.
480 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
481 wakaba 1.133 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
482     ## detectable in the step 6.
483 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
484     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
485     ($byte_stream);
486     ($char_stream, $e_status)
487     = $charset->get_decode_handle ($buffer,
488     allow_error_reporting => 1,
489     allow_fallback => 1,
490     byte_buffer => \$byte_buffer);
491     $buffer->{buffer} = $byte_buffer;
492 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
493     text => 'windows-1252',
494     level => $self->{level}->{info},
495     line => 1, column => 1,
496     layer => 'encode');
497 wakaba 1.63 $self->{confident} = 0;
498 wakaba 1.133 } # SNIFFING
499    
## Record the chosen encoding name and report when the decoder is a
## fallback implementation or cannot report decoding errors.
500     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
501 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
502 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
503 wakaba 1.160 #text => $self->{input_encoding},
504 wakaba 1.153 level => $self->{level}->{uncertain},
505     line => 1, column => 1,
506     layer => 'encode');
507 wakaba 1.133 } elsif (not ($e_status &
508 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
509 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
510 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
511     text => $self->{input_encoding},
512     level => $self->{level}->{uncertain},
513     line => 1, column => 1,
514     layer => 'encode');
515 wakaba 1.160 } else {
516     $self->{input_encoding} = $charset->get_iana_name;
517 wakaba 1.63 }
518    
## Callback invoked with (parser, new charset name, token).  It
## replaces the enclosing |$charset|, |$char_stream| and |$e_status|
## and, unless it returns early, throws |Whatpm::HTML::RestartParser|
## so that the |catch| clause below reparses the document.
## NOTE(review): |$buffer| is assigned only in steps 6 and 7 above; when
## sniffing ended at step 1 or 3 it is still undef when dereferenced
## here -- confirm this is intended.
519     $self->{change_encoding} = sub {
520     my $self = shift;
521 wakaba 1.134 $charset_name = shift;
522 wakaba 1.114 my $token = shift;
523 wakaba 1.63
524 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
525 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
526     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
527     byte_buffer => \ $buffer->{buffer});
528 wakaba 1.134
529 wakaba 1.136 if ($char_stream) { # if supported
530 wakaba 1.134 ## "Change the encoding" algorithm:
531 wakaba 1.215
532     ## Step 1
533     if (defined $self->{input_encoding} and
534     $self->{input_encoding} eq $charset_name) {
535     !!!parse-error (type => 'charset label:matching',
536     text => $charset_name,
537     level => $self->{level}->{info});
538     $self->{confident} = 1;
539     return;
540     }
541 wakaba 1.63
542 wakaba 1.214 ## Step 2 (HTML5 revision 3205)
543     if (defined $self->{input_encoding} and
544     Message::Charset::Info->get_by_html_name ($self->{input_encoding})
545     ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
546     $self->{confident} = 1;
547     return;
548     }
549    
550     ## Step 3
551 wakaba 1.149 if ($charset->{category} &
552     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
553 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
554 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
555     ($byte_stream,
556     byte_buffer => \ $buffer->{buffer});
557 wakaba 1.134 }
558     $charset_name = $charset->get_iana_name;
559 wakaba 1.63
560 wakaba 1.153 !!!parse-error (type => 'charset label detected',
561     text => $self->{input_encoding},
562     value => $charset_name,
563     level => $self->{level}->{warn},
564     token => $token);
565 wakaba 1.134
566 wakaba 1.214 ## Step 4
567 wakaba 1.134 # if (can) {
568     ## change the encoding on the fly.
569     #$self->{confident} = 1;
570     #return;
571     # }
572    
573 wakaba 1.214 ## Step 5
574 wakaba 1.134 throw Whatpm::HTML::RestartParser ();
575 wakaba 1.63 }
576     }; # $self->{change_encoding}
577    
## Error handler for the character stream: reports the error at the
## current position and, when an |octets| reference is supplied,
## overwrites the referenced value with U+FFFD.
578 wakaba 1.136 my $char_onerror = sub {
579     my (undef, $type, %opt) = @_;
580 wakaba 1.153 !!!parse-error (layer => 'encode',
581 wakaba 1.174 line => $self->{line}, column => $self->{column} + 1,
582     %opt, type => $type);
583 wakaba 1.136 if ($opt{octets}) {
584     ${$opt{octets}} = "\x{FFFD}"; # replacement character
585     }
586     };
587 wakaba 1.162
588     my $wrapped_char_stream = $get_wrapper->($char_stream);
589     $wrapped_char_stream->onerror ($char_onerror);
590 wakaba 1.136
591 wakaba 1.182 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
592 wakaba 1.63 my $return;
593     try {
594 wakaba 1.162 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
595 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
596 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
597    
598     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
599 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
600 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
601     level => $self->{level}->{uncertain},
602 wakaba 1.160 #text => $self->{input_encoding},
603 wakaba 1.153 line => 1, column => 1,
604     layer => 'encode');
605 wakaba 1.134 } elsif (not ($e_status &
606 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
607 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
608 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
609     text => $self->{input_encoding},
610     level => $self->{level}->{uncertain},
611     line => 1, column => 1,
612     layer => 'encode');
613 wakaba 1.160 } else {
614     $self->{input_encoding} = $charset->get_iana_name;
615 wakaba 1.134 }
616 wakaba 1.63 $self->{confident} = 1;
617 wakaba 1.162
## Wrap the character stream prepared by {change_encoding} and parse again.
618     $wrapped_char_stream = $get_wrapper->($char_stream);
619     $wrapped_char_stream->onerror ($char_onerror);
620    
621     $return = $self->parse_char_stream ($wrapped_char_stream, @args);
622 wakaba 1.63 };
623     return $return;
624 wakaba 1.138 } # parse_byte_stream
625 wakaba 1.63
626 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
627     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
628     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
629     ## because the core part of our HTML parser expects a string of characters,
630     ## not a string of bytes or code units or anything which might contain a BOM.
631     ## Therefore, any parser interface that accepts a string of bytes,
632     ## such as |parse_byte_string| in this module, must ensure that it does
633     ## strip the BOM and never strip any ZWNBSP.
634    
## parse_char_string: Parse an HTML document given as a string of
## characters.
##
## Arguments (see the commented-out signature below): the invocant, the
## character string or a reference to it, the document object, an
## optional error handler, and an optional wrapper factory.
##
## The string is wrapped in a |Whatpm::Charset::DecodeHandle::CharString|
## object and the remaining arguments are forwarded unchanged to
## |parse_char_stream|, whose return value is returned.
635 wakaba 1.162 sub parse_char_string ($$$;$$) {
636     #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
637 wakaba 1.135 my $self = shift;
## Accept either a scalar reference or a plain string (referenced in place).
638 wakaba 1.139 my $s = ref $_[0] ? $_[0] : \($_[0]);
639 wakaba 1.171 require Whatpm::Charset::DecodeHandle;
640     my $input = Whatpm::Charset::DecodeHandle::CharString->new ($s);
641 wakaba 1.135 return $self->parse_char_stream ($input, @_[1..$#_]);
642     } # parse_char_string
643 wakaba 1.162 *parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
644 wakaba 1.63
## parse_char_stream: Parse an HTML document read from a character
## stream object.
##
## Arguments: the parser object or class name (a new parser is created
## by |new| when the invocant is not a reference), the character stream,
## the document object (its child node list is emptied first), an
## optional error handler, and an optional wrapper factory applied to
## the character stream.
##
## The method installs the |{set_nc}|, |{read_until}| and |{parse_error}|
## callbacks, then runs the tokenizer and tree constructor.
##
## Returns the document object.
645 wakaba 1.182 sub parse_char_stream ($$$;$$) {
646 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
647 wakaba 1.135 my $input = $_[0];
648 wakaba 1.1 $self->{document} = $_[1];
649 wakaba 1.63 @{$self->{document}->child_nodes} = ();
650 wakaba 1.1
651 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
652    
653 wakaba 1.235 ## Confidence: irrelevant.
654 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
655 wakaba 1.235
656 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
657     if defined $self->{input_encoding};
658 wakaba 1.178 ## TODO: |{input_encoding}| is needless?
659 wakaba 1.63
## Position tracking starts at line 1, column 0.
660 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
661 wakaba 1.179 $self->{column_prev} = -1;
662     $self->{column} = 0;
## Callback that stores the code point of the next input character in
## |{nc}| (-1 when no character could be read) and updates the
## line/column counters.  CR and CR LF are turned into a single LF, and
## U+0000 is reported and replaced by U+FFFD.
663 wakaba 1.183 $self->{set_nc} = sub {
664 wakaba 1.1 my $self = shift;
665 wakaba 1.13
666 wakaba 1.178 my $char = '';
## A character stashed by the CR handling below is consumed first.
667 wakaba 1.183 if (defined $self->{next_nc}) {
668     $char = $self->{next_nc};
669     delete $self->{next_nc};
670     $self->{nc} = ord $char;
671 wakaba 1.139 } else {
672 wakaba 1.179 $self->{char_buffer} = '';
673     $self->{char_buffer_pos} = 0;
674    
## Fast path: refill the buffer with characters other than
## U+0000, LF and CR, which need no special handling.
675     my $count = $input->manakai_read_until
676 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
677 wakaba 1.179 if ($count) {
678     $self->{line_prev} = $self->{line};
679     $self->{column_prev} = $self->{column};
680     $self->{column}++;
681 wakaba 1.183 $self->{nc}
682 wakaba 1.179 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
683     return;
684     }
685    
686 wakaba 1.178 if ($input->read ($char, 1)) {
687 wakaba 1.183 $self->{nc} = ord $char;
688 wakaba 1.178 } else {
689 wakaba 1.183 $self->{nc} = -1;
690 wakaba 1.178 return;
691     }
692 wakaba 1.139 }
693 wakaba 1.112
694     ($self->{line_prev}, $self->{column_prev})
695     = ($self->{line}, $self->{column});
696     $self->{column}++;
697 wakaba 1.1
698 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
699 wakaba 1.132 !!!cp ('j1');
700 wakaba 1.112 $self->{line}++;
701     $self->{column} = 0;
702 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
703 wakaba 1.132 !!!cp ('j2');
704 wakaba 1.170 ## TODO: support for abort/streaming
## Look one character ahead: an LF after the CR is dropped, any
## other character is kept for the next invocation.
705 wakaba 1.178 my $next = '';
706     if ($input->read ($next, 1) and $next ne "\x0A") {
707 wakaba 1.183 $self->{next_nc} = $next;
708 wakaba 1.135 }
709 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
710 wakaba 1.112 $self->{line}++;
711     $self->{column} = 0;
712 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
713 wakaba 1.132 !!!cp ('j4');
714 wakaba 1.8 !!!parse-error (type => 'NULL');
715 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
716 wakaba 1.1 }
717     };
718    
## Callback that appends to |$_[0]| (at offset |$_[2]|) a run of
## characters that are neither in the character class body |$_[1]| nor
## U+0000, LF or CR.  Returns the number of characters read (0 when
## nothing could be read or a character is stashed in |{next_nc}|).
## NOTE(review): |$offset| is computed below but the fallback branch
## passes |$_[2]| rather than |$offset| to |manakai_read_until| --
## confirm that an undefined offset is acceptable there.
719 wakaba 1.172 $self->{read_until} = sub {
720     #my ($scalar, $specials_range, $offset) = @_;
721 wakaba 1.183 return 0 if defined $self->{next_nc};
722 wakaba 1.180
723 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
724 wakaba 1.180 my $offset = $_[2] || 0;
725    
## Serve from the characters left in the internal buffer first.
726     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
727     pos ($self->{char_buffer}) = $self->{char_buffer_pos};
728     if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
729     substr ($_[0], $offset)
730     = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
731     my $count = $+[0] - $-[0];
732     if ($count) {
733     $self->{column} += $count;
734     $self->{char_buffer_pos} += $count;
735     $self->{line_prev} = $self->{line};
736     $self->{column_prev} = $self->{column} - 1;
737 wakaba 1.183 $self->{nc} = -1;
738 wakaba 1.180 }
739     return $count;
740     } else {
741     return 0;
742     }
743     } else {
744     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
745     if ($count) {
746     $self->{column} += $count;
747     $self->{line_prev} = $self->{line};
748     $self->{column_prev} = $self->{column} - 1;
749 wakaba 1.183 $self->{nc} = -1;
750 wakaba 1.180 }
751     return $count;
752 wakaba 1.172 }
753     }; # $self->{read_until}
754 wakaba 1.171
## Default error handler: emit a warning with the error type and the
## position taken from the token when one is given.
755 wakaba 1.3 my $onerror = $_[2] || sub {
756     my (%opt) = @_;
757 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
758     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
759     warn "Parse error ($opt{type}) at line $line column $column\n";
760 wakaba 1.3 };
## The current line and column are passed first, so that values
## supplied by the caller come later in the argument list.
761     $self->{parse_error} = sub {
762 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
763 wakaba 1.1 };
764    
765 wakaba 1.182 my $char_onerror = sub {
766     my (undef, $type, %opt) = @_;
767     !!!parse-error (layer => 'encode',
768     line => $self->{line}, column => $self->{column} + 1,
769     %opt, type => $type);
770     }; # $char_onerror
771    
## A wrapped stream always gets this method's handler; an unwrapped
## stream keeps a handler that has already been set on it.
772     if ($_[3]) {
773     $input = $_[3]->($input);
774     $input->onerror ($char_onerror);
775     } else {
776     $input->onerror ($char_onerror) unless defined $input->onerror;
777     }
778    
779 wakaba 1.1 $self->_initialize_tokenizer;
780     $self->_initialize_tree_constructor;
781     $self->_construct_tree;
782     $self->_terminate_tree_constructor;
783    
784 wakaba 1.112 delete $self->{parse_error}; # remove loop
785    
786 wakaba 1.1 return $self->{document};
787 wakaba 1.135 } # parse_char_stream
788 wakaba 1.1
## new: Create a new parser object.
##
## The object is a hash blessed into the invocant class.  It holds the
## |{level}| table, which maps error level names to one-letter codes,
## and default callbacks: |{set_nc}| sets |{nc}| to -1, while
## |{parse_error}|, |{change_encoding}| and
## |{application_cache_selection}| do nothing.
##
## Returns the new parser object.
789     sub new ($) {
790     my $class = shift;
791 wakaba 1.134 my $self = bless {
792 wakaba 1.244 level => {
793     must => 'm',
794     should => 's',
795     obc => 's', ## Obsolete but conforming, # XXX distinguish from "should"
796     warn => 'w',
797     info => 'i',
798     uncertain => 'u',
799     },
800 wakaba 1.134 }, $class;
801 wakaba 1.183 $self->{set_nc} = sub {
802     $self->{nc} = -1;
803 wakaba 1.1 };
804     $self->{parse_error} = sub {
805     #
806     };
807 wakaba 1.63 $self->{change_encoding} = sub {
808     # if ($_[0] is a supported encoding) {
809     # run "change the encoding" algorithm;
810     # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
811     # }
812     };
813 wakaba 1.61 $self->{application_cache_selection} = sub {
814     #
815     };
816 wakaba 1.1 return $self;
817     } # new
818    
819 wakaba 1.208 ## Insertion modes
820 wakaba 1.55
## Insertion mode groups.  Each group occupies one bit; the two
## lowest bits distinguish the individual modes within a group.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS () { 1 << 3 }
sub BODY_IMS () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS () { 1 << 6 }
sub ROW_IMS () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS () { 1 << 9 }
sub SELECT_IMS () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 1 << 11 } # see Whatpm::HTML::Tokenizer
## NOTE: The "in foreign content" insertion mode is special; it is
## combined with the secondary insertion mode.  In this parser, they
## are stored together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
## NOTE: The "in CDATA/RCDATA" insertion mode is also special; it is
## combined with the original insertion mode.  In this parser, they
## are stored together in the bit-or'ed form.

## The eleven low bits, i.e. everything below the two special flag
## bits described above.
sub IM_MASK () { (1 << 11) - 1 }

## NOTE: The "initial" and "before html" insertion modes have no
## constants.

## NOTE: The "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: The "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## Head-related insertion modes.
sub IN_HEAD_IM () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM () { HEAD_IMS | 3 }

## Body-related insertion modes.
sub IN_BODY_IM () { BODY_IMS }
sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub AFTER_BODY_IM () { BODY_AFTER_IMS }

## Table-related insertion modes.
sub IN_TABLE_IM () { TABLE_IMS }
sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_COLUMN_GROUP_IM () { 2 }

## Frameset-related insertion modes.
sub IN_FRAMESET_IM () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }

## Select-related insertion modes.
sub IN_SELECT_IM () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
865    
## Prepare the document and the parser for the tree construction
## stage: relax error checking on the document, flag it as an HTML
## document, record the initial source position on it, and set the
## parser's |frameset_ok| flag.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## NOTE: $self->{document} MUST be specified before this method is
  ## called.
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST

  ## The document node itself is attributed to line 1, column 1.
  $doc->set_user_data ($_ => 1)
      for qw(manakai_source_line manakai_source_column);

  $self->{frameset_ok} = 1;
} # _initialize_tree_constructor
878    
## Finish the tree construction stage by turning the document's
## strict error checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
884    
885     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
886    
887 wakaba 1.3 { # tree construction stage
888     my $token;
889    
## Run the whole tree construction stage: fetch the first token, reset
## the per-parse state, and then process the "initial" insertion
## mode, the "before html" insertion mode, and all the remaining
## insertion modes in turn.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA renders the
  ## $self->{document} available to the user, nor when it begins
  ## accepting user input.

  !!!next-token;

  ## Reset the state used by the tree construction stage.
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(
    form_element
    head_element
    head_element_inserted
    inner_html_node
    ignore_newline
  );

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;

  ## NOTE: The "before head" insertion mode and so on.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->_tree_construction_main;
} # _construct_tree
916    
## Process tokens in the "initial" insertion mode.
##
## Tokens are taken from the file-scoped |$token| variable.  White
## space character tokens are dropped and comments are appended to the
## document until one of the following happens:
##
##   - A DOCTYPE token is found.  Its conformance is checked, a
##     document type node is appended to the document, the document's
##     compatibility mode is chosen from the public and system
##     identifiers, and the next token is fetched.
##   - Any other token is found.  A "no DOCTYPE" parse error is
##     reported, the document is put into the quirks mode, and the
##     token is left for reprocessing.
##
## The method then returns (with no value), so that the caller can
## continue in the "before html" insertion mode.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not
      ## HTML5" error, switch to a conformance checking mode for
      ## another language.  (We don't support such mode switchings; it
      ## is nonsense to do anything different from what browsers do.)
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      my $doctype = $self->{document}->create_document_type_definition
          ($doctype_name);

      $doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive.
      if ($doctype_name ne 'html') {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        ## Obsolete permitted DOCTYPEs (case-sensitive).  The value
        ## is the only system identifier allowed with the public
        ## identifier (the system identifier may also be omitted).
        my $xsysid = {
          '-//W3C//DTD HTML 4.0//EN' => 'http://www.w3.org/TR/REC-html40/strict.dtd',
          '-//W3C//DTD HTML 4.01//EN' => 'http://www.w3.org/TR/html4/strict.dtd',
          '-//W3C//DTD XHTML 1.0 Strict//EN' => 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd',
          '-//W3C//DTD XHTML 1.1//EN' => 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd',
        }->{$token->{pubid}};
        if (defined $xsysid and
            (not defined $token->{sysid} or $token->{sysid} eq $xsysid)) {
          !!!cp ('t2');
          !!!parse-error (type => 'obs DOCTYPE', token => $token,
                          level => $self->{level}->{obc}); ## XXX error type
        } else {
          !!!cp ('t2.1');
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } elsif (defined $token->{sysid}) {
        if ($token->{sysid} eq 'about:legacy-compat') {
          !!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat">
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else { ## <!DOCTYPE HTML>
        !!!cp ('t3');
        #
      }

      ## NOTE: Default value for both |public_id| and |system_id|
      ## attributes are empty strings, so that we don't set any value
      ## in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};

      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      ## Choose the compatibility mode of the document.
      if ($token->{quirks} or $doctype_name ne 'html') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/; ## ASCII case-insensitive.
        ## Public identifier prefixes that trigger the quirks mode.
        ## NOTE: The spelling "EXPERIMETNAL" below is intentional.
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        my $match;
        for (@$prefix) {
          ## NOTE: The beginning of the public identifier |$pubid| is
          ## compared with each prefix.  (Comparing against the
          ## |$prefix| array reference itself would never match.)
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4\.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4\.01 TRANSITIONAL//]) {
          ## NOTE: These public identifiers trigger the quirks mode
          ## only when the system identifier is missing; with a
          ## system identifier they trigger the limited quirks mode.
          if (not defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1\.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1\.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"|
          ## is signaled as in quirks mode!
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the leading white space characters of the token.

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      ## The token has a non-white-space character.
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1139    
## Process tokens in the "before html" insertion mode.
##
## Tokens are taken from the file-scoped |$token| variable.  DOCTYPE
## tokens are reported as errors and ignored, comments are appended
## to the document, and white space character tokens are dropped,
## until one of the following happens:
##
##   - An |html| start tag is found.  The root element is created
##     from the token and the application cache selection handler is
##     invoked with the value of the |manifest| attribute, if any, or
##     |undef|.  The next token is then fetched.
##   - Any other token is found.  An |html| root element is created
##     without attributes, the application cache selection handler is
##     invoked (once) with |undef|, and the token is left for
##     reprocessing.
##
## In both cases the root element is appended to the document and
## pushed onto the stack of open elements, and the method returns
## (with no value) so that the caller can continue in the "before
## head" insertion mode.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the leading white space characters of the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection handler is invoked
      ## below, after the implied |html| element has been created, as
      ## for any other token handled by the common code.  It must not
      ## be invoked here as well, or it would run twice for a single
      ## document.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Anything else: imply the |html| root element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1232    
## Reset the insertion mode appropriately.
##
## The stack of open elements is walked from its last entry towards
## its first entry until an entry determines the new value of
## |$self->{insertion_mode}|.  When the first entry of the stack is
## reached, |$self->{inner_html_node}| is examined in its place; the
## method dies if that is not defined.
sub _reset_insertion_mode ($) {
  my ($self) = @_;

  ## Insertion modes selected by the local name of an element.
  my %mode_for = (
    select => IN_SELECT_IM,
    ## NOTE: |option| and |optgroup| do not set insertion mode to
    ## "in select" by themselves.
    tr => IN_ROW_IM,
    tbody => IN_TABLE_BODY_IM,
    thead => IN_TABLE_BODY_IM,
    tfoot => IN_TABLE_BODY_IM,
    caption => IN_CAPTION_IM,
    colgroup => IN_COLUMN_GROUP_IM,
    table => IN_TABLE_IM,
    head => IN_BODY_IM, # not in head!
    body => IN_BODY_IM,
    frameset => IN_FRAMESET_IM,
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $index = -1;
  my $current = $self->{open_elements}->[$index];

  ## Step 3
  while (1) {
    if ($self->{open_elements}->[0]->[0] eq $current->[0]) {
      $is_last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $current = $self->{inner_html_node};
    }

    ## Step 4..14
    my $mode;
    if ($current->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to
      ## MathML and SVG elements.  Currently the HTML syntax supports
      ## only MathML and SVG elements as foreigners.
      $mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($current->[1] == TABLE_CELL_EL) {
      unless ($is_last) {
        !!!cp ('t28.3');
        $mode = IN_CELL_IM;
      } else {
        !!!cp ('t28.2');
        #
      }
    } else {
      !!!cp ('t28.4');
      $mode = $mode_for{$current->[0]->manakai_local_name};
    }
    if (defined $mode) {
      $self->{insertion_mode} = $mode;
      ## NOTE: Every insertion mode that can be selected above is a
      ## non-zero value, so this always returns.
      return if $mode;
    }

    ## Step 15
    if ($current->[1] == HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($is_last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $index--;
    $current = $self->{open_elements}->[$index];

    ## Step 18: Go back to the step 3.
  } # Step 3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1319    
1320     sub _tree_construction_main ($) {
1321     my $self = shift;
1322    
1323 wakaba 1.1 my $active_formatting_elements = [];
1324    
1325     my $reconstruct_active_formatting_elements = sub { # MUST
1326     my $insert = shift;
1327    
1328     ## Step 1
1329     return unless @$active_formatting_elements;
1330    
1331     ## Step 3
1332     my $i = -1;
1333     my $entry = $active_formatting_elements->[$i];
1334    
1335     ## Step 2
1336     return if $entry->[0] eq '#marker';
1337 wakaba 1.3 for (@{$self->{open_elements}}) {
1338 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1339 wakaba 1.79 !!!cp ('t32');
1340 wakaba 1.1 return;
1341     }
1342     }
1343    
1344     S4: {
1345     ## Step 4
1346     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
1347    
1348     ## Step 5
1349     $i--;
1350     $entry = $active_formatting_elements->[$i];
1351    
1352     ## Step 6
1353     if ($entry->[0] eq '#marker') {
1354 wakaba 1.81 !!!cp ('t33_1');
1355 wakaba 1.1 #
1356     } else {
1357     my $in_open_elements;
1358 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
1359 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1360 wakaba 1.79 !!!cp ('t33');
1361 wakaba 1.1 $in_open_elements = 1;
1362     last OE;
1363     }
1364     }
1365     if ($in_open_elements) {
1366 wakaba 1.79 !!!cp ('t34');
1367 wakaba 1.1 #
1368     } else {
1369 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
1370 wakaba 1.79 !!!cp ('t35');
1371 wakaba 1.1 redo S4;
1372     }
1373     }
1374    
1375     ## Step 7
1376     $i++;
1377     $entry = $active_formatting_elements->[$i];
1378     } # S4
1379    
1380     S7: {
1381     ## Step 8
1382     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
1383    
1384     ## Step 9
1385     $insert->($clone->[0]);
1386 wakaba 1.3 push @{$self->{open_elements}}, $clone;
1387 wakaba 1.1
1388     ## Step 10
1389 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
1390 wakaba 1.1
1391     ## Step 11
1392     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
1393 wakaba 1.79 !!!cp ('t36');
1394 wakaba 1.1 ## Step 7'
1395     $i++;
1396     $entry = $active_formatting_elements->[$i];
1397    
1398     redo S7;
1399     }
1400 wakaba 1.79
1401     !!!cp ('t37');
1402 wakaba 1.1 } # S7
1403     }; # $reconstruct_active_formatting_elements
1404    
1405     my $clear_up_to_marker = sub {
1406     for (reverse 0..$#$active_formatting_elements) {
1407     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
1408 wakaba 1.79 !!!cp ('t38');
1409 wakaba 1.1 splice @$active_formatting_elements, $_;
1410     return;
1411     }
1412     }
1413 wakaba 1.79
1414     !!!cp ('t39');
1415 wakaba 1.1 }; # $clear_up_to_marker
1416    
1417 wakaba 1.96 my $insert;
1418    
1419     my $parse_rcdata = sub ($) {
1420     my ($content_model_flag) = @_;
1421 wakaba 1.25
1422     ## Step 1
1423     my $start_tag_name = $token->{tag_name};
1424 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
1425 wakaba 1.25
1426     ## Step 2
1427 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
1428 wakaba 1.13 delete $self->{escape}; # MUST
1429 wakaba 1.25
1430 wakaba 1.205 ## Step 3, 4
1431     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
1432    
1433 wakaba 1.125 !!!nack ('t40.1');
1434 wakaba 1.1 !!!next-token;
1435 wakaba 1.25 }; # $parse_rcdata
1436 wakaba 1.1
1437 wakaba 1.96 my $script_start_tag = sub () {
1438 wakaba 1.205 ## Step 1
1439 wakaba 1.1 my $script_el;
1440 wakaba 1.126 !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
1441 wakaba 1.205
1442     ## Step 2
1443 wakaba 1.1 ## TODO: mark as "parser-inserted"
1444    
1445 wakaba 1.205 ## Step 3
1446     ## TODO: Mark as "already executed", if ...
1447    
1448 wakaba 1.221 ## Step 4 (HTML5 revision 2702)
1449 wakaba 1.205 $insert->($script_el);
1450     push @{$self->{open_elements}}, [$script_el, $el_category->{script}];
1451    
1452     ## Step 5
1453 wakaba 1.40 $self->{content_model} = CDATA_CONTENT_MODEL;
1454 wakaba 1.13 delete $self->{escape}; # MUST
1455 wakaba 1.1
1456 wakaba 1.205 ## Step 6-7
1457     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
1458 wakaba 1.25
1459 wakaba 1.205 !!!nack ('t40.2');
1460 wakaba 1.1 !!!next-token;
1461     }; # $script_start_tag
1462    
1463 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1464 wakaba 1.229 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag (OBSOLETE; unused).
1465 wakaba 1.202 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1466 wakaba 1.102 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1467    
1468 wakaba 1.1 my $formatting_end_tag = sub {
1469 wakaba 1.113 my $end_tag_token = shift;
1470     my $tag_name = $end_tag_token->{tag_name};
1471 wakaba 1.1
1472 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
1473 wakaba 1.102
1474 wakaba 1.1 FET: {
1475     ## Step 1
1476     my $formatting_element;
1477     my $formatting_element_i_in_active;
1478     AFE: for (reverse 0..$#$active_formatting_elements) {
1479 wakaba 1.123 if ($active_formatting_elements->[$_]->[0] eq '#marker') {
1480     !!!cp ('t52');
1481     last AFE;
1482     } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
1483     eq $tag_name) {
1484 wakaba 1.79 !!!cp ('t51');
1485 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
1486     $formatting_element_i_in_active = $_;
1487     last AFE;
1488     }
1489     } # AFE
1490     unless (defined $formatting_element) {
1491 wakaba 1.79 !!!cp ('t53');
1492 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
1493 wakaba 1.1 ## Ignore the token
1494     !!!next-token;
1495     return;
1496     }
1497     ## has an element in scope
1498     my $in_scope = 1;
1499     my $formatting_element_i_in_open;
1500 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
1501     my $node = $self->{open_elements}->[$_];
1502 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
1503     if ($in_scope) {
1504 wakaba 1.79 !!!cp ('t54');
1505 wakaba 1.1 $formatting_element_i_in_open = $_;
1506     last INSCOPE;
1507     } else { # in open elements but not in scope
1508 wakaba 1.79 !!!cp ('t55');
1509 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
1510     text => $token->{tag_name},
1511 wakaba 1.113 token => $end_tag_token);
1512 wakaba 1.1 ## Ignore the token
1513     !!!next-token;
1514     return;
1515     }
1516 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
1517 wakaba 1.79 !!!cp ('t56');
1518 wakaba 1.1 $in_scope = 0;
1519     }
1520     } # INSCOPE
1521     unless (defined $formatting_element_i_in_open) {
1522 wakaba 1.79 !!!cp ('t57');
1523 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
1524     text => $token->{tag_name},
1525 wakaba 1.113 token => $end_tag_token);
1526 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
1527     !!!next-token; ## TODO: ok?
1528     return;
1529     }
1530 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
1531 wakaba 1.79 !!!cp ('t58');
1532 wakaba 1.122 !!!parse-error (type => 'not closed',
1533 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1534 wakaba 1.122 ->manakai_local_name,
1535 wakaba 1.113 token => $end_tag_token);
1536 wakaba 1.1 }
1537    
1538     ## Step 2
1539     my $furthest_block;
1540     my $furthest_block_i_in_open;
1541 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1542     my $node = $self->{open_elements}->[$_];
1543 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
1544 wakaba 1.1 #not $phrasing_category->{$node->[1]} and
1545 wakaba 1.123 ($node->[1] & SPECIAL_EL or
1546     $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
1547 wakaba 1.79 !!!cp ('t59');
1548 wakaba 1.1 $furthest_block = $node;
1549     $furthest_block_i_in_open = $_;
1550 wakaba 1.203 ## NOTE: The topmost (eldest) node.
1551 wakaba 1.1 } elsif ($node->[0] eq $formatting_element->[0]) {
1552 wakaba 1.79 !!!cp ('t60');
1553 wakaba 1.1 last OE;
1554     }
1555     } # OE
1556    
1557     ## Step 3
1558     unless (defined $furthest_block) { # MUST
1559 wakaba 1.79 !!!cp ('t61');
1560 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
1561 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
1562     !!!next-token;
1563     return;
1564     }
1565    
1566     ## Step 4
1567 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
1568 wakaba 1.1
1569     ## Step 5
1570     my $furthest_block_parent = $furthest_block->[0]->parent_node;
1571     if (defined $furthest_block_parent) {
1572 wakaba 1.79 !!!cp ('t62');
1573 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
1574     }
1575    
1576     ## Step 6
1577     my $bookmark_prev_el
1578     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
1579     ->[0];
1580    
1581     ## Step 7
1582     my $node = $furthest_block;
1583     my $node_i_in_open = $furthest_block_i_in_open;
1584     my $last_node = $furthest_block;
1585     S7: {
1586     ## Step 1
1587     $node_i_in_open--;
1588 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
1589 wakaba 1.1
1590     ## Step 2
1591     my $node_i_in_active;
1592     S7S2: {
1593     for (reverse 0..$#$active_formatting_elements) {
1594     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
1595 wakaba 1.79 !!!cp ('t63');
1596 wakaba 1.1 $node_i_in_active = $_;
1597     last S7S2;
1598     }
1599     }
1600 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
1601 wakaba 1.1 redo S7;
1602     } # S7S2
1603    
1604     ## Step 3
1605     last S7 if $node->[0] eq $formatting_element->[0];
1606    
1607     ## Step 4
1608     if ($last_node->[0] eq $furthest_block->[0]) {
1609 wakaba 1.79 !!!cp ('t64');
1610 wakaba 1.1 $bookmark_prev_el = $node->[0];
1611     }
1612    
1613     ## Step 5
1614     if ($node->[0]->has_child_nodes ()) {
1615 wakaba 1.79 !!!cp ('t65');
1616 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
1617     $active_formatting_elements->[$node_i_in_active] = $clone;
1618 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
1619 wakaba 1.1 $node = $clone;
1620     }
1621    
1622     ## Step 6
1623     $node->[0]->append_child ($last_node->[0]);
1624    
1625     ## Step 7
1626     $last_node = $node;
1627    
1628     ## Step 8
1629     redo S7;
1630     } # S7
1631    
1632     ## Step 8
1633 wakaba 1.123 if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
1634 wakaba 1.234 ## Foster parenting.
1635 wakaba 1.102 my $foster_parent_element;
1636     my $next_sibling;
1637 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
1638 wakaba 1.206 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1639 wakaba 1.234 !!!cp ('t65.2');
1640     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1641     $next_sibling = $self->{open_elements}->[$_]->[0];
1642     undef $next_sibling
1643     unless $next_sibling->parent_node eq $foster_parent_element;
1644     last OE;
1645     }
1646     } # OE
1647     $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1648    
1649 wakaba 1.102 $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
1650     $open_tables->[-1]->[1] = 1; # tainted
1651     } else {
1652     !!!cp ('t65.3');
1653     $common_ancestor_node->[0]->append_child ($last_node->[0]);
1654     }
1655 wakaba 1.1
1656     ## Step 9
1657     my $clone = [$formatting_element->[0]->clone_node (0),
1658     $formatting_element->[1]];
1659    
1660     ## Step 10
1661     my @cn = @{$furthest_block->[0]->child_nodes};
1662     $clone->[0]->append_child ($_) for @cn;
1663    
1664     ## Step 11
1665     $furthest_block->[0]->append_child ($clone->[0]);
1666    
1667     ## Step 12
1668     my $i;
1669     AFE: for (reverse 0..$#$active_formatting_elements) {
1670     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
1671 wakaba 1.79 !!!cp ('t66');
1672 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
1673     $i-- and last AFE if defined $i;
1674     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
1675 wakaba 1.79 !!!cp ('t67');
1676 wakaba 1.1 $i = $_;
1677     }
1678     } # AFE
1679     splice @$active_formatting_elements, $i + 1, 0, $clone;
1680    
1681     ## Step 13
1682     undef $i;
1683 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1684     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
1685 wakaba 1.79 !!!cp ('t68');
1686 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
1687 wakaba 1.1 $i-- and last OE if defined $i;
1688 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
1689 wakaba 1.79 !!!cp ('t69');
1690 wakaba 1.1 $i = $_;
1691     }
1692     } # OE
1693 wakaba 1.203 splice @{$self->{open_elements}}, $i + 1, 0, $clone;
1694 wakaba 1.1
1695     ## Step 14
1696     redo FET;
1697     } # FET
1698     }; # $formatting_end_tag
1699    
1700 wakaba 1.96 $insert = my $insert_to_current = sub { # default inserter (the same closure is also stored in $insert); takes the node to insert as its first argument
1701 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]); # append the node as the last child of the node held by the last entry of the open-elements stack
1702 wakaba 1.1 }; # $insert_to_current
1703    
1704 wakaba 1.234 ## Foster parenting: inserts the node passed as the first argument.  If the
1705     ## last open element is flagged TABLE_ROWS_EL the node is placed next to the nearest open |table|; otherwise it is simply appended to the last open element.
1706     ## NOTE: There are three pieces of "foster parenting" code in the parser: for elements (this one), for texts, and for elements in the AAA code.
1707 wakaba 1.1 my $insert_to_foster = sub {
1708 wakaba 1.95 my $child = shift;
1709 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) { # category flags of the last open element include TABLE_ROWS_EL
1710 wakaba 1.95 # MUST
1711     my $foster_parent_element;
1712     my $next_sibling;
1713 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) { # scan the stack from the most recently opened entry downwards
1714 wakaba 1.206 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) { # first entry whose category is exactly TABLE_EL
1715 wakaba 1.234 !!!cp ('t71');
1716     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0]; # the stack entry immediately below the table
1717     $next_sibling = $self->{open_elements}->[$_]->[0]; # the table element itself
1718     undef $next_sibling # drop the reference node ...
1719     unless $next_sibling->parent_node eq $foster_parent_element; # ... unless the table is currently a child of the foster parent
1720     last OE;
1721     }
1722     } # OE
1723     $foster_parent_element ||= $self->{open_elements}->[0]->[0]; # no table found on the stack: fall back to the bottom (first) stack entry
1724    
1725     $foster_parent_element->insert_before ($child, $next_sibling); # NOTE(review): with an undefined $next_sibling this presumably appends, as DOM insertBefore does - confirm
1726 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
1727     } else {
1728     !!!cp ('t72');
1729     $self->{open_elements}->[-1]->[0]->append_child ($child); # ordinary case: append to the last open element
1730     }
1731 wakaba 1.1 }; # $insert_to_foster
1732    
1733 wakaba 1.204 ## NOTE: Insert a character (MUST): When a character is inserted, if
1734     ## the last node that was inserted by the parser is a Text node and
1735     ## the character has to be inserted after that node, then the
1736     ## character is appended to the Text node. However, if any other
1737     ## node is inserted by the parser, then a new Text node is created
1738     ## and the character is appended as that Text node. If I'm not
1739     ## wrong, for a parser with scripting disabled, there are only two
1740     ## cases where this occurs. One is the case where an element node
1741     ## is inserted to the |head| element. This is covered by using the
1742 wakaba 1.202 ## |$self->{head_element_inserted}| flag. Another is the case where
1743     ## an element or comment is inserted into the |table| subtree while
1744     ## foster parenting happens. This is covered by using the [2] flag
1745     ## of the |$open_tables| structure. All other cases are handled
1746     ## simply by calling |manakai_append_text| method.
1747    
1748 wakaba 1.204 ## TODO: |<body><script>document.write("a<br>");
1749     ## document.body.removeChild (document.body.lastChild);
1750     ## document.write ("b")</script>|
1751    
1752 wakaba 1.126 B: while (1) {
1753 wakaba 1.230
1754     ## The "in table text" insertion mode.
1755     if ($self->{insertion_mode} & TABLE_IMS and
1756     not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1757     not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1758     C: {
1759     my $s;
1760     if ($token->{type} == CHARACTER_TOKEN) {
1761     !!!cp ('t194');
1762     $self->{pending_chars} ||= [];
1763     push @{$self->{pending_chars}}, $token;
1764     !!!next-token;
1765     next B;
1766     } else {
1767     if ($self->{pending_chars}) {
1768     $s = join '', map { $_->{data} } @{$self->{pending_chars}};
1769     delete $self->{pending_chars};
1770     if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1771     !!!cp ('t195');
1772     #
1773     } else {
1774     !!!cp ('t195.1');
1775     #$self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1776     $self->{open_elements}->[-1]->[0]->append_child
1777     ($self->{document}->create_text_node ($s));
1778     last C;
1779     }
1780     } else {
1781     !!!cp ('t195.2');
1782     last C;
1783     }
1784     }
1785    
1786 wakaba 1.234 ## Foster parenting.
1787 wakaba 1.230 !!!parse-error (type => 'in table:#text', token => $token);
1788    
1789     ## NOTE: As if in body, but insert into the foster parent element.
1790     $reconstruct_active_formatting_elements->($insert_to_foster);
1791    
1792     if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
1793     # MUST
1794     my $foster_parent_element;
1795     my $next_sibling;
1796     OE: for (reverse 0..$#{$self->{open_elements}}) {
1797     if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1798 wakaba 1.234 !!!cp ('t197');
1799     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1800     $next_sibling = $self->{open_elements}->[$_]->[0];
1801     undef $next_sibling
1802     unless $next_sibling->parent_node eq $foster_parent_element;
1803 wakaba 1.230 last OE;
1804     }
1805     } # OE
1806 wakaba 1.234 $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1807    
1808     !!!cp ('t199');
1809     $foster_parent_element->insert_before
1810     ($self->{document}->create_text_node ($s), $next_sibling);
1811    
1812 wakaba 1.230 $open_tables->[-1]->[1] = 1; # tainted
1813     $open_tables->[-1]->[2] = 1; # ~node inserted
1814     } else {
1815     ## NOTE: Fragment case or in a foster parent'ed element
1816     ## (e.g. |<table><span>a|). In fragment case, whether the
1817     ## character is appended to existing node or a new node is
1818     ## created is irrelevant, since the foster parent'ed nodes
1819     ## are discarded and fragment parsing does not invoke any
1820     ## script.
1821     !!!cp ('t200');
1822     $self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1823     }
1824     } # C
1825     } # TABLE_IMS
1826    
1827 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
1828 wakaba 1.79 !!!cp ('t73');
1829 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1830 wakaba 1.52 ## Ignore the token
1831     ## Stay in the phase
1832     !!!next-token;
1833 wakaba 1.126 next B;
1834 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
1835 wakaba 1.52 $token->{tag_name} eq 'html') {
1836 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1837 wakaba 1.79 !!!cp ('t79');
1838 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1839 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
1840     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1841 wakaba 1.79 !!!cp ('t80');
1842 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1843 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1844 wakaba 1.79 } else {
1845     !!!cp ('t81');
1846 wakaba 1.52 }
1847    
1848 wakaba 1.84 !!!cp ('t82');
1849 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
1850 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
1851     for my $attr_name (keys %{$token->{attributes}}) {
1852     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1853 wakaba 1.79 !!!cp ('t84');
1854 wakaba 1.52 $top_el->set_attribute_ns
1855     (undef, [undef, $attr_name],
1856     $token->{attributes}->{$attr_name}->{value});
1857     }
1858     }
1859 wakaba 1.125 !!!nack ('t84.1');
1860 wakaba 1.52 !!!next-token;
1861 wakaba 1.126 next B;
1862 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1863 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
1864 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1865 wakaba 1.79 !!!cp ('t85');
1866 wakaba 1.52 $self->{document}->append_child ($comment);
1867 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1868 wakaba 1.79 !!!cp ('t86');
1869 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
1870     } else {
1871 wakaba 1.79 !!!cp ('t87');
1872 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1873 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1874 wakaba 1.52 }
1875     !!!next-token;
1876 wakaba 1.126 next B;
1877 wakaba 1.205 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1878     if ($token->{type} == CHARACTER_TOKEN) {
1879     $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1880     delete $self->{ignore_newline};
1881    
1882     if (length $token->{data}) {
1883     !!!cp ('t43');
1884     $self->{open_elements}->[-1]->[0]->manakai_append_text
1885     ($token->{data});
1886     } else {
1887     !!!cp ('t43.1');
1888     }
1889     !!!next-token;
1890     next B;
1891     } elsif ($token->{type} == END_TAG_TOKEN) {
1892     delete $self->{ignore_newline};
1893    
1894     if ($token->{tag_name} eq 'script') {
1895     !!!cp ('t50');
1896    
1897     ## Para 1-2
1898     my $script = pop @{$self->{open_elements}};
1899    
1900     ## Para 3
1901     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1902    
1903     ## Para 4
1904     ## TODO: $old_insertion_point = $current_insertion_point;
1905     ## TODO: $current_insertion_point = just before $self->{nc};
1906    
1907     ## Para 5
1908     ## TODO: Run the $script->[0].
1909    
1910     ## Para 6
1911     ## TODO: $current_insertion_point = $old_insertion_point;
1912    
1913     ## Para 7
1914     ## TODO: if ($pending_external_script) {
1915     ## TODO: ...
1916     ## TODO: }
1917    
1918     !!!next-token;
1919     next B;
1920     } else {
1921     !!!cp ('t42');
1922    
1923     pop @{$self->{open_elements}};
1924    
1925     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1926     !!!next-token;
1927     next B;
1928     }
1929     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1930     delete $self->{ignore_newline};
1931    
1932     !!!cp ('t44');
1933     !!!parse-error (type => 'not closed',
1934     text => $self->{open_elements}->[-1]->[0]
1935     ->manakai_local_name,
1936     token => $token);
1937    
1938 wakaba 1.206 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1939 wakaba 1.205 # ## TODO: Mark as "already executed"
1940     #}
1941    
1942     pop @{$self->{open_elements}};
1943    
1944     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1945     ## Reprocess.
1946     next B;
1947     } else {
1948     die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1949     }
1950 wakaba 1.126 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1951     if ($token->{type} == CHARACTER_TOKEN) {
1952     !!!cp ('t87.1');
1953 wakaba 1.243
1954 wakaba 1.126 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1955 wakaba 1.243
1956     if ($token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1957     delete $self->{frameset_ok};
1958     }
1959    
1960 wakaba 1.126 !!!next-token;
1961     next B;
1962     } elsif ($token->{type} == START_TAG_TOKEN) {
1963 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1964     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1965 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1966     ($token->{tag_name} eq 'svg' and
1967 wakaba 1.206 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1968 wakaba 1.126 ## NOTE: "using the rules for secondary insertion mode" then "continue"
1969     !!!cp ('t87.2');
1970     #
1971     } elsif ({
1972 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1973 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1974 wakaba 1.223 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1975 wakaba 1.146 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1976     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1977     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1978     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1979     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1980 wakaba 1.223 }->{$token->{tag_name}} or
1981     ($token->{tag_name} eq 'font' and
1982     ($token->{attributes}->{color} or
1983     $token->{attributes}->{face} or
1984     $token->{attributes}->{size}))) {
1985 wakaba 1.126 !!!cp ('t87.2');
1986     !!!parse-error (type => 'not closed',
1987 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1988 wakaba 1.126 ->manakai_local_name,
1989     token => $token);
1990    
1991     pop @{$self->{open_elements}}
1992     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1993    
1994 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1995 wakaba 1.126 ## Reprocess.
1996     next B;
1997     } else {
1998 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1999     my $tag_name = $token->{tag_name};
2000     if ($nsuri eq $SVG_NS) {
2001     $tag_name = {
2002     altglyph => 'altGlyph',
2003     altglyphdef => 'altGlyphDef',
2004     altglyphitem => 'altGlyphItem',
2005     animatecolor => 'animateColor',
2006     animatemotion => 'animateMotion',
2007     animatetransform => 'animateTransform',
2008     clippath => 'clipPath',
2009     feblend => 'feBlend',
2010     fecolormatrix => 'feColorMatrix',
2011     fecomponenttransfer => 'feComponentTransfer',
2012     fecomposite => 'feComposite',
2013     feconvolvematrix => 'feConvolveMatrix',
2014     fediffuselighting => 'feDiffuseLighting',
2015     fedisplacementmap => 'feDisplacementMap',
2016     fedistantlight => 'feDistantLight',
2017     feflood => 'feFlood',
2018     fefunca => 'feFuncA',
2019     fefuncb => 'feFuncB',
2020     fefuncg => 'feFuncG',
2021     fefuncr => 'feFuncR',
2022     fegaussianblur => 'feGaussianBlur',
2023     feimage => 'feImage',
2024     femerge => 'feMerge',
2025     femergenode => 'feMergeNode',
2026     femorphology => 'feMorphology',
2027     feoffset => 'feOffset',
2028     fepointlight => 'fePointLight',
2029     fespecularlighting => 'feSpecularLighting',
2030     fespotlight => 'feSpotLight',
2031     fetile => 'feTile',
2032     feturbulence => 'feTurbulence',
2033     foreignobject => 'foreignObject',
2034     glyphref => 'glyphRef',
2035     lineargradient => 'linearGradient',
2036     radialgradient => 'radialGradient',
2037     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
2038     textpath => 'textPath',
2039     }->{$tag_name} || $tag_name;
2040     }
2041    
2042     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
2043    
2044     ## "adjust foreign attributes" - done in insert-element-f
2045 wakaba 1.126
2046 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
2047 wakaba 1.126
2048     if ($self->{self_closing}) {
2049     pop @{$self->{open_elements}};
2050     !!!ack ('t87.3');
2051     } else {
2052     !!!cp ('t87.4');
2053     }
2054    
2055     !!!next-token;
2056     next B;
2057     }
2058     } elsif ($token->{type} == END_TAG_TOKEN) {
2059     ## NOTE: "using the rules for secondary insertion mode" then "continue"
2060 wakaba 1.219 if ($token->{tag_name} eq 'script') {
2061     !!!cp ('t87.41');
2062     #
2063     ## XXXscript: Execute script here.
2064     } else {
2065     !!!cp ('t87.5');
2066     #
2067     }
2068 wakaba 1.126 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2069     !!!cp ('t87.6');
2070 wakaba 1.146 !!!parse-error (type => 'not closed',
2071 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2072 wakaba 1.146 ->manakai_local_name,
2073     token => $token);
2074    
2075     pop @{$self->{open_elements}}
2076     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
2077    
2078 wakaba 1.200 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
2079    
2080 wakaba 1.146 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
2081     ## Reprocess.
2082     next B;
2083 wakaba 1.126 } else {
2084     die "$0: $token->{type}: Unknown token type";
2085     }
2086     }
2087    
2088     if ($self->{insertion_mode} & HEAD_IMS) {
2089 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2090 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
2091 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2092 wakaba 1.202 if ($self->{head_element_inserted}) {
2093     !!!cp ('t88.3');
2094     $self->{open_elements}->[-1]->[0]->append_child
2095     ($self->{document}->create_text_node ($1));
2096     delete $self->{head_element_inserted};
2097     ## NOTE: |</head> <link> |
2098     #
2099     } else {
2100     !!!cp ('t88.2');
2101     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2102     ## NOTE: |</head> &#x20;|
2103     #
2104     }
2105 wakaba 1.99 } else {
2106     !!!cp ('t88.1');
2107     ## Ignore the token.
2108 wakaba 1.177 #
2109 wakaba 1.99 }
2110 wakaba 1.52 unless (length $token->{data}) {
2111 wakaba 1.79 !!!cp ('t88');
2112 wakaba 1.52 !!!next-token;
2113 wakaba 1.126 next B;
2114 wakaba 1.1 }
2115 wakaba 1.177 ## TODO: set $token->{column} appropriately
2116 wakaba 1.1 }
2117 wakaba 1.52
2118 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2119 wakaba 1.79 !!!cp ('t89');
2120 wakaba 1.52 ## As if <head>
2121 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2122 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2123 wakaba 1.123 push @{$self->{open_elements}},
2124     [$self->{head_element}, $el_category->{head}];
2125 wakaba 1.52
2126     ## Reprocess in the "in head" insertion mode...
2127     pop @{$self->{open_elements}};
2128    
2129     ## Reprocess in the "after head" insertion mode...
2130 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2131 wakaba 1.79 !!!cp ('t90');
2132 wakaba 1.52 ## As if </noscript>
2133     pop @{$self->{open_elements}};
2134 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
2135 wakaba 1.1
2136 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
2137     ## As if </head>
2138     pop @{$self->{open_elements}};
2139    
2140     ## Reprocess in the "after head" insertion mode...
2141 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2142 wakaba 1.79 !!!cp ('t91');
2143 wakaba 1.52 pop @{$self->{open_elements}};
2144    
2145     ## Reprocess in the "after head" insertion mode...
2146 wakaba 1.79 } else {
2147     !!!cp ('t92');
2148 wakaba 1.1 }
2149 wakaba 1.52
2150 wakaba 1.123 ## "after head" insertion mode
2151     ## As if <body>
2152     !!!insert-element ('body',, $token);
2153     $self->{insertion_mode} = IN_BODY_IM;
2154 wakaba 1.243 ## The "frameset-ok" flag is left unchanged in this case.
2155     ## Reprocess the token.
2156 wakaba 1.126 next B;
2157 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
2158     if ($token->{tag_name} eq 'head') {
2159     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2160     !!!cp ('t93');
2161 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2162 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
2163     ($self->{head_element});
2164     push @{$self->{open_elements}},
2165     [$self->{head_element}, $el_category->{head}];
2166     $self->{insertion_mode} = IN_HEAD_IM;
2167 wakaba 1.125 !!!nack ('t93.1');
2168 wakaba 1.123 !!!next-token;
2169 wakaba 1.126 next B;
2170 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2171 wakaba 1.139 !!!cp ('t93.2');
2172 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
2173     token => $token);
2174 wakaba 1.139 ## Ignore the token
2175     !!!nack ('t93.3');
2176     !!!next-token;
2177     next B;
2178 wakaba 1.125 } else {
2179     !!!cp ('t95');
2180 wakaba 1.153 !!!parse-error (type => 'in head:head',
2181     token => $token); # or in head noscript
2182 wakaba 1.125 ## Ignore the token
2183     !!!nack ('t95.1');
2184     !!!next-token;
2185 wakaba 1.126 next B;
2186 wakaba 1.125 }
2187     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2188 wakaba 1.126 !!!cp ('t96');
2189     ## As if <head>
2190     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2191     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2192     push @{$self->{open_elements}},
2193     [$self->{head_element}, $el_category->{head}];
2194 wakaba 1.52
2195 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
2196     ## Reprocess in the "in head" insertion mode...
2197     } else {
2198     !!!cp ('t97');
2199     }
2200 wakaba 1.52
2201 wakaba 1.202 if ($token->{tag_name} eq 'base') {
2202     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2203     !!!cp ('t98');
2204     ## As if </noscript>
2205     pop @{$self->{open_elements}};
2206     !!!parse-error (type => 'in noscript', text => 'base',
2207     token => $token);
2208    
2209     $self->{insertion_mode} = IN_HEAD_IM;
2210     ## Reprocess in the "in head" insertion mode...
2211     } else {
2212     !!!cp ('t99');
2213     }
2214 wakaba 1.49
2215 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2216     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2217     !!!cp ('t100');
2218     !!!parse-error (type => 'after head',
2219     text => $token->{tag_name}, token => $token);
2220     push @{$self->{open_elements}},
2221     [$self->{head_element}, $el_category->{head}];
2222     $self->{head_element_inserted} = 1;
2223     } else {
2224     !!!cp ('t101');
2225     }
2226     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2227     pop @{$self->{open_elements}};
2228     pop @{$self->{open_elements}} # <head>
2229     if $self->{insertion_mode} == AFTER_HEAD_IM;
2230     !!!nack ('t101.1');
2231     !!!next-token;
2232     next B;
2233 wakaba 1.194 } elsif ($token->{tag_name} eq 'link') {
2234     ## NOTE: There is a "as if in head" code clone.
2235     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2236     !!!cp ('t102');
2237     !!!parse-error (type => 'after head',
2238     text => $token->{tag_name}, token => $token);
2239     push @{$self->{open_elements}},
2240     [$self->{head_element}, $el_category->{head}];
2241 wakaba 1.202 $self->{head_element_inserted} = 1;
2242 wakaba 1.194 } else {
2243     !!!cp ('t103');
2244     }
2245     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2246     pop @{$self->{open_elements}};
2247     pop @{$self->{open_elements}} # <head>
2248     if $self->{insertion_mode} == AFTER_HEAD_IM;
2249     !!!ack ('t103.1');
2250     !!!next-token;
2251     next B;
2252 wakaba 1.232 } elsif ($token->{tag_name} eq 'command') {
2253 wakaba 1.194 if ($self->{insertion_mode} == IN_HEAD_IM) {
2254     ## NOTE: If the insertion mode at the time of the emission
2255     ## of the token was "before head", $self->{insertion_mode}
2256     ## is already changed to |IN_HEAD_IM|.
2257    
2258     ## NOTE: There is a "as if in head" code clone.
2259     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2260     pop @{$self->{open_elements}};
2261     pop @{$self->{open_elements}} # <head>
2262     if $self->{insertion_mode} == AFTER_HEAD_IM;
2263     !!!ack ('t103.2');
2264     !!!next-token;
2265     next B;
2266     } else {
2267     ## NOTE: "in head noscript" or "after head" insertion mode
2268     ## - in these cases, these tags are treated as same as
2269     ## normal in-body tags.
2270     !!!cp ('t103.3');
2271     #
2272     }
2273 wakaba 1.202 } elsif ($token->{tag_name} eq 'meta') {
2274     ## NOTE: There is a "as if in head" code clone.
2275     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2276     !!!cp ('t104');
2277     !!!parse-error (type => 'after head',
2278     text => $token->{tag_name}, token => $token);
2279     push @{$self->{open_elements}},
2280     [$self->{head_element}, $el_category->{head}];
2281     $self->{head_element_inserted} = 1;
2282     } else {
2283     !!!cp ('t105');
2284     }
2285     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2286     my $meta_el = pop @{$self->{open_elements}};
2287 wakaba 1.34
2288     unless ($self->{confident}) {
2289 wakaba 1.134 if ($token->{attributes}->{charset}) {
2290 wakaba 1.79 !!!cp ('t106');
2291 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2292     ## in the {change_encoding} callback.
2293 wakaba 1.63 $self->{change_encoding}
2294 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
2295     $token);
2296 wakaba 1.66
2297     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2298     ->set_user_data (manakai_has_reference =>
2299     $token->{attributes}->{charset}
2300     ->{has_reference});
2301 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
2302     if ($token->{attributes}->{content}->{value}
2303 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2304 wakaba 1.186 [\x09\x0A\x0C\x0D\x20]*=
2305     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2306     ([^"'\x09\x0A\x0C\x0D\x20]
2307     [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2308 wakaba 1.79 !!!cp ('t107');
2309 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2310     ## in the {change_encoding} callback.
2311 wakaba 1.63 $self->{change_encoding}
2312 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2313     $token);
2314 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2315     ->set_user_data (manakai_has_reference =>
2316     $token->{attributes}->{content}
2317     ->{has_reference});
2318 wakaba 1.79 } else {
2319     !!!cp ('t108');
2320 wakaba 1.63 }
2321 wakaba 1.34 }
2322 wakaba 1.66 } else {
2323     if ($token->{attributes}->{charset}) {
2324 wakaba 1.79 !!!cp ('t109');
2325 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2326     ->set_user_data (manakai_has_reference =>
2327     $token->{attributes}->{charset}
2328     ->{has_reference});
2329     }
2330 wakaba 1.68 if ($token->{attributes}->{content}) {
2331 wakaba 1.79 !!!cp ('t110');
2332 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2333     ->set_user_data (manakai_has_reference =>
2334     $token->{attributes}->{content}
2335     ->{has_reference});
2336     }
2337 wakaba 1.34 }
2338    
2339 wakaba 1.100 pop @{$self->{open_elements}} # <head>
2340 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
2341 wakaba 1.125 !!!ack ('t110.1');
2342 wakaba 1.34 !!!next-token;
2343 wakaba 1.126 next B;
2344 wakaba 1.202 } elsif ($token->{tag_name} eq 'title') {
2345     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2346     !!!cp ('t111');
2347     ## As if </noscript>
2348     pop @{$self->{open_elements}};
2349     !!!parse-error (type => 'in noscript', text => 'title',
2350     token => $token);
2351    
2352     $self->{insertion_mode} = IN_HEAD_IM;
2353     ## Reprocess in the "in head" insertion mode...
2354     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2355     !!!cp ('t112');
2356     !!!parse-error (type => 'after head',
2357     text => $token->{tag_name}, token => $token);
2358     push @{$self->{open_elements}},
2359     [$self->{head_element}, $el_category->{head}];
2360     $self->{head_element_inserted} = 1;
2361     } else {
2362     !!!cp ('t113');
2363     }
2364 wakaba 1.49
2365 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2366     $parse_rcdata->(RCDATA_CONTENT_MODEL);
2367 wakaba 1.225
2368     ## NOTE: At this point the stack of open elements contains
2369     ## the |head| element (index == -2) and the |title| element
2370     ## (index == -1). In the "after head" insertion mode the
2371     ## |head| element is inserted only for the purpose of
2372     ## providing the context for the |title| element, and
2373     ## therefore we can now and have to remove the element from
2374     ## the stack.
2375 wakaba 1.205 splice @{$self->{open_elements}}, -2, 1, () # <head>
2376 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2377 wakaba 1.202 next B;
2378     } elsif ($token->{tag_name} eq 'style' or
2379     $token->{tag_name} eq 'noframes') {
2380     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2381     ## insertion mode IN_HEAD_IM)
2382     ## NOTE: There is a "as if in head" code clone.
2383     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2384     !!!cp ('t114');
2385     !!!parse-error (type => 'after head',
2386     text => $token->{tag_name}, token => $token);
2387     push @{$self->{open_elements}},
2388     [$self->{head_element}, $el_category->{head}];
2389     $self->{head_element_inserted} = 1;
2390     } else {
2391     !!!cp ('t115');
2392     }
2393     $parse_rcdata->(CDATA_CONTENT_MODEL);
2394 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2395     splice @{$self->{open_elements}}, -2, 1, () # <head>
2396 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2397 wakaba 1.202 next B;
2398 wakaba 1.205 } elsif ($token->{tag_name} eq 'noscript') {
2399 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
2400 wakaba 1.79 !!!cp ('t116');
2401 wakaba 1.25 ## NOTE: and scripting is disabled
2402 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2403 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2404 wakaba 1.125 !!!nack ('t116.1');
2405 wakaba 1.1 !!!next-token;
2406 wakaba 1.126 next B;
2407 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2408 wakaba 1.79 !!!cp ('t117');
2409 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
2410     token => $token);
2411 wakaba 1.1 ## Ignore the token
2412 wakaba 1.125 !!!nack ('t117.1');
2413 wakaba 1.41 !!!next-token;
2414 wakaba 1.126 next B;
2415 wakaba 1.1 } else {
2416 wakaba 1.79 !!!cp ('t118');
2417 wakaba 1.25 #
2418 wakaba 1.1 }
2419 wakaba 1.202 } elsif ($token->{tag_name} eq 'script') {
2420     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2421     !!!cp ('t119');
2422     ## As if </noscript>
2423     pop @{$self->{open_elements}};
2424     !!!parse-error (type => 'in noscript', text => 'script',
2425     token => $token);
2426    
2427     $self->{insertion_mode} = IN_HEAD_IM;
2428     ## Reprocess in the "in head" insertion mode...
2429     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2430     !!!cp ('t120');
2431     !!!parse-error (type => 'after head',
2432     text => $token->{tag_name}, token => $token);
2433     push @{$self->{open_elements}},
2434     [$self->{head_element}, $el_category->{head}];
2435     $self->{head_element_inserted} = 1;
2436     } else {
2437     !!!cp ('t121');
2438     }
2439 wakaba 1.49
2440 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2441     $script_start_tag->();
2442 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2443     splice @{$self->{open_elements}}, -2, 1 # <head>
2444 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2445 wakaba 1.202 next B;
2446     } elsif ($token->{tag_name} eq 'body' or
2447     $token->{tag_name} eq 'frameset') {
2448 wakaba 1.243 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2449     !!!cp ('t122');
2450     ## As if </noscript>
2451     pop @{$self->{open_elements}};
2452     !!!parse-error (type => 'in noscript',
2453     text => $token->{tag_name}, token => $token);
2454    
2455     ## Reprocess in the "in head" insertion mode...
2456     ## As if </head>
2457     pop @{$self->{open_elements}};
2458    
2459     ## Reprocess in the "after head" insertion mode...
2460     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2461     !!!cp ('t124');
2462     pop @{$self->{open_elements}};
2463    
2464     ## Reprocess in the "after head" insertion mode...
2465     } else {
2466     !!!cp ('t125');
2467     }
2468 wakaba 1.49
2469 wakaba 1.243 ## "after head" insertion mode
2470     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2471     if ($token->{tag_name} eq 'body') {
2472     !!!cp ('t126');
2473     delete $self->{frameset_ok};
2474     $self->{insertion_mode} = IN_BODY_IM;
2475     } elsif ($token->{tag_name} eq 'frameset') {
2476     !!!cp ('t127');
2477     $self->{insertion_mode} = IN_FRAMESET_IM;
2478     } else {
2479     die "$0: tag name: $self->{tag_name}";
2480     }
2481     !!!nack ('t127.1');
2482     !!!next-token;
2483     next B;
2484     } else {
2485     !!!cp ('t128');
2486     #
2487     }
2488 wakaba 1.49
2489 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2490 wakaba 1.79 !!!cp ('t129');
2491 wakaba 1.49 ## As if </noscript>
2492     pop @{$self->{open_elements}};
2493 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2494     text => $token->{tag_name}, token => $token);
2495 wakaba 1.49
2496     ## Reprocess in the "in head" insertion mode...
2497     ## As if </head>
2498 wakaba 1.25 pop @{$self->{open_elements}};
2499 wakaba 1.49
2500     ## Reprocess in the "after head" insertion mode...
2501 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2502 wakaba 1.79 !!!cp ('t130');
2503 wakaba 1.49 ## As if </head>
2504 wakaba 1.25 pop @{$self->{open_elements}};
2505 wakaba 1.49
2506     ## Reprocess in the "after head" insertion mode...
2507 wakaba 1.79 } else {
2508     !!!cp ('t131');
2509 wakaba 1.49 }
2510    
2511 wakaba 1.243 ## "after head" insertion mode
2512     ## As if <body>
2513     !!!insert-element ('body',, $token);
2514     $self->{insertion_mode} = IN_BODY_IM;
2515     ## The "frameset-ok" flag is not changed in this case.
2516     ## Reprocess the token.
2517     !!!ack-later;
2518     next B;
2519 wakaba 1.238 } elsif ($token->{type} == END_TAG_TOKEN) {
2520     ## "Before head", "in head", and "after head" insertion modes
2521     ## ignore most of end tags. Exceptions are "body", "html",
2522     ## and "br" end tags. "Before head" and "in head" insertion
2523     ## modes also recognize "head" end tag. "In head noscript"
2524     ## insertion modes ignore end tags except for "noscript" and
2525     ## "br".
2526    
2527     if ($token->{tag_name} eq 'head') {
2528     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2529     !!!cp ('t132');
2530     ## As if <head>
2531     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2532     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2533     push @{$self->{open_elements}},
2534     [$self->{head_element}, $el_category->{head}];
2535 wakaba 1.50
2536 wakaba 1.238 ## Reprocess in the "in head" insertion mode...
2537     pop @{$self->{open_elements}};
2538     $self->{insertion_mode} = AFTER_HEAD_IM;
2539     !!!next-token;
2540     next B;
2541     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2542     !!!cp ('t133');
2543     #
2544     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2545     !!!cp ('t134');
2546     pop @{$self->{open_elements}};
2547     $self->{insertion_mode} = AFTER_HEAD_IM;
2548     !!!next-token;
2549     next B;
2550     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2551     !!!cp ('t134.1');
2552     #
2553     } else {
2554     die "$0: $self->{insertion_mode}: Unknown insertion mode";
2555     }
2556     } elsif ($token->{tag_name} eq 'noscript') {
2557     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2558     !!!cp ('t136');
2559     pop @{$self->{open_elements}};
2560     $self->{insertion_mode} = IN_HEAD_IM;
2561     !!!next-token;
2562     next B;
2563     } else {
2564     !!!cp ('t138');
2565     #
2566     }
2567     } elsif ({
2568     body => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
2569     html => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
2570     br => 1,
2571     }->{$token->{tag_name}}) {
2572 wakaba 1.224 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2573     !!!cp ('t142.2');
2574     ## (before head) as if <head>, (in head) as if </head>
2575     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2576     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2577     $self->{insertion_mode} = AFTER_HEAD_IM;
2578 wakaba 1.139
2579 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2580     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2581     !!!cp ('t143.2');
2582     ## As if </head>
2583     pop @{$self->{open_elements}};
2584     $self->{insertion_mode} = AFTER_HEAD_IM;
2585 wakaba 1.139
2586 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2587     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2588     !!!cp ('t143.3');
2589     ## NOTE: Two parse errors for <head><noscript></br>
2590     !!!parse-error (type => 'unmatched end tag',
2591 wakaba 1.238 text => $token->{tag_name}, token => $token);
2592 wakaba 1.224 ## As if </noscript>
2593     pop @{$self->{open_elements}};
2594     $self->{insertion_mode} = IN_HEAD_IM;
2595 wakaba 1.50
2596 wakaba 1.224 ## Reprocess in the "in head" insertion mode...
2597     ## As if </head>
2598     pop @{$self->{open_elements}};
2599     $self->{insertion_mode} = AFTER_HEAD_IM;
2600 wakaba 1.139
2601 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2602     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2603     !!!cp ('t143.4');
2604     #
2605     } else {
2606     die "$0: $self->{insertion_mode}: Unknown insertion mode";
2607     }
2608 wakaba 1.50
2609 wakaba 1.238 ## "after head" insertion mode
2610     ## As if <body>
2611     !!!insert-element ('body',, $token);
2612     $self->{insertion_mode} = IN_BODY_IM;
2613 wakaba 1.243 ## The "frameset-ok" flag is left unchanged in this case.
2614     ## Reprocess the token.
2615 wakaba 1.238 next B;
2616     }
2617 wakaba 1.49
2618 wakaba 1.238 ## End tags are ignored by default.
2619     !!!cp ('t145');
2620     !!!parse-error (type => 'unmatched end tag',
2621     text => $token->{tag_name}, token => $token);
2622     ## Ignore the token.
2623     !!!next-token;
2624 wakaba 1.224 next B;
2625 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2626     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2627     !!!cp ('t149.1');
2628    
2629     ## NOTE: As if <head>
2630 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2631 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
2632     ($self->{head_element});
2633 wakaba 1.123 #push @{$self->{open_elements}},
2634     # [$self->{head_element}, $el_category->{head}];
2635 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
2636     ## NOTE: Reprocess.
2637    
2638     ## NOTE: As if </head>
2639     #pop @{$self->{open_elements}};
2640     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2641     ## NOTE: Reprocess.
2642    
2643     #
2644     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2645     !!!cp ('t149.2');
2646    
2647     ## NOTE: As if </head>
2648     pop @{$self->{open_elements}};
2649     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2650     ## NOTE: Reprocess.
2651    
2652     #
2653     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2654     !!!cp ('t149.3');
2655    
2656 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
2657 wakaba 1.104
2658     ## As if </noscript>
2659     pop @{$self->{open_elements}};
2660     #$self->{insertion_mode} = IN_HEAD_IM;
2661     ## NOTE: Reprocess.
2662    
2663     ## NOTE: As if </head>
2664     pop @{$self->{open_elements}};
2665     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2666     ## NOTE: Reprocess.
2667    
2668     #
2669     } else {
2670     !!!cp ('t149.4');
2671     #
2672     }
2673    
2674     ## NOTE: As if <body>
2675 wakaba 1.116 !!!insert-element ('body',, $token);
2676 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
2677 wakaba 1.243 ## The "frameset-ok" flag is left unchanged in this case.
2678     ## Reprocess the token.
2679 wakaba 1.126 next B;
2680 wakaba 1.104 } else {
2681     die "$0: $token->{type}: Unknown token type";
2682     }
2683 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
2684 wakaba 1.243 if ($token->{type} == CHARACTER_TOKEN) {
2685     !!!cp ('t150');
2686     $reconstruct_active_formatting_elements->($insert_to_current);
2687    
2688     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2689    
2690     if ($token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) {
2691     delete $self->{frameset_ok};
2692     }
2693 wakaba 1.52
2694 wakaba 1.243 !!!next-token;
2695     next B;
2696     } elsif ($token->{type} == START_TAG_TOKEN) {
2697 wakaba 1.52 if ({
2698     caption => 1, col => 1, colgroup => 1, tbody => 1,
2699     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2700     }->{$token->{tag_name}}) {
2701 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2702 wakaba 1.52 ## have an element in table scope
2703 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
2704 wakaba 1.52 my $node = $self->{open_elements}->[$_];
2705 wakaba 1.206 if ($node->[1] == TABLE_CELL_EL) {
2706 wakaba 1.79 !!!cp ('t151');
2707 wakaba 1.108
2708     ## Close the cell
2709 wakaba 1.125 !!!back-token; # <x>
2710 wakaba 1.122 $token = {type => END_TAG_TOKEN,
2711     tag_name => $node->[0]->manakai_local_name,
2712 wakaba 1.114 line => $token->{line},
2713     column => $token->{column}};
2714 wakaba 1.126 next B;
2715 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2716 wakaba 1.79 !!!cp ('t152');
2717 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
2718     last;
2719 wakaba 1.52 }
2720 wakaba 1.108 }
2721    
2722     !!!cp ('t153');
2723     !!!parse-error (type => 'start tag not allowed',
2724 wakaba 1.153 text => $token->{tag_name}, token => $token);
2725 wakaba 1.108 ## Ignore the token
2726 wakaba 1.125 !!!nack ('t153.1');
2727 wakaba 1.108 !!!next-token;
2728 wakaba 1.126 next B;
2729 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2730 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2731     token => $token);
2732 wakaba 1.52
2733 wakaba 1.108 ## NOTE: As if </caption>.
2734 wakaba 1.52 ## have a table element in table scope
2735     my $i;
2736 wakaba 1.108 INSCOPE: {
2737     for (reverse 0..$#{$self->{open_elements}}) {
2738     my $node = $self->{open_elements}->[$_];
2739 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2740 wakaba 1.108 !!!cp ('t155');
2741     $i = $_;
2742     last INSCOPE;
2743 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2744 wakaba 1.108 !!!cp ('t156');
2745     last;
2746     }
2747 wakaba 1.52 }
2748 wakaba 1.108
2749     !!!cp ('t157');
2750     !!!parse-error (type => 'start tag not allowed',
2751 wakaba 1.153 text => $token->{tag_name}, token => $token);
2752 wakaba 1.108 ## Ignore the token
2753 wakaba 1.125 !!!nack ('t157.1');
2754 wakaba 1.108 !!!next-token;
2755 wakaba 1.126 next B;
2756 wakaba 1.52 } # INSCOPE
2757    
2758     ## generate implied end tags
2759 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2760     & END_TAG_OPTIONAL_EL) {
2761 wakaba 1.79 !!!cp ('t158');
2762 wakaba 1.86 pop @{$self->{open_elements}};
2763 wakaba 1.52 }
2764    
2765 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2766 wakaba 1.79 !!!cp ('t159');
2767 wakaba 1.122 !!!parse-error (type => 'not closed',
2768 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2769 wakaba 1.122 ->manakai_local_name,
2770     token => $token);
2771 wakaba 1.79 } else {
2772     !!!cp ('t160');
2773 wakaba 1.52 }
2774    
2775     splice @{$self->{open_elements}}, $i;
2776    
2777     $clear_up_to_marker->();
2778    
2779 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2780 wakaba 1.52
2781     ## reprocess
2782 wakaba 1.125 !!!ack-later;
2783 wakaba 1.126 next B;
2784 wakaba 1.52 } else {
2785 wakaba 1.79 !!!cp ('t161');
2786 wakaba 1.52 #
2787     }
2788     } else {
2789 wakaba 1.79 !!!cp ('t162');
2790 wakaba 1.52 #
2791     }
2792 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2793 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2794 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2795 wakaba 1.43 ## have an element in table scope
2796 wakaba 1.52 my $i;
2797 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2798     my $node = $self->{open_elements}->[$_];
2799 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2800 wakaba 1.79 !!!cp ('t163');
2801 wakaba 1.52 $i = $_;
2802 wakaba 1.43 last INSCOPE;
2803 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2804 wakaba 1.79 !!!cp ('t164');
2805 wakaba 1.43 last INSCOPE;
2806     }
2807     } # INSCOPE
2808 wakaba 1.52 unless (defined $i) {
2809 wakaba 1.79 !!!cp ('t165');
2810 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2811     text => $token->{tag_name},
2812     token => $token);
2813 wakaba 1.43 ## Ignore the token
2814     !!!next-token;
2815 wakaba 1.126 next B;
2816 wakaba 1.43 }
2817    
2818 wakaba 1.52 ## generate implied end tags
2819 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2820     & END_TAG_OPTIONAL_EL) {
2821 wakaba 1.79 !!!cp ('t166');
2822 wakaba 1.86 pop @{$self->{open_elements}};
2823 wakaba 1.52 }
2824 wakaba 1.86
2825 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2826     ne $token->{tag_name}) {
2827 wakaba 1.79 !!!cp ('t167');
2828 wakaba 1.122 !!!parse-error (type => 'not closed',
2829 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2830 wakaba 1.122 ->manakai_local_name,
2831     token => $token);
2832 wakaba 1.79 } else {
2833     !!!cp ('t168');
2834 wakaba 1.52 }
2835    
2836     splice @{$self->{open_elements}}, $i;
2837    
2838     $clear_up_to_marker->();
2839    
2840 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
2841 wakaba 1.52
2842     !!!next-token;
2843 wakaba 1.126 next B;
2844 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2845 wakaba 1.79 !!!cp ('t169');
2846 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2847     text => $token->{tag_name}, token => $token);
2848 wakaba 1.52 ## Ignore the token
2849     !!!next-token;
2850 wakaba 1.126 next B;
2851 wakaba 1.52 } else {
2852 wakaba 1.79 !!!cp ('t170');
2853 wakaba 1.52 #
2854     }
2855     } elsif ($token->{tag_name} eq 'caption') {
2856 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2857 wakaba 1.43 ## have a table element in table scope
2858     my $i;
2859 wakaba 1.108 INSCOPE: {
2860     for (reverse 0..$#{$self->{open_elements}}) {
2861     my $node = $self->{open_elements}->[$_];
2862 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2863 wakaba 1.108 !!!cp ('t171');
2864     $i = $_;
2865     last INSCOPE;
2866 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2867 wakaba 1.108 !!!cp ('t172');
2868     last;
2869     }
2870 wakaba 1.43 }
2871 wakaba 1.108
2872     !!!cp ('t173');
2873     !!!parse-error (type => 'unmatched end tag',
2874 wakaba 1.153 text => $token->{tag_name}, token => $token);
2875 wakaba 1.108 ## Ignore the token
2876     !!!next-token;
2877 wakaba 1.126 next B;
2878 wakaba 1.43 } # INSCOPE
2879    
2880     ## generate implied end tags
2881 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2882     & END_TAG_OPTIONAL_EL) {
2883 wakaba 1.79 !!!cp ('t174');
2884 wakaba 1.86 pop @{$self->{open_elements}};
2885 wakaba 1.43 }
2886 wakaba 1.52
2887 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2888 wakaba 1.79 !!!cp ('t175');
2889 wakaba 1.122 !!!parse-error (type => 'not closed',
2890 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2891 wakaba 1.122 ->manakai_local_name,
2892     token => $token);
2893 wakaba 1.79 } else {
2894     !!!cp ('t176');
2895 wakaba 1.52 }
2896    
2897     splice @{$self->{open_elements}}, $i;
2898    
2899     $clear_up_to_marker->();
2900    
2901 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2902 wakaba 1.52
2903     !!!next-token;
2904 wakaba 1.126 next B;
2905 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2906 wakaba 1.79 !!!cp ('t177');
2907 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2908     text => $token->{tag_name}, token => $token);
2909 wakaba 1.52 ## Ignore the token
2910     !!!next-token;
2911 wakaba 1.126 next B;
2912 wakaba 1.52 } else {
2913 wakaba 1.79 !!!cp ('t178');
2914 wakaba 1.52 #
2915     }
2916     } elsif ({
2917     table => 1, tbody => 1, tfoot => 1,
2918     thead => 1, tr => 1,
2919     }->{$token->{tag_name}} and
2920 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2921 wakaba 1.52 ## have an element in table scope
2922     my $i;
2923     my $tn;
2924 wakaba 1.108 INSCOPE: {
2925     for (reverse 0..$#{$self->{open_elements}}) {
2926     my $node = $self->{open_elements}->[$_];
2927 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2928 wakaba 1.108 !!!cp ('t179');
2929     $i = $_;
2930    
2931     ## Close the cell
2932 wakaba 1.125 !!!back-token; # </x>
2933 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2934     line => $token->{line},
2935     column => $token->{column}};
2936 wakaba 1.126 next B;
2937 wakaba 1.206 } elsif ($node->[1] == TABLE_CELL_EL) {
2938 wakaba 1.108 !!!cp ('t180');
2939 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
2940 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
2941     ## in scope in the stack of open elements by definition.
2942 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2943 wakaba 1.108 ## ISSUE: Can this be reached?
2944     !!!cp ('t181');
2945     last;
2946     }
2947 wakaba 1.52 }
2948 wakaba 1.108
2949 wakaba 1.79 !!!cp ('t182');
2950 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
2951 wakaba 1.153 text => $token->{tag_name}, token => $token);
2952 wakaba 1.52 ## Ignore the token
2953     !!!next-token;
2954 wakaba 1.126 next B;
2955 wakaba 1.108 } # INSCOPE
2956 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
2957 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2958 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2959     token => $token);
2960 wakaba 1.52
2961     ## As if </caption>
2962     ## have a table element in table scope
2963     my $i;
2964     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2965     my $node = $self->{open_elements}->[$_];
2966 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2967 wakaba 1.79 !!!cp ('t184');
2968 wakaba 1.52 $i = $_;
2969     last INSCOPE;
2970 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2971 wakaba 1.79 !!!cp ('t185');
2972 wakaba 1.52 last INSCOPE;
2973     }
2974     } # INSCOPE
2975     unless (defined $i) {
2976 wakaba 1.79 !!!cp ('t186');
2977 wakaba 1.209 ## TODO: Wrong error type?
2978 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2979     text => 'caption', token => $token);
2980 wakaba 1.52 ## Ignore the token
2981     !!!next-token;
2982 wakaba 1.126 next B;
2983 wakaba 1.52 }
2984    
2985     ## generate implied end tags
2986 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
2987 wakaba 1.79 !!!cp ('t187');
2988 wakaba 1.86 pop @{$self->{open_elements}};
2989 wakaba 1.52 }
2990    
2991 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2992 wakaba 1.79 !!!cp ('t188');
2993 wakaba 1.122 !!!parse-error (type => 'not closed',
2994 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2995 wakaba 1.122 ->manakai_local_name,
2996     token => $token);
2997 wakaba 1.79 } else {
2998     !!!cp ('t189');
2999 wakaba 1.52 }
3000    
3001     splice @{$self->{open_elements}}, $i;
3002    
3003     $clear_up_to_marker->();
3004    
3005 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3006 wakaba 1.52
3007     ## reprocess
3008 wakaba 1.126 next B;
3009 wakaba 1.52 } elsif ({
3010     body => 1, col => 1, colgroup => 1, html => 1,
3011     }->{$token->{tag_name}}) {
3012 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
3013 wakaba 1.79 !!!cp ('t190');
3014 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3015     text => $token->{tag_name}, token => $token);
3016 wakaba 1.52 ## Ignore the token
3017     !!!next-token;
3018 wakaba 1.126 next B;
3019 wakaba 1.52 } else {
3020 wakaba 1.79 !!!cp ('t191');
3021 wakaba 1.52 #
3022     }
3023 wakaba 1.210 } elsif ({
3024     tbody => 1, tfoot => 1,
3025     thead => 1, tr => 1,
3026     }->{$token->{tag_name}} and
3027     ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
3028     !!!cp ('t192');
3029     !!!parse-error (type => 'unmatched end tag',
3030     text => $token->{tag_name}, token => $token);
3031     ## Ignore the token
3032     !!!next-token;
3033     next B;
3034     } else {
3035     !!!cp ('t193');
3036     #
3037     }
3038 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3039     for my $entry (@{$self->{open_elements}}) {
3040 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
3041 wakaba 1.104 !!!cp ('t75');
3042 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3043 wakaba 1.104 last;
3044     }
3045     }
3046    
3047     ## Stop parsing.
3048     last B;
3049 wakaba 1.52 } else {
3050     die "$0: $token->{type}: Unknown token type";
3051     }
3052    
3053     $insert = $insert_to_current;
3054     #
3055 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3056 wakaba 1.229 if ($token->{type} == START_TAG_TOKEN) {
3057 wakaba 1.153 if ({
3058 wakaba 1.210 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3059 wakaba 1.153 th => 1, td => 1,
3060     }->{$token->{tag_name}}) {
3061 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3062 wakaba 1.153 ## Clear back to table context
3063     while (not ($self->{open_elements}->[-1]->[1]
3064     & TABLE_SCOPING_EL)) {
3065     !!!cp ('t201');
3066     pop @{$self->{open_elements}};
3067     }
3068    
3069     !!!insert-element ('tbody',, $token);
3070     $self->{insertion_mode} = IN_TABLE_BODY_IM;
3071     ## reprocess in the "in table body" insertion mode...
3072     }
3073    
3074 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3075 wakaba 1.153 unless ($token->{tag_name} eq 'tr') {
3076     !!!cp ('t202');
3077     !!!parse-error (type => 'missing start tag:tr', token => $token);
3078     }
3079 wakaba 1.43
3080 wakaba 1.153 ## Clear back to table body context
3081     while (not ($self->{open_elements}->[-1]->[1]
3082     & TABLE_ROWS_SCOPING_EL)) {
3083     !!!cp ('t203');
3084     ## ISSUE: Can this case be reached?
3085     pop @{$self->{open_elements}};
3086     }
3087 wakaba 1.43
3088 wakaba 1.202 $self->{insertion_mode} = IN_ROW_IM;
3089     if ($token->{tag_name} eq 'tr') {
3090     !!!cp ('t204');
3091     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3092     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3093     !!!nack ('t204');
3094     !!!next-token;
3095     next B;
3096     } else {
3097     !!!cp ('t205');
3098     !!!insert-element ('tr',, $token);
3099     ## reprocess in the "in row" insertion mode
3100     }
3101     } else {
3102     !!!cp ('t206');
3103     }
3104 wakaba 1.52
3105     ## Clear back to table row context
3106 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3107     & TABLE_ROW_SCOPING_EL)) {
3108 wakaba 1.79 !!!cp ('t207');
3109 wakaba 1.52 pop @{$self->{open_elements}};
3110 wakaba 1.43 }
3111 wakaba 1.52
3112 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3113     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3114     $self->{insertion_mode} = IN_CELL_IM;
3115 wakaba 1.52
3116 wakaba 1.202 push @$active_formatting_elements, ['#marker', ''];
3117 wakaba 1.52
3118 wakaba 1.202 !!!nack ('t207.1');
3119     !!!next-token;
3120     next B;
3121     } elsif ({
3122     caption => 1, col => 1, colgroup => 1,
3123     tbody => 1, tfoot => 1, thead => 1,
3124     tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3125     }->{$token->{tag_name}}) {
3126 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3127 wakaba 1.202 ## As if </tr>
3128     ## have an element in table scope
3129     my $i;
3130     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3131     my $node = $self->{open_elements}->[$_];
3132 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3133 wakaba 1.202 !!!cp ('t208');
3134     $i = $_;
3135     last INSCOPE;
3136     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3137     !!!cp ('t209');
3138     last INSCOPE;
3139     }
3140     } # INSCOPE
3141     unless (defined $i) {
3142     !!!cp ('t210');
3143     ## TODO: This type is wrong.
3144     !!!parse-error (type => 'unmacthed end tag',
3145     text => $token->{tag_name}, token => $token);
3146     ## Ignore the token
3147     !!!nack ('t210.1');
3148 wakaba 1.52 !!!next-token;
3149 wakaba 1.126 next B;
3150 wakaba 1.202 }
3151 wakaba 1.43
3152 wakaba 1.52 ## Clear back to table row context
3153 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3154     & TABLE_ROW_SCOPING_EL)) {
3155 wakaba 1.79 !!!cp ('t211');
3156 wakaba 1.83 ## ISSUE: Can this case be reached?
3157 wakaba 1.52 pop @{$self->{open_elements}};
3158 wakaba 1.1 }
3159 wakaba 1.43
3160 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3161 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3162 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
3163 wakaba 1.79 !!!cp ('t212');
3164 wakaba 1.52 ## reprocess
3165 wakaba 1.125 !!!ack-later;
3166 wakaba 1.126 next B;
3167 wakaba 1.52 } else {
3168 wakaba 1.79 !!!cp ('t213');
3169 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3170     }
3171 wakaba 1.1 }
3172 wakaba 1.52
3173 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3174 wakaba 1.52 ## have an element in table scope
3175 wakaba 1.43 my $i;
3176     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3177     my $node = $self->{open_elements}->[$_];
3178 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3179 wakaba 1.79 !!!cp ('t214');
3180 wakaba 1.43 $i = $_;
3181     last INSCOPE;
3182 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3183 wakaba 1.79 !!!cp ('t215');
3184 wakaba 1.43 last INSCOPE;
3185     }
3186     } # INSCOPE
3187 wakaba 1.52 unless (defined $i) {
3188 wakaba 1.79 !!!cp ('t216');
3189 wakaba 1.153 ## TODO: This error type is wrong.
3190     !!!parse-error (type => 'unmatched end tag',
3191     text => $token->{tag_name}, token => $token);
3192 wakaba 1.52 ## Ignore the token
3193 wakaba 1.125 !!!nack ('t216.1');
3194 wakaba 1.52 !!!next-token;
3195 wakaba 1.126 next B;
3196 wakaba 1.43 }
3197 wakaba 1.52
3198     ## Clear back to table body context
3199 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3200     & TABLE_ROWS_SCOPING_EL)) {
3201 wakaba 1.79 !!!cp ('t217');
3202 wakaba 1.83 ## ISSUE: Can this state be reached?
3203 wakaba 1.52 pop @{$self->{open_elements}};
3204 wakaba 1.43 }
3205    
3206 wakaba 1.52 ## As if <{current node}>
3207     ## have an element in table scope
3208     ## true by definition
3209 wakaba 1.43
3210 wakaba 1.52 ## Clear back to table body context
3211     ## nop by definition
3212 wakaba 1.43
3213 wakaba 1.52 pop @{$self->{open_elements}};
3214 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3215 wakaba 1.52 ## reprocess in "in table" insertion mode...
3216 wakaba 1.79 } else {
3217     !!!cp ('t218');
3218 wakaba 1.52 }
3219    
3220 wakaba 1.202 if ($token->{tag_name} eq 'col') {
3221     ## Clear back to table context
3222     while (not ($self->{open_elements}->[-1]->[1]
3223     & TABLE_SCOPING_EL)) {
3224     !!!cp ('t219');
3225     ## ISSUE: Can this state be reached?
3226     pop @{$self->{open_elements}};
3227     }
3228    
3229     !!!insert-element ('colgroup',, $token);
3230     $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3231     ## reprocess
3232     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3233     !!!ack-later;
3234     next B;
3235     } elsif ({
3236     caption => 1,
3237     colgroup => 1,
3238     tbody => 1, tfoot => 1, thead => 1,
3239     }->{$token->{tag_name}}) {
3240     ## Clear back to table context
3241 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3242     & TABLE_SCOPING_EL)) {
3243 wakaba 1.79 !!!cp ('t220');
3244 wakaba 1.83 ## ISSUE: Can this state be reached?
3245 wakaba 1.52 pop @{$self->{open_elements}};
3246 wakaba 1.1 }
3247 wakaba 1.52
3248 wakaba 1.202 push @$active_formatting_elements, ['#marker', '']
3249     if $token->{tag_name} eq 'caption';
3250 wakaba 1.52
3251 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3252     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3253     $self->{insertion_mode} = {
3254     caption => IN_CAPTION_IM,
3255     colgroup => IN_COLUMN_GROUP_IM,
3256     tbody => IN_TABLE_BODY_IM,
3257     tfoot => IN_TABLE_BODY_IM,
3258     thead => IN_TABLE_BODY_IM,
3259     }->{$token->{tag_name}};
3260     !!!next-token;
3261     !!!nack ('t220.1');
3262     next B;
3263     } else {
3264     die "$0: in table: <>: $token->{tag_name}";
3265     }
3266 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3267 wakaba 1.122 !!!parse-error (type => 'not closed',
3268 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3269 wakaba 1.122 ->manakai_local_name,
3270     token => $token);
3271 wakaba 1.1
3272 wakaba 1.52 ## As if </table>
3273 wakaba 1.1 ## have a table element in table scope
3274     my $i;
3275 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3276     my $node = $self->{open_elements}->[$_];
3277 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3278 wakaba 1.79 !!!cp ('t221');
3279 wakaba 1.1 $i = $_;
3280     last INSCOPE;
3281 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3282 wakaba 1.79 !!!cp ('t222');
3283 wakaba 1.1 last INSCOPE;
3284     }
3285     } # INSCOPE
3286     unless (defined $i) {
3287 wakaba 1.79 !!!cp ('t223');
3288 wakaba 1.83 ## TODO: The following is wrong, maybe.
3289 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
3290     token => $token);
3291 wakaba 1.52 ## Ignore tokens </table><table>
3292 wakaba 1.125 !!!nack ('t223.1');
3293 wakaba 1.1 !!!next-token;
3294 wakaba 1.126 next B;
3295 wakaba 1.1 }
3296    
3297 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
3298 wakaba 1.1 ## generate implied end tags
3299 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3300 wakaba 1.79 !!!cp ('t224');
3301 wakaba 1.86 pop @{$self->{open_elements}};
3302 wakaba 1.1 }
3303    
3304 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3305 wakaba 1.79 !!!cp ('t225');
3306 wakaba 1.122 ## NOTE: |<table><tr><table>|
3307     !!!parse-error (type => 'not closed',
3308 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3309 wakaba 1.122 ->manakai_local_name,
3310     token => $token);
3311 wakaba 1.79 } else {
3312     !!!cp ('t226');
3313 wakaba 1.1 }
3314    
3315 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3316 wakaba 1.95 pop @{$open_tables};
3317 wakaba 1.1
3318 wakaba 1.52 $self->_reset_insertion_mode;
3319 wakaba 1.1
3320 wakaba 1.125 ## reprocess
3321     !!!ack-later;
3322 wakaba 1.126 next B;
3323 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
3324 wakaba 1.233 !!!cp ('t227.8');
3325     ## NOTE: This is an "as if in head" code clone.
3326     $parse_rcdata->(CDATA_CONTENT_MODEL);
3327     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3328     next B;
3329 wakaba 1.100 } elsif ($token->{tag_name} eq 'script') {
3330 wakaba 1.233 !!!cp ('t227.6');
3331     ## NOTE: This is an "as if in head" code clone.
3332     $script_start_tag->();
3333     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3334     next B;
3335 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
3336 wakaba 1.233 if ($token->{attributes}->{type}) {
3337     my $type = $token->{attributes}->{type}->{value};
3338     $type =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
3339     if ($type eq 'hidden') {
3340     !!!cp ('t227.3');
3341     !!!parse-error (type => 'in table',
3342     text => $token->{tag_name}, token => $token);
3343 wakaba 1.98
3344 wakaba 1.233 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3345     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3346 wakaba 1.98
3347 wakaba 1.233 ## TODO: form element pointer
3348 wakaba 1.98
3349 wakaba 1.233 pop @{$self->{open_elements}};
3350 wakaba 1.98
3351 wakaba 1.233 !!!next-token;
3352     !!!ack ('t227.2.1');
3353     next B;
3354 wakaba 1.98 } else {
3355     !!!cp ('t227.1');
3356     #
3357     }
3358     } else {
3359     !!!cp ('t227.4');
3360     #
3361     }
3362 wakaba 1.58 } else {
3363 wakaba 1.79 !!!cp ('t227');
3364 wakaba 1.58 #
3365     }
3366 wakaba 1.98
3367 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
3368     token => $token);
3369 wakaba 1.98
3370     $insert = $insert_to_foster;
3371     #
3372 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
3373 wakaba 1.210 if ($token->{tag_name} eq 'tr' and
3374     ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3375     ## have an element in table scope
3376 wakaba 1.52 my $i;
3377     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3378     my $node = $self->{open_elements}->[$_];
3379 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3380 wakaba 1.79 !!!cp ('t228');
3381 wakaba 1.52 $i = $_;
3382     last INSCOPE;
3383 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3384 wakaba 1.79 !!!cp ('t229');
3385 wakaba 1.52 last INSCOPE;
3386     }
3387     } # INSCOPE
3388     unless (defined $i) {
3389 wakaba 1.79 !!!cp ('t230');
3390 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3391     text => $token->{tag_name}, token => $token);
3392 wakaba 1.52 ## Ignore the token
3393 wakaba 1.125 !!!nack ('t230.1');
3394 wakaba 1.42 !!!next-token;
3395 wakaba 1.126 next B;
3396 wakaba 1.79 } else {
3397     !!!cp ('t232');
3398 wakaba 1.42 }
3399    
3400 wakaba 1.52 ## Clear back to table row context
3401 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3402     & TABLE_ROW_SCOPING_EL)) {
3403 wakaba 1.79 !!!cp ('t231');
3404 wakaba 1.83 ## ISSUE: Can this state be reached?
3405 wakaba 1.52 pop @{$self->{open_elements}};
3406     }
3407 wakaba 1.42
3408 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3409 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3410 wakaba 1.52 !!!next-token;
3411 wakaba 1.125 !!!nack ('t231.1');
3412 wakaba 1.126 next B;
3413 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3414 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3415 wakaba 1.52 ## As if </tr>
3416     ## have an element in table scope
3417     my $i;
3418     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3419     my $node = $self->{open_elements}->[$_];
3420 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3421 wakaba 1.79 !!!cp ('t233');
3422 wakaba 1.52 $i = $_;
3423     last INSCOPE;
3424 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3425 wakaba 1.79 !!!cp ('t234');
3426 wakaba 1.52 last INSCOPE;
3427 wakaba 1.42 }
3428 wakaba 1.52 } # INSCOPE
3429     unless (defined $i) {
3430 wakaba 1.79 !!!cp ('t235');
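3431 wakaba 1.83 ## TODO: The following is wrong.  NOTE(review): |text| is given the token type here, not a tag name such as 'tr' as in the sibling branches; confirm.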
3432 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3433     text => $token->{type}, token => $token);
3434 wakaba 1.52 ## Ignore the token
3435 wakaba 1.125 !!!nack ('t236.1');
3436 wakaba 1.52 !!!next-token;
3437 wakaba 1.126 next B;
3438 wakaba 1.42 }
3439 wakaba 1.52
3440     ## Clear back to table row context
3441 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3442     & TABLE_ROW_SCOPING_EL)) {
3443 wakaba 1.79 !!!cp ('t236');
3444 wakaba 1.83 ## ISSUE: Can this state be reached?
3445 wakaba 1.46 pop @{$self->{open_elements}};
3446 wakaba 1.1 }
3447 wakaba 1.46
3448 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3449 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3450 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
3451 wakaba 1.1 }
3452    
3453 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3454 wakaba 1.52 ## have an element in table scope
3455     my $i;
3456     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3457     my $node = $self->{open_elements}->[$_];
3458 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3459 wakaba 1.79 !!!cp ('t237');
3460 wakaba 1.52 $i = $_;
3461     last INSCOPE;
3462 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3463 wakaba 1.79 !!!cp ('t238');
3464 wakaba 1.52 last INSCOPE;
3465     }
3466     } # INSCOPE
3467     unless (defined $i) {
3468 wakaba 1.79 !!!cp ('t239');
3469 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3470     text => $token->{tag_name}, token => $token);
3471 wakaba 1.52 ## Ignore the token
3472 wakaba 1.125 !!!nack ('t239.1');
3473 wakaba 1.52 !!!next-token;
3474 wakaba 1.126 next B;
3475 wakaba 1.47 }
3476    
3477     ## Clear back to table body context
3478 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3479     & TABLE_ROWS_SCOPING_EL)) {
3480 wakaba 1.79 !!!cp ('t240');
3481 wakaba 1.47 pop @{$self->{open_elements}};
3482     }
3483    
3484 wakaba 1.52 ## As if </{current node}>
3485     ## have an element in table scope
3486     ## true by definition
3487    
3488     ## Clear back to table body context
3489     ## nop by definition
3490    
3491     pop @{$self->{open_elements}};
3492 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3493 wakaba 1.52 ## reprocess in the "in table" insertion mode...
3494     }
3495    
3496 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
3497     ## When you edit the code fragment below, please ensure that
3498     ## the code for <table> in the "in table" insertion mode
3499     ## is synced with it.
3500    
3501 wakaba 1.52 ## have a table element in table scope
3502     my $i;
3503     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3504     my $node = $self->{open_elements}->[$_];
3505 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3506 wakaba 1.79 !!!cp ('t241');
3507 wakaba 1.52 $i = $_;
3508     last INSCOPE;
3509 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3510 wakaba 1.79 !!!cp ('t242');
3511 wakaba 1.52 last INSCOPE;
3512 wakaba 1.47 }
3513 wakaba 1.52 } # INSCOPE
3514     unless (defined $i) {
3515 wakaba 1.79 !!!cp ('t243');
3516 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3517     text => $token->{tag_name}, token => $token);
3518 wakaba 1.52 ## Ignore the token
3519 wakaba 1.125 !!!nack ('t243.1');
3520 wakaba 1.52 !!!next-token;
3521 wakaba 1.126 next B;
3522 wakaba 1.3 }
3523 wakaba 1.52
3524     splice @{$self->{open_elements}}, $i;
3525 wakaba 1.95 pop @{$open_tables};
3526 wakaba 1.1
3527 wakaba 1.52 $self->_reset_insertion_mode;
3528 wakaba 1.47
3529     !!!next-token;
3530 wakaba 1.126 next B;
3531 wakaba 1.47 } elsif ({
3532 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
3533 wakaba 1.52 }->{$token->{tag_name}} and
3534 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
3535 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3536 wakaba 1.52 ## have an element in table scope
3537     my $i;
3538     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3539     my $node = $self->{open_elements}->[$_];
3540 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3541 wakaba 1.79 !!!cp ('t247');
3542 wakaba 1.52 $i = $_;
3543     last INSCOPE;
3544 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3545 wakaba 1.79 !!!cp ('t248');
3546 wakaba 1.52 last INSCOPE;
3547     }
3548     } # INSCOPE
3549     unless (defined $i) {
3550 wakaba 1.79 !!!cp ('t249');
3551 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3552     text => $token->{tag_name}, token => $token);
3553 wakaba 1.52 ## Ignore the token
3554 wakaba 1.125 !!!nack ('t249.1');
3555 wakaba 1.52 !!!next-token;
3556 wakaba 1.126 next B;
3557 wakaba 1.52 }
3558    
3559 wakaba 1.48 ## As if </tr>
3560     ## have an element in table scope
3561     my $i;
3562     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3563     my $node = $self->{open_elements}->[$_];
3564 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3565 wakaba 1.79 !!!cp ('t250');
3566 wakaba 1.48 $i = $_;
3567     last INSCOPE;
3568 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3569 wakaba 1.79 !!!cp ('t251');
3570 wakaba 1.48 last INSCOPE;
3571     }
3572     } # INSCOPE
3573 wakaba 1.52 unless (defined $i) {
3574 wakaba 1.79 !!!cp ('t252');
3575 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3576     text => 'tr', token => $token);
3577 wakaba 1.52 ## Ignore the token
3578 wakaba 1.125 !!!nack ('t252.1');
3579 wakaba 1.52 !!!next-token;
3580 wakaba 1.126 next B;
3581 wakaba 1.52 }
3582 wakaba 1.48
3583     ## Clear back to table row context
3584 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3585     & TABLE_ROW_SCOPING_EL)) {
3586 wakaba 1.79 !!!cp ('t253');
3587 wakaba 1.83 ## ISSUE: Can this case be reached?
3588 wakaba 1.48 pop @{$self->{open_elements}};
3589     }
3590    
3591     pop @{$self->{open_elements}}; # tr
3592 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3593 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3594     }
3595    
3596     ## have an element in table scope
3597     my $i;
3598     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3599     my $node = $self->{open_elements}->[$_];
3600 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3601 wakaba 1.79 !!!cp ('t254');
3602 wakaba 1.52 $i = $_;
3603     last INSCOPE;
3604 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3605 wakaba 1.79 !!!cp ('t255');
3606 wakaba 1.52 last INSCOPE;
3607     }
3608     } # INSCOPE
3609     unless (defined $i) {
3610 wakaba 1.79 !!!cp ('t256');
3611 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3612     text => $token->{tag_name}, token => $token);
3613 wakaba 1.52 ## Ignore the token
3614 wakaba 1.125 !!!nack ('t256.1');
3615 wakaba 1.52 !!!next-token;
3616 wakaba 1.126 next B;
3617 wakaba 1.52 }
3618    
3619     ## Clear back to table body context
3620 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3621     & TABLE_ROWS_SCOPING_EL)) {
3622 wakaba 1.79 !!!cp ('t257');
3623 wakaba 1.83 ## ISSUE: Can this case be reached?
3624 wakaba 1.52 pop @{$self->{open_elements}};
3625     }
3626    
3627     pop @{$self->{open_elements}};
3628 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3629 wakaba 1.125 !!!nack ('t257.1');
3630 wakaba 1.52 !!!next-token;
3631 wakaba 1.126 next B;
3632 wakaba 1.52 } elsif ({
3633     body => 1, caption => 1, col => 1, colgroup => 1,
3634     html => 1, td => 1, th => 1,
3635 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3636     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3637 wakaba 1.52 }->{$token->{tag_name}}) {
3638 wakaba 1.125 !!!cp ('t258');
3639 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3640     text => $token->{tag_name}, token => $token);
3641 wakaba 1.125 ## Ignore the token
3642     !!!nack ('t258.1');
3643     !!!next-token;
3644 wakaba 1.126 next B;
3645 wakaba 1.58 } else {
3646 wakaba 1.79 !!!cp ('t259');
3647 wakaba 1.153 !!!parse-error (type => 'in table:/',
3648     text => $token->{tag_name}, token => $token);
3649 wakaba 1.52
3650 wakaba 1.58 $insert = $insert_to_foster;
3651     #
3652     }
3653 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3654 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3655 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3656 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3657 wakaba 1.104 !!!cp ('t259.1');
3658 wakaba 1.105 #
3659 wakaba 1.104 } else {
3660     !!!cp ('t259.2');
3661 wakaba 1.105 #
3662 wakaba 1.104 }
3663    
3664     ## Stop parsing
3665     last B;
3666 wakaba 1.58 } else {
3667     die "$0: $token->{type}: Unknown token type";
3668     }
3669 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3670 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3671 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3672 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3673     unless (length $token->{data}) {
3674 wakaba 1.79 !!!cp ('t260');
3675 wakaba 1.52 !!!next-token;
3676 wakaba 1.126 next B;
3677 wakaba 1.52 }
3678     }
3679    
3680 wakaba 1.79 !!!cp ('t261');
3681 wakaba 1.52 #
3682 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3683 wakaba 1.52 if ($token->{tag_name} eq 'col') {
3684 wakaba 1.79 !!!cp ('t262');
3685 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3686 wakaba 1.52 pop @{$self->{open_elements}};
3687 wakaba 1.125 !!!ack ('t262.1');
3688 wakaba 1.52 !!!next-token;
3689 wakaba 1.126 next B;
3690 wakaba 1.52 } else {
3691 wakaba 1.79 !!!cp ('t263');
3692 wakaba 1.52 #
3693     }
3694 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3695 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
3696 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3697 wakaba 1.79 !!!cp ('t264');
3698 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3699     text => 'colgroup', token => $token);
3700 wakaba 1.52 ## Ignore the token
3701     !!!next-token;
3702 wakaba 1.126 next B;
3703 wakaba 1.52 } else {
3704 wakaba 1.79 !!!cp ('t265');
3705 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3706 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3707 wakaba 1.52 !!!next-token;
3708 wakaba 1.126 next B;
3709 wakaba 1.52 }
3710     } elsif ($token->{tag_name} eq 'col') {
3711 wakaba 1.79 !!!cp ('t266');
3712 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3713     text => 'col', token => $token);
3714 wakaba 1.52 ## Ignore the token
3715     !!!next-token;
3716 wakaba 1.126 next B;
3717 wakaba 1.52 } else {
3718 wakaba 1.79 !!!cp ('t267');
3719 wakaba 1.52 #
3720     }
3721 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3722 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3723 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3724     !!!cp ('t270.2');
3725     ## Stop parsing.
3726     last B;
3727     } else {
3728     ## NOTE: As if </colgroup>.
3729     !!!cp ('t270.1');
3730     pop @{$self->{open_elements}}; # colgroup
3731     $self->{insertion_mode} = IN_TABLE_IM;
3732     ## Reprocess.
3733 wakaba 1.126 next B;
3734 wakaba 1.104 }
3735     } else {
3736     die "$0: $token->{type}: Unknown token type";
3737     }
3738 wakaba 1.52
3739     ## As if </colgroup>
3740 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3741 wakaba 1.79 !!!cp ('t269');
3742 wakaba 1.104 ## TODO: Wrong error type?
3743 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3744     text => 'colgroup', token => $token);
3745 wakaba 1.52 ## Ignore the token
3746 wakaba 1.125 !!!nack ('t269.1');
3747 wakaba 1.52 !!!next-token;
3748 wakaba 1.126 next B;
3749 wakaba 1.52 } else {
3750 wakaba 1.79 !!!cp ('t270');
3751 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3752 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3753 wakaba 1.125 !!!ack-later;
3754 wakaba 1.52 ## reprocess
3755 wakaba 1.126 next B;
3756 wakaba 1.52 }
3757 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3758 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3759 wakaba 1.79 !!!cp ('t271');
3760 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3761     !!!next-token;
3762 wakaba 1.126 next B;
3763 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3764 wakaba 1.123 if ($token->{tag_name} eq 'option') {
3765 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3766 wakaba 1.123 !!!cp ('t272');
3767     ## As if </option>
3768     pop @{$self->{open_elements}};
3769     } else {
3770     !!!cp ('t273');
3771     }
3772 wakaba 1.52
3773 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3774 wakaba 1.125 !!!nack ('t273.1');
3775 wakaba 1.123 !!!next-token;
3776 wakaba 1.126 next B;
3777 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
3778 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3779 wakaba 1.123 !!!cp ('t274');
3780     ## As if </option>
3781     pop @{$self->{open_elements}};
3782     } else {
3783     !!!cp ('t275');
3784     }
3785 wakaba 1.52
3786 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3787 wakaba 1.123 !!!cp ('t276');
3788     ## As if </optgroup>
3789     pop @{$self->{open_elements}};
3790     } else {
3791     !!!cp ('t277');
3792     }
3793 wakaba 1.52
3794 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3795 wakaba 1.125 !!!nack ('t277.1');
3796 wakaba 1.123 !!!next-token;
3797 wakaba 1.126 next B;
3798 wakaba 1.146 } elsif ({
3799 wakaba 1.216 select => 1, input => 1, textarea => 1, keygen => 1,
3800 wakaba 1.146 }->{$token->{tag_name}} or
3801 wakaba 1.210 (($self->{insertion_mode} & IM_MASK)
3802     == IN_SELECT_IN_TABLE_IM and
3803 wakaba 1.101 {
3804     caption => 1, table => 1,
3805     tbody => 1, tfoot => 1, thead => 1,
3806     tr => 1, td => 1, th => 1,
3807     }->{$token->{tag_name}})) {
3808 wakaba 1.222
3809     ## 1. Parse error.
3810     if ($token->{tag_name} eq 'select') {
3811     !!!parse-error (type => 'select in select', ## XXX: documentation
3812     token => $token);
3813     } else {
3814     !!!parse-error (type => 'not closed', text => 'select',
3815     token => $token);
3816     }
3817    
3818     ## 2./<select>-1. Unless "have an element in table scope" (select):
3819 wakaba 1.123 my $i;
3820     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3821     my $node = $self->{open_elements}->[$_];
3822 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3823 wakaba 1.123 !!!cp ('t278');
3824     $i = $_;
3825     last INSCOPE;
3826     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3827     !!!cp ('t279');
3828     last INSCOPE;
3829     }
3830     } # INSCOPE
3831     unless (defined $i) {
3832     !!!cp ('t280');
3833 wakaba 1.222 if ($token->{tag_name} eq 'select') {
3834     ## NOTE: This error would be raised when
3835     ## |select.innerHTML = '<select>'| is executed; in this
3836     ## case two errors, "select in select" and "unmatched
3837     ## end tag", are reported to the user.  The latter might
3838     ## be confusing, but this is what the spec requires.
3839     !!!parse-error (type => 'unmatched end tag',
3840     text => 'select',
3841     token => $token);
3842     }
3843     ## Ignore the token.
3844 wakaba 1.125 !!!nack ('t280.1');
3845 wakaba 1.123 !!!next-token;
3846 wakaba 1.126 next B;
3847 wakaba 1.123 }
3848 wakaba 1.222
3849     ## 3. Otherwise, as if there were </select>:
3850 wakaba 1.52
3851 wakaba 1.123 !!!cp ('t281');
3852     splice @{$self->{open_elements}}, $i;
3853 wakaba 1.52
3854 wakaba 1.123 $self->_reset_insertion_mode;
3855 wakaba 1.47
3856 wakaba 1.101 if ($token->{tag_name} eq 'select') {
3857 wakaba 1.125 !!!nack ('t281.2');
3858 wakaba 1.101 !!!next-token;
3859 wakaba 1.126 next B;
3860 wakaba 1.101 } else {
3861     !!!cp ('t281.1');
3862 wakaba 1.125 !!!ack-later;
3863 wakaba 1.101 ## Reprocess the token.
3864 wakaba 1.126 next B;
3865 wakaba 1.101 }
3866 wakaba 1.226 } elsif ($token->{tag_name} eq 'script') {
3867     !!!cp ('t281.3');
3868     ## NOTE: This is an "as if in head" code clone
3869     $script_start_tag->();
3870     next B;
3871 wakaba 1.58 } else {
3872 wakaba 1.79 !!!cp ('t282');
3873 wakaba 1.153 !!!parse-error (type => 'in select',
3874     text => $token->{tag_name}, token => $token);
3875 wakaba 1.58 ## Ignore the token
3876 wakaba 1.125 !!!nack ('t282.1');
3877 wakaba 1.58 !!!next-token;
3878 wakaba 1.126 next B;
3879 wakaba 1.58 }
3880     } elsif ($token->{type} == END_TAG_TOKEN) {
3881 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
3882 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3883     $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3884 wakaba 1.123 !!!cp ('t283');
3885     ## As if </option>
3886     splice @{$self->{open_elements}}, -2;
3887 wakaba 1.206 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3888 wakaba 1.123 !!!cp ('t284');
3889     pop @{$self->{open_elements}};
3890     } else {
3891     !!!cp ('t285');
3892 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3893     text => $token->{tag_name}, token => $token);
3894 wakaba 1.123 ## Ignore the token
3895     }
3896 wakaba 1.125 !!!nack ('t285.1');
3897 wakaba 1.123 !!!next-token;
3898 wakaba 1.126 next B;
3899 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
3900 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3901 wakaba 1.123 !!!cp ('t286');
3902     pop @{$self->{open_elements}};
3903     } else {
3904     !!!cp ('t287');
3905 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3906     text => $token->{tag_name}, token => $token);
3907 wakaba 1.123 ## Ignore the token
3908     }
3909 wakaba 1.125 !!!nack ('t287.1');
3910 wakaba 1.123 !!!next-token;
3911 wakaba 1.126 next B;
3912 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
3913     ## have an element in table scope
3914     my $i;
3915     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3916     my $node = $self->{open_elements}->[$_];
3917 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3918 wakaba 1.123 !!!cp ('t288');
3919     $i = $_;
3920     last INSCOPE;
3921     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3922     !!!cp ('t289');
3923     last INSCOPE;
3924     }
3925     } # INSCOPE
3926     unless (defined $i) {
3927     !!!cp ('t290');
3928 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3929     text => $token->{tag_name}, token => $token);
3930 wakaba 1.123 ## Ignore the token
3931 wakaba 1.125 !!!nack ('t290.1');
3932 wakaba 1.123 !!!next-token;
3933 wakaba 1.126 next B;
3934 wakaba 1.123 }
3935 wakaba 1.52
3936 wakaba 1.123 !!!cp ('t291');
3937     splice @{$self->{open_elements}}, $i;
3938 wakaba 1.52
3939 wakaba 1.123 $self->_reset_insertion_mode;
3940 wakaba 1.52
3941 wakaba 1.125 !!!nack ('t291.1');
3942 wakaba 1.123 !!!next-token;
3943 wakaba 1.126 next B;
3944 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK)
3945     == IN_SELECT_IN_TABLE_IM and
3946 wakaba 1.101 {
3947     caption => 1, table => 1, tbody => 1,
3948     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3949     }->{$token->{tag_name}}) {
3950 wakaba 1.83 ## TODO: Is the following wrong?
3951 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3952     text => $token->{tag_name}, token => $token);
3953 wakaba 1.52
3954 wakaba 1.123 ## have an element in table scope
3955     my $i;
3956     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3957     my $node = $self->{open_elements}->[$_];
3958     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3959     !!!cp ('t292');
3960     $i = $_;
3961     last INSCOPE;
3962     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3963     !!!cp ('t293');
3964     last INSCOPE;
3965     }
3966     } # INSCOPE
3967     unless (defined $i) {
3968     !!!cp ('t294');
3969     ## Ignore the token
3970 wakaba 1.125 !!!nack ('t294.1');
3971 wakaba 1.123 !!!next-token;
3972 wakaba 1.126 next B;
3973 wakaba 1.123 }
3974 wakaba 1.52
3975 wakaba 1.123 ## As if </select>
3976     ## have an element in table scope
3977     undef $i;
3978     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3979     my $node = $self->{open_elements}->[$_];
3980 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3981 wakaba 1.123 !!!cp ('t295');
3982     $i = $_;
3983     last INSCOPE;
3984     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3985 wakaba 1.83 ## ISSUE: Can this state be reached?
3986 wakaba 1.123 !!!cp ('t296');
3987     last INSCOPE;
3988     }
3989     } # INSCOPE
3990     unless (defined $i) {
3991     !!!cp ('t297');
3992 wakaba 1.83 ## TODO: Is the following error type correct?
3993 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3994     text => 'select', token => $token);
3995 wakaba 1.123 ## Ignore the </select> token
3996 wakaba 1.125 !!!nack ('t297.1');
3997 wakaba 1.123 !!!next-token; ## TODO: ok?
3998 wakaba 1.126 next B;
3999 wakaba 1.123 }
4000 wakaba 1.52
4001 wakaba 1.123 !!!cp ('t298');
4002     splice @{$self->{open_elements}}, $i;
4003 wakaba 1.52
4004 wakaba 1.123 $self->_reset_insertion_mode;
4005 wakaba 1.52
4006 wakaba 1.125 !!!ack-later;
4007 wakaba 1.123 ## reprocess
4008 wakaba 1.126 next B;
4009 wakaba 1.58 } else {
4010 wakaba 1.79 !!!cp ('t299');
4011 wakaba 1.153 !!!parse-error (type => 'in select:/',
4012     text => $token->{tag_name}, token => $token);
4013 wakaba 1.52 ## Ignore the token
4014 wakaba 1.125 !!!nack ('t299.3');
4015 wakaba 1.52 !!!next-token;
4016 wakaba 1.126 next B;
4017 wakaba 1.58 }
4018 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4019 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4020 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4021     !!!cp ('t299.1');
4022 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4023 wakaba 1.104 } else {
4024     !!!cp ('t299.2');
4025     }
4026    
4027     ## Stop parsing.
4028     last B;
4029 wakaba 1.58 } else {
4030     die "$0: $token->{type}: Unknown token type";
4031     }
4032 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4033 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4034 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4035 wakaba 1.52 my $data = $1;
4036     ## As if in body
4037     $reconstruct_active_formatting_elements->($insert_to_current);
4038    
4039     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4040    
4041     unless (length $token->{data}) {
4042 wakaba 1.79 !!!cp ('t300');
4043 wakaba 1.52 !!!next-token;
4044 wakaba 1.126 next B;
4045 wakaba 1.52 }
4046     }
4047    
4048 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4049 wakaba 1.79 !!!cp ('t301');
4050 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4051 wakaba 1.188 #
4052 wakaba 1.79 } else {
4053     !!!cp ('t302');
4054 wakaba 1.188 ## "after body" insertion mode
4055     !!!parse-error (type => 'after body:#text', token => $token);
4056     #
4057 wakaba 1.52 }
4058    
4059 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4060 wakaba 1.52 ## reprocess
4061 wakaba 1.126 next B;
4062 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4063 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4064 wakaba 1.79 !!!cp ('t303');
4065 wakaba 1.153 !!!parse-error (type => 'after html',
4066     text => $token->{tag_name}, token => $token);
4067 wakaba 1.188 #
4068 wakaba 1.79 } else {
4069     !!!cp ('t304');
4070 wakaba 1.188 ## "after body" insertion mode
4071     !!!parse-error (type => 'after body',
4072     text => $token->{tag_name}, token => $token);
4073     #
4074 wakaba 1.52 }
4075    
4076 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4077 wakaba 1.125 !!!ack-later;
4078 wakaba 1.52 ## reprocess
4079 wakaba 1.126 next B;
4080 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4081 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4082 wakaba 1.79 !!!cp ('t305');
4083 wakaba 1.153 !!!parse-error (type => 'after html:/',
4084     text => $token->{tag_name}, token => $token);
4085 wakaba 1.52
4086 wakaba 1.188 $self->{insertion_mode} = IN_BODY_IM;
4087     ## Reprocess.
4088     next B;
4089 wakaba 1.79 } else {
4090     !!!cp ('t306');
4091 wakaba 1.52 }
4092    
4093     ## "after body" insertion mode
4094     if ($token->{tag_name} eq 'html') {
4095     if (defined $self->{inner_html_node}) {
4096 wakaba 1.79 !!!cp ('t307');
4097 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4098     text => 'html', token => $token);
4099 wakaba 1.52 ## Ignore the token
4100     !!!next-token;
4101 wakaba 1.126 next B;
4102 wakaba 1.52 } else {
4103 wakaba 1.79 !!!cp ('t308');
4104 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4105 wakaba 1.52 !!!next-token;
4106 wakaba 1.126 next B;
4107 wakaba 1.52 }
4108     } else {
4109 wakaba 1.79 !!!cp ('t309');
4110 wakaba 1.153 !!!parse-error (type => 'after body:/',
4111     text => $token->{tag_name}, token => $token);
4112 wakaba 1.52
4113 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4114 wakaba 1.52 ## reprocess
4115 wakaba 1.126 next B;
4116 wakaba 1.52 }
4117 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4118     !!!cp ('t309.2');
4119     ## Stop parsing
4120     last B;
4121 wakaba 1.52 } else {
4122     die "$0: $token->{type}: Unknown token type";
4123     }
4124 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4125 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4126 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4127 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4128    
4129     unless (length $token->{data}) {
4130 wakaba 1.79 !!!cp ('t310');
4131 wakaba 1.52 !!!next-token;
4132 wakaba 1.126 next B;
4133 wakaba 1.52 }
4134     }
4135    
4136 wakaba 1.188 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4137 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4138 wakaba 1.79 !!!cp ('t311');
4139 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
4140 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4141 wakaba 1.79 !!!cp ('t312');
4142 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
4143 wakaba 1.158 } else { # "after after frameset"
4144 wakaba 1.79 !!!cp ('t313');
4145 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4146 wakaba 1.52 }
4147    
4148     ## Ignore the token.
4149     if (length $token->{data}) {
4150 wakaba 1.79 !!!cp ('t314');
4151 wakaba 1.52 ## reprocess the rest of characters
4152     } else {
4153 wakaba 1.79 !!!cp ('t315');
4154 wakaba 1.52 !!!next-token;
4155     }
4156 wakaba 1.126 next B;
4157 wakaba 1.52 }
4158    
4159     die qq[$0: Character "$token->{data}"];
4160 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4161 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4162 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4163 wakaba 1.79 !!!cp ('t318');
4164 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4165 wakaba 1.125 !!!nack ('t318.1');
4166 wakaba 1.52 !!!next-token;
4167 wakaba 1.126 next B;
4168 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
4169 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4170 wakaba 1.79 !!!cp ('t319');
4171 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4172 wakaba 1.52 pop @{$self->{open_elements}};
4173 wakaba 1.125 !!!ack ('t319.1');
4174 wakaba 1.52 !!!next-token;
4175 wakaba 1.126 next B;
4176 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
4177 wakaba 1.79 !!!cp ('t320');
4178 wakaba 1.148 ## NOTE: As if in head.
4179 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4180 wakaba 1.126 next B;
4181 wakaba 1.158
4182     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4183     ## has no parse error.
4184 wakaba 1.52 } else {
4185 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4186 wakaba 1.79 !!!cp ('t321');
4187 wakaba 1.153 !!!parse-error (type => 'in frameset',
4188     text => $token->{tag_name}, token => $token);
4189 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4190 wakaba 1.79 !!!cp ('t322');
4191 wakaba 1.153 !!!parse-error (type => 'after frameset',
4192     text => $token->{tag_name}, token => $token);
4193 wakaba 1.158 } else { # "after after frameset"
4194     !!!cp ('t322.2');
4195     !!!parse-error (type => 'after after frameset',
4196     text => $token->{tag_name}, token => $token);
4197 wakaba 1.52 }
4198     ## Ignore the token
4199 wakaba 1.125 !!!nack ('t322.1');
4200 wakaba 1.52 !!!next-token;
4201 wakaba 1.126 next B;
4202 wakaba 1.52 }
4203 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4204 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4205 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4206 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4207 wakaba 1.52 @{$self->{open_elements}} == 1) {
4208 wakaba 1.79 !!!cp ('t325');
4209 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4210     text => $token->{tag_name}, token => $token);
4211 wakaba 1.52 ## Ignore the token
4212     !!!next-token;
4213     } else {
4214 wakaba 1.79 !!!cp ('t326');
4215 wakaba 1.52 pop @{$self->{open_elements}};
4216     !!!next-token;
4217     }
4218 wakaba 1.47
4219 wakaba 1.52 if (not defined $self->{inner_html_node} and
4220 wakaba 1.206 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4221 wakaba 1.79 !!!cp ('t327');
4222 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4223 wakaba 1.79 } else {
4224     !!!cp ('t328');
4225 wakaba 1.52 }
4226 wakaba 1.126 next B;
4227 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
4228 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4229 wakaba 1.79 !!!cp ('t329');
4230 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4231 wakaba 1.52 !!!next-token;
4232 wakaba 1.126 next B;
4233 wakaba 1.52 } else {
4234 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4235 wakaba 1.79 !!!cp ('t330');
4236 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
4237     text => $token->{tag_name}, token => $token);
4238 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4239     !!!cp ('t330.1');
4240     !!!parse-error (type => 'after frameset:/',
4241     text => $token->{tag_name}, token => $token);
4242     } else { # "after after frameset"
4243 wakaba 1.79 !!!cp ('t331');
4244 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
4245 wakaba 1.153 text => $token->{tag_name}, token => $token);
4246 wakaba 1.52 }
4247     ## Ignore the token
4248     !!!next-token;
4249 wakaba 1.126 next B;
4250 wakaba 1.52 }
4251 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4252 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4253 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4254     !!!cp ('t331.1');
4255 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4256 wakaba 1.104 } else {
4257     !!!cp ('t331.2');
4258     }
4259    
4260     ## Stop parsing
4261     last B;
4262 wakaba 1.52 } else {
4263     die "$0: $token->{type}: Unknown token type";
4264     }
4265     } else {
4266     die "$0: $self->{insertion_mode}: Unknown insertion mode";
4267     }
4268 wakaba 1.47
4269 wakaba 1.52 ## "in body" insertion mode
4270 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
4271 wakaba 1.52 if ($token->{tag_name} eq 'script') {
4272 wakaba 1.79 !!!cp ('t332');
4273 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4274 wakaba 1.100 $script_start_tag->();
4275 wakaba 1.126 next B;
4276 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
4277 wakaba 1.79 !!!cp ('t333');
4278 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4279 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4280 wakaba 1.126 next B;
4281 wakaba 1.52 } elsif ({
4282 wakaba 1.232 base => 1, command => 1, link => 1,
4283 wakaba 1.52 }->{$token->{tag_name}}) {
4284 wakaba 1.79 !!!cp ('t334');
4285 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4286 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4287 wakaba 1.194 pop @{$self->{open_elements}};
4288 wakaba 1.125 !!!ack ('t334.1');
4289 wakaba 1.52 !!!next-token;
4290 wakaba 1.126 next B;
4291 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
4292     ## NOTE: This is an "as if in head" code clone, only "-t" differs
4293 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4294 wakaba 1.194 my $meta_el = pop @{$self->{open_elements}};
4295 wakaba 1.46
4296 wakaba 1.52 unless ($self->{confident}) {
4297 wakaba 1.134 if ($token->{attributes}->{charset}) {
4298 wakaba 1.79 !!!cp ('t335');
4299 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4300     ## in the {change_encoding} callback.
4301 wakaba 1.63 $self->{change_encoding}
4302 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
4303 wakaba 1.66
4304     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4305     ->set_user_data (manakai_has_reference =>
4306     $token->{attributes}->{charset}
4307     ->{has_reference});
4308 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4309     if ($token->{attributes}->{content}->{value}
4310 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4311 wakaba 1.189 [\x09\x0A\x0C\x0D\x20]*=
4312     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4313     ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4314     /x) {
4315 wakaba 1.79 !!!cp ('t336');
4316 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4317     ## in the {change_encoding} callback.
4318 wakaba 1.63 $self->{change_encoding}
4319 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4320 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4321     ->set_user_data (manakai_has_reference =>
4322     $token->{attributes}->{content}
4323     ->{has_reference});
4324 wakaba 1.63 }
4325 wakaba 1.52 }
4326 wakaba 1.66 } else {
4327     if ($token->{attributes}->{charset}) {
4328 wakaba 1.79 !!!cp ('t337');
4329 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4330     ->set_user_data (manakai_has_reference =>
4331     $token->{attributes}->{charset}
4332     ->{has_reference});
4333     }
4334 wakaba 1.68 if ($token->{attributes}->{content}) {
4335 wakaba 1.79 !!!cp ('t338');
4336 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4337     ->set_user_data (manakai_has_reference =>
4338     $token->{attributes}->{content}
4339     ->{has_reference});
4340     }
4341 wakaba 1.52 }
4342 wakaba 1.1
4343 wakaba 1.125 !!!ack ('t338.1');
4344 wakaba 1.52 !!!next-token;
4345 wakaba 1.126 next B;
4346 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
4347 wakaba 1.79 !!!cp ('t341');
4348 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4349 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4350 wakaba 1.126 next B;
4351 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
4352 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
4353 wakaba 1.46
4354 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
4355 wakaba 1.206 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4356 wakaba 1.79 !!!cp ('t342');
4357 wakaba 1.52 ## Ignore the token
4358     } else {
4359     my $body_el = $self->{open_elements}->[1]->[0];
4360     for my $attr_name (keys %{$token->{attributes}}) {
4361     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4362 wakaba 1.79 !!!cp ('t343');
4363 wakaba 1.52 $body_el->set_attribute_ns
4364     (undef, [undef, $attr_name],
4365     $token->{attributes}->{$attr_name}->{value});
4366     }
4367     }
4368     }
4369 wakaba 1.125 !!!nack ('t343.1');
4370 wakaba 1.52 !!!next-token;
4371 wakaba 1.126 next B;
4372 wakaba 1.242 } elsif ($token->{tag_name} eq 'frameset') {
4373 wakaba 1.241 !!!parse-error (type => 'in body', text => $token->{tag_name},
4374     token => $token);
4375    
4376     if (@{$self->{open_elements}} == 1 or
4377 wakaba 1.242 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4378 wakaba 1.241 !!!cp ('t343.2');
4379     ## Ignore the token.
4380     } elsif (not $self->{frameset_ok}) {
4381     !!!cp ('t343.3');
4382     ## Ignore the token.
4383     } else {
4384     !!!cp ('t343.4');
4385    
4386     ## 1. Remove the second element.
4387     my $body = $self->{open_elements}->[1]->[0];
4388     my $body_parent = $body->parent_node;
4389     $body_parent->remove_child ($body) if $body_parent;
4390    
4391     ## 2. Pop nodes.
4392     splice @{$self->{open_elements}}, 1;
4393    
4394     ## 3. Insert.
4395     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4396    
4397     ## 4. Switch.
4398     $self->{insertion_mode} = IN_FRAMESET_IM;
4399     }
4400    
4401     !!!nack ('t343.5');
4402     !!!next-token;
4403     next B;
4404 wakaba 1.52 } elsif ({
4405 wakaba 1.195 ## NOTE: Start tags for non-phrasing flow content elements
4406    
4407     ## NOTE: The normal one
4408     address => 1, article => 1, aside => 1, blockquote => 1,
4409     center => 1, datagrid => 1, details => 1, dialog => 1,
4410     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4411     footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4412 wakaba 1.237 h6 => 1, header => 1, hgroup => 1,
4413     menu => 1, nav => 1, ol => 1, p => 1,
4414 wakaba 1.195 section => 1, ul => 1,
4415     ## NOTE: As normal, but drops leading newline
4416 wakaba 1.97 pre => 1, listing => 1,
4417 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
4418 wakaba 1.109 form => 1,
4419 wakaba 1.195
4420 wakaba 1.109 table => 1,
4421     hr => 1,
4422 wakaba 1.52 }->{$token->{tag_name}}) {
4423 wakaba 1.225
4424     ## 1. When there is an opening |form| element:
4425 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4426     !!!cp ('t350');
4427 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
4428 wakaba 1.109 ## Ignore the token
4429 wakaba 1.125 !!!nack ('t350.1');
4430 wakaba 1.109 !!!next-token;
4431 wakaba 1.126 next B;
4432 wakaba 1.109 }
4433    
4434 wakaba 1.225 ## 2. Close the |p| element, if any.
4435 wakaba 1.217 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4436     $self->{document}->manakai_compat_mode ne 'quirks') {
4437     ## has a p element in scope
4438     INSCOPE: for (reverse @{$self->{open_elements}}) {
4439     if ($_->[1] == P_EL) {
4440     !!!cp ('t344');
4441     !!!back-token; # <form>
4442     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4443     line => $token->{line}, column => $token->{column}};
4444     next B;
4445     } elsif ($_->[1] & SCOPING_EL) {
4446     !!!cp ('t345');
4447     last INSCOPE;
4448     }
4449     } # INSCOPE
4450     }
4451 wakaba 1.225
4452     ## 3. Close the opening <hn> element, if any.
4453     if ({h1 => 1, h2 => 1, h3 => 1,
4454     h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) {
4455     if ($self->{open_elements}->[-1]->[1] == HEADING_EL) {
4456     !!!parse-error (type => 'not closed',
4457     text => $self->{open_elements}->[-1]->[0]->manakai_local_name,
4458     token => $token);
4459     pop @{$self->{open_elements}};
4460     }
4461     }
4462    
4463     ## 4. Insertion.
4464 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4465 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4466 wakaba 1.125 !!!nack ('t346.1');
4467 wakaba 1.52 !!!next-token;
4468 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4469 wakaba 1.52 $token->{data} =~ s/^\x0A//;
4470     unless (length $token->{data}) {
4471 wakaba 1.79 !!!cp ('t346');
4472 wakaba 1.1 !!!next-token;
4473 wakaba 1.79 } else {
4474     !!!cp ('t349');
4475 wakaba 1.52 }
4476 wakaba 1.79 } else {
4477     !!!cp ('t348');
4478 wakaba 1.52 }
4479 wakaba 1.243
4480     delete $self->{frameset_ok};
4481 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
4482     !!!cp ('t347.1');
4483     $self->{form_element} = $self->{open_elements}->[-1]->[0];
4484    
4485 wakaba 1.125 !!!nack ('t347.2');
4486 wakaba 1.109 !!!next-token;
4487     } elsif ($token->{tag_name} eq 'table') {
4488     !!!cp ('t382');
4489     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4490 wakaba 1.243
4491     delete $self->{frameset_ok};
4492 wakaba 1.109
4493     $self->{insertion_mode} = IN_TABLE_IM;
4494    
4495 wakaba 1.125 !!!nack ('t382.1');
4496 wakaba 1.109 !!!next-token;
4497     } elsif ($token->{tag_name} eq 'hr') {
4498     !!!cp ('t386');
4499     pop @{$self->{open_elements}};
4500 wakaba 1.243
4501 wakaba 1.240 !!!ack ('t386.1');
4502 wakaba 1.243
4503     delete $self->{frameset_ok};
4504    
4505 wakaba 1.109 !!!next-token;
4506 wakaba 1.52 } else {
4507 wakaba 1.125 !!!nack ('t347.1');
4508 wakaba 1.52 !!!next-token;
4509     }
4510 wakaba 1.126 next B;
4511 wakaba 1.196 } elsif ($token->{tag_name} eq 'li') {
4512     ## NOTE: As normal, but imply </li> when there's another <li> ...
4513 wakaba 1.193
4514 wakaba 1.225 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)::
4515     ## Interpreted as <li><foo/></li><li/> (non-conforming):
4516 wakaba 1.193 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4517     ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4518     ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4519     ## object (Fx)
4520 wakaba 1.225 ## Generate non-tree (non-conforming):
4521 wakaba 1.193 ## basefont (IE7 (where basefont is non-void)), center (IE),
4522     ## form (IE), hn (IE)
4523 wakaba 1.225 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)::
4524     ## Interpreted as <li><foo><li/></foo></li> (non-conforming):
4525 wakaba 1.193 ## div (Fx, S)
4526 wakaba 1.196
4527 wakaba 1.243 ## 1. Frameset-ng
4528     delete $self->{frameset_ok};
4529    
4530 wakaba 1.196 my $non_optional;
4531 wakaba 1.52 my $i = -1;
4532 wakaba 1.196
4533 wakaba 1.243 ## 2.
4534 wakaba 1.196 for my $node (reverse @{$self->{open_elements}}) {
4535 wakaba 1.206 if ($node->[1] == LI_EL) {
4536 wakaba 1.243 ## 3. (a) As if </li>
4537 wakaba 1.196 {
4538     ## If no </li> - not applied
4539     #
4540    
4541     ## Otherwise
4542    
4543     ## 1. generate implied end tags, except for </li>
4544     #
4545    
4546     ## 2. If current node != "li", parse error
4547     if ($non_optional) {
4548     !!!parse-error (type => 'not closed',
4549     text => $non_optional->[0]->manakai_local_name,
4550     token => $token);
4551     !!!cp ('t355');
4552     } else {
4553     !!!cp ('t356');
4554     }
4555    
4556     ## 3. Pop
4557     splice @{$self->{open_elements}}, $i;
4558 wakaba 1.52 }
4559 wakaba 1.196
4560 wakaba 1.243 last; ## 3. (b) goto 6.
4561 wakaba 1.196 } elsif (
4562     ## NOTE: not "formatting" and not "phrasing"
4563     ($node->[1] & SPECIAL_EL or
4564     $node->[1] & SCOPING_EL) and
4565     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4566 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4567     ) {
4568 wakaba 1.243 ## 4.
4569 wakaba 1.79 !!!cp ('t357');
4570 wakaba 1.243 last; ## goto 6.
4571 wakaba 1.196 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4572 wakaba 1.79 !!!cp ('t358');
4573 wakaba 1.196 #
4574     } else {
4575     !!!cp ('t359');
4576     $non_optional ||= $node;
4577     #
4578 wakaba 1.52 }
4579 wakaba 1.243 ## 5.
4580     ## goto 3.
4581 wakaba 1.52 $i--;
4582 wakaba 1.196 }
4583    
4584 wakaba 1.243 ## 6. (a) has a |p| element in scope
4585 wakaba 1.196 INSCOPE: for (reverse @{$self->{open_elements}}) {
4586 wakaba 1.206 if ($_->[1] == P_EL) {
4587 wakaba 1.196 !!!cp ('t353');
4588 wakaba 1.198
4589     ## NOTE: |<p><li>|, for example.
4590    
4591 wakaba 1.196 !!!back-token; # <x>
4592     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4593     line => $token->{line}, column => $token->{column}};
4594     next B;
4595     } elsif ($_->[1] & SCOPING_EL) {
4596     !!!cp ('t354');
4597     last INSCOPE;
4598     }
4599     } # INSCOPE
4600    
4601 wakaba 1.243 ## 6. (b) insert
4602 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4603 wakaba 1.125 !!!nack ('t359.1');
4604 wakaba 1.52 !!!next-token;
4605 wakaba 1.126 next B;
4606 wakaba 1.196 } elsif ($token->{tag_name} eq 'dt' or
4607     $token->{tag_name} eq 'dd') {
4608     ## NOTE: As normal, but imply </dt> or </dd> when ...
4609    
4610 wakaba 1.243 ## 1. Frameset-ng
4611     delete $self->{frameset_ok};
4612    
4613 wakaba 1.196 my $non_optional;
4614     my $i = -1;
4615    
4616 wakaba 1.243 ## 2.
4617 wakaba 1.196 for my $node (reverse @{$self->{open_elements}}) {
4618 wakaba 1.207 if ($node->[1] == DTDD_EL) {
4619 wakaba 1.243 ## 3. (a) As if </dt> or </dd>
4620 wakaba 1.196 {
4621     ## If no </dt> or </dd> - not applied
4622     #
4623    
4624     ## Otherwise
4625    
4626     ## 1. generate implied end tags, except for </dt> or </dd>
4627     #
4628    
4629     ## 2. If current node != "dt"|"dd", parse error
4630     if ($non_optional) {
4631     !!!parse-error (type => 'not closed',
4632     text => $non_optional->[0]->manakai_local_name,
4633     token => $token);
4634     !!!cp ('t355.1');
4635     } else {
4636     !!!cp ('t356.1');
4637     }
4638    
4639     ## 3. Pop
4640     splice @{$self->{open_elements}}, $i;
4641     }
4642    
4643 wakaba 1.243 last; ## 3. (b) goto 6.
4644 wakaba 1.196 } elsif (
4645     ## NOTE: not "formatting" and not "phrasing"
4646     ($node->[1] & SPECIAL_EL or
4647     $node->[1] & SCOPING_EL) and
4648     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4649    
4650 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4651     ) {
4652 wakaba 1.243 ## 4.
4653 wakaba 1.196 !!!cp ('t357.1');
4654     last; ## goto 6.
4655     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4656     !!!cp ('t358.1');
4657     #
4658     } else {
4659     !!!cp ('t359.1');
4660     $non_optional ||= $node;
4661     #
4662     }
4663 wakaba 1.243 ## 5.
4664     ## goto 3.
4665 wakaba 1.196 $i--;
4666     }
4667    
4668 wakaba 1.243 ## 6. (a) has a |p| element in scope
4669 wakaba 1.196 INSCOPE: for (reverse @{$self->{open_elements}}) {
4670 wakaba 1.206 if ($_->[1] == P_EL) {
4671 wakaba 1.196 !!!cp ('t353.1');
4672     !!!back-token; # <x>
4673     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4674     line => $token->{line}, column => $token->{column}};
4675     next B;
4676     } elsif ($_->[1] & SCOPING_EL) {
4677     !!!cp ('t354.1');
4678     last INSCOPE;
4679     }
4680     } # INSCOPE
4681    
4682 wakaba 1.243 ## 6. (b) insert
4683 wakaba 1.196 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4684     !!!nack ('t359.2');
4685     !!!next-token;
4686     next B;
4687 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
4688 wakaba 1.195 ## NOTE: As normal, but effectively ends parsing
4689    
4690 wakaba 1.52 ## has a p element in scope
4691     INSCOPE: for (reverse @{$self->{open_elements}}) {
4692 wakaba 1.206 if ($_->[1] == P_EL) {
4693 wakaba 1.79 !!!cp ('t367');
4694 wakaba 1.125 !!!back-token; # <plaintext>
4695 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4696     line => $token->{line}, column => $token->{column}};
4697 wakaba 1.126 next B;
4698 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
4699 wakaba 1.79 !!!cp ('t368');
4700 wakaba 1.52 last INSCOPE;
4701 wakaba 1.46 }
4702 wakaba 1.52 } # INSCOPE
4703    
4704 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4705 wakaba 1.52
4706     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4707    
4708 wakaba 1.125 !!!nack ('t368.1');
4709 wakaba 1.52 !!!next-token;
4710 wakaba 1.126 next B;
4711 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
4712     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4713     my $node = $active_formatting_elements->[$i];
4714 wakaba 1.206 if ($node->[1] == A_EL) {
4715 wakaba 1.79 !!!cp ('t371');
4716 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
4717 wakaba 1.52
4718 wakaba 1.125 !!!back-token; # <a>
4719 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4720     line => $token->{line}, column => $token->{column}};
4721 wakaba 1.113 $formatting_end_tag->($token);
4722 wakaba 1.52
4723     AFE2: for (reverse 0..$#$active_formatting_elements) {
4724     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4725 wakaba 1.79 !!!cp ('t372');
4726 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
4727     last AFE2;
4728 wakaba 1.1 }
4729 wakaba 1.52 } # AFE2
4730     OE: for (reverse 0..$#{$self->{open_elements}}) {
4731     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4732 wakaba 1.79 !!!cp ('t373');
4733 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
4734     last OE;
4735 wakaba 1.1 }
4736 wakaba 1.52 } # OE
4737     last AFE;
4738     } elsif ($node->[0] eq '#marker') {
4739 wakaba 1.79 !!!cp ('t374');
4740 wakaba 1.52 last AFE;
4741     }
4742     } # AFE
4743    
4744     $reconstruct_active_formatting_elements->($insert_to_current);
4745 wakaba 1.1
4746 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4747 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4748 wakaba 1.1
4749 wakaba 1.125 !!!nack ('t374.1');
4750 wakaba 1.52 !!!next-token;
4751 wakaba 1.126 next B;
4752 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
4753     $reconstruct_active_formatting_elements->($insert_to_current);
4754 wakaba 1.1
4755 wakaba 1.52 ## has a |nobr| element in scope
4756     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4757     my $node = $self->{open_elements}->[$_];
4758 wakaba 1.206 if ($node->[1] == NOBR_EL) {
4759 wakaba 1.79 !!!cp ('t376');
4760 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
4761 wakaba 1.125 !!!back-token; # <nobr>
4762 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4763     line => $token->{line}, column => $token->{column}};
4764 wakaba 1.126 next B;
4765 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4766 wakaba 1.79 !!!cp ('t377');
4767 wakaba 1.52 last INSCOPE;
4768     }
4769     } # INSCOPE
4770    
4771 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4772 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4773    
4774 wakaba 1.125 !!!nack ('t377.1');
4775 wakaba 1.52 !!!next-token;
4776 wakaba 1.126 next B;
4777 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
4778     ## has a button element in scope
4779     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4780     my $node = $self->{open_elements}->[$_];
4781 wakaba 1.206 if ($node->[1] == BUTTON_EL) {
4782 wakaba 1.79 !!!cp ('t378');
4783 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
4784 wakaba 1.125 !!!back-token; # <button>
4785 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4786     line => $token->{line}, column => $token->{column}};
4787 wakaba 1.126 next B;
4788 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4789 wakaba 1.79 !!!cp ('t379');
4790 wakaba 1.52 last INSCOPE;
4791     }
4792     } # INSCOPE
4793    
4794     $reconstruct_active_formatting_elements->($insert_to_current);
4795    
4796 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4797 wakaba 1.85
4798     ## TODO: associate with $self->{form_element} if defined
4799    
4800 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
4801 wakaba 1.1
4802 wakaba 1.243 delete $self->{frameset_ok};
4803    
4804 wakaba 1.125 !!!nack ('t379.1');
4805 wakaba 1.52 !!!next-token;
4806 wakaba 1.126 next B;
4807 wakaba 1.103 } elsif ({
4808 wakaba 1.109 xmp => 1,
4809     iframe => 1,
4810     noembed => 1,
4811 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4812 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
4813 wakaba 1.103 }->{$token->{tag_name}}) {
4814 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
4815     !!!cp ('t381');
4816     $reconstruct_active_formatting_elements->($insert_to_current);
4817 wakaba 1.243
4818     delete $self->{frameset_ok};
4819     } elsif ($token->{tag_name} eq 'iframe') {
4820     !!!cp ('t381.1');
4821     delete $self->{frameset_ok};
4822 wakaba 1.109 } else {
4823     !!!cp ('t399');
4824     }
4825     ## NOTE: There is an "as if in body" code clone.
4826 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4827 wakaba 1.126 next B;
4828 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
4829 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
4830 wakaba 1.52
4831     if (defined $self->{form_element}) {
4832 wakaba 1.79 !!!cp ('t389');
4833 wakaba 1.52 ## Ignore the token
4834 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
4835 wakaba 1.52 !!!next-token;
4836 wakaba 1.126 next B;
4837 wakaba 1.52 } else {
4838 wakaba 1.147 !!!ack ('t391.1');
4839    
4840 wakaba 1.52 my $at = $token->{attributes};
4841     my $form_attrs;
4842     $form_attrs->{action} = $at->{action} if $at->{action};
4843     my $prompt_attr = $at->{prompt};
4844     $at->{name} = {name => 'name', value => 'isindex'};
4845     delete $at->{action};
4846     delete $at->{prompt};
4847     my @tokens = (
4848 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
4849 wakaba 1.114 attributes => $form_attrs,
4850     line => $token->{line}, column => $token->{column}},
4851     {type => START_TAG_TOKEN, tag_name => 'hr',
4852     line => $token->{line}, column => $token->{column}},
4853     {type => START_TAG_TOKEN, tag_name => 'label',
4854     line => $token->{line}, column => $token->{column}},
4855 wakaba 1.52 );
4856     if ($prompt_attr) {
4857 wakaba 1.79 !!!cp ('t390');
4858 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4859 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4860     };
4861 wakaba 1.1 } else {
4862 wakaba 1.79 !!!cp ('t391');
4863 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
4864 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
4865 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4866     }; # SHOULD
4867 wakaba 1.52 ## TODO: make this configurable
4868 wakaba 1.1 }
4869 wakaba 1.52 push @tokens,
4870 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4871     line => $token->{line}, column => $token->{column}},
4872 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4873 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
4874     line => $token->{line}, column => $token->{column}},
4875     {type => START_TAG_TOKEN, tag_name => 'hr',
4876     line => $token->{line}, column => $token->{column}},
4877     {type => END_TAG_TOKEN, tag_name => 'form',
4878     line => $token->{line}, column => $token->{column}};
4879 wakaba 1.52 !!!back-token (@tokens);
4880 wakaba 1.125 !!!next-token;
4881 wakaba 1.126 next B;
4882 wakaba 1.52 }
4883     } elsif ($token->{tag_name} eq 'textarea') {
4884 wakaba 1.224 ## 1. Insert
4885 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4886 wakaba 1.52
4887 wakaba 1.224 ## Step 2 # XXX
4888 wakaba 1.52 ## TODO: $self->{form_element} if defined
4889 wakaba 1.205
4890 wakaba 1.224 ## 2. Drop U+000A LINE FEED
4891 wakaba 1.205 $self->{ignore_newline} = 1;
4892    
4893 wakaba 1.224 ## 3. RCDATA
4894 wakaba 1.52 $self->{content_model} = RCDATA_CONTENT_MODEL;
4895     delete $self->{escape}; # MUST
4896 wakaba 1.205
4897 wakaba 1.224 ## 4., 6. Insertion mode
4898 wakaba 1.205 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4899    
4900 wakaba 1.243 ## 5. Frameset-ng.
4901     delete $self->{frameset_ok};
4902 wakaba 1.224
4903 wakaba 1.125 !!!nack ('t392.1');
4904 wakaba 1.52 !!!next-token;
4905 wakaba 1.126 next B;
4906 wakaba 1.201 } elsif ($token->{tag_name} eq 'optgroup' or
4907     $token->{tag_name} eq 'option') {
4908     ## has an |option| element in scope
4909     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4910     my $node = $self->{open_elements}->[$_];
4911 wakaba 1.206 if ($node->[1] == OPTION_EL) {
4912 wakaba 1.201 !!!cp ('t397.1');
4913     ## NOTE: As if </option>
4914     !!!back-token; # <option> or <optgroup>
4915     $token = {type => END_TAG_TOKEN, tag_name => 'option',
4916     line => $token->{line}, column => $token->{column}};
4917     next B;
4918     } elsif ($node->[1] & SCOPING_EL) {
4919     !!!cp ('t397.2');
4920     last INSCOPE;
4921     }
4922     } # INSCOPE
4923    
4924     $reconstruct_active_formatting_elements->($insert_to_current);
4925    
4926     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4927    
4928     !!!nack ('t397.3');
4929     !!!next-token;
4930     redo B;
4931 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
4932     $token->{tag_name} eq 'rp') {
4933     ## has a |ruby| element in scope
4934     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4935     my $node = $self->{open_elements}->[$_];
4936 wakaba 1.206 if ($node->[1] == RUBY_EL) {
4937 wakaba 1.151 !!!cp ('t398.1');
4938     ## generate implied end tags
4939     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4940     !!!cp ('t398.2');
4941     pop @{$self->{open_elements}};
4942     }
4943 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4944 wakaba 1.151 !!!cp ('t398.3');
4945     !!!parse-error (type => 'not closed',
4946 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4947 wakaba 1.151 ->manakai_local_name,
4948     token => $token);
4949     pop @{$self->{open_elements}}
4950 wakaba 1.206 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4951 wakaba 1.151 }
4952     last INSCOPE;
4953     } elsif ($node->[1] & SCOPING_EL) {
4954     !!!cp ('t398.4');
4955     last INSCOPE;
4956     }
4957     } # INSCOPE
4958 wakaba 1.212
4959     ## TODO: <non-ruby><rt> is not allowed.
4960 wakaba 1.151
4961     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4962    
4963     !!!nack ('t398.5');
4964     !!!next-token;
4965     redo B;
4966 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
4967     $token->{tag_name} eq 'svg') {
4968     $reconstruct_active_formatting_elements->($insert_to_current);
4969 wakaba 1.131
4970 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4971    
4972 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4973    
4974     ## "adjust foreign attributes" - done in insert-element-f
4975 wakaba 1.126
4976 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4977 wakaba 1.126
4978     if ($self->{self_closing}) {
4979     pop @{$self->{open_elements}};
4980 wakaba 1.201 !!!ack ('t398.6');
4981 wakaba 1.126 } else {
4982 wakaba 1.201 !!!cp ('t398.7');
4983 wakaba 1.126 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4984     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4985     ## mode, "in body" (not "in foreign content") secondary insertion
4986     ## mode, maybe.
4987     }
4988    
4989     !!!next-token;
4990     next B;
4991 wakaba 1.52 } elsif ({
4992     caption => 1, col => 1, colgroup => 1, frame => 1,
4993 wakaba 1.242 head => 1,
4994 wakaba 1.52 tbody => 1, td => 1, tfoot => 1, th => 1,
4995     thead => 1, tr => 1,
4996     }->{$token->{tag_name}}) {
4997 wakaba 1.79 !!!cp ('t401');
4998 wakaba 1.153 !!!parse-error (type => 'in body',
4999     text => $token->{tag_name}, token => $token);
5000 wakaba 1.52 ## Ignore the token
5001 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
5002 wakaba 1.52 !!!next-token;
5003 wakaba 1.126 next B;
5004 wakaba 1.198 } elsif ($token->{tag_name} eq 'param' or
5005     $token->{tag_name} eq 'source') {
5006     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5007     pop @{$self->{open_elements}};
5008    
5009     !!!ack ('t398.5');
5010     !!!next-token;
5011     redo B;
5012 wakaba 1.52 } else {
5013 wakaba 1.110 if ($token->{tag_name} eq 'image') {
5014     !!!cp ('t384');
5015 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
5016 wakaba 1.110 $token->{tag_name} = 'img';
5017     } else {
5018     !!!cp ('t385');
5019     }
5020    
5021     ## NOTE: There is an "as if <br>" code clone.
5022 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5023    
5024 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5025 wakaba 1.109
5026 wakaba 1.110 if ({
5027     applet => 1, marquee => 1, object => 1,
5028     }->{$token->{tag_name}}) {
5029     !!!cp ('t380');
5030 wakaba 1.243
5031 wakaba 1.110 push @$active_formatting_elements, ['#marker', ''];
5032 wakaba 1.243
5033     delete $self->{frameset_ok};
5034    
5035 wakaba 1.125 !!!nack ('t380.1');
5036 wakaba 1.110 } elsif ({
5037     b => 1, big => 1, em => 1, font => 1, i => 1,
5038 wakaba 1.193 s => 1, small => 1, strike => 1,
5039 wakaba 1.110 strong => 1, tt => 1, u => 1,
5040     }->{$token->{tag_name}}) {
5041     !!!cp ('t375');
5042     push @$active_formatting_elements, $self->{open_elements}->[-1];
5043 wakaba 1.125 !!!nack ('t375.1');
5044 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
5045     !!!cp ('t388');
5046     ## TODO: associate with $self->{form_element} if defined
5047     pop @{$self->{open_elements}};
5048 wakaba 1.125 !!!ack ('t388.2');
5049 wakaba 1.110 } elsif ({
5050     area => 1, basefont => 1, bgsound => 1, br => 1,
5051 wakaba 1.198 embed => 1, img => 1, spacer => 1, wbr => 1,
5052 wakaba 1.231 keygen => 1,
5053 wakaba 1.110 }->{$token->{tag_name}}) {
5054     !!!cp ('t388.1');
5055 wakaba 1.243
5056 wakaba 1.110 pop @{$self->{open_elements}};
5057 wakaba 1.243
5058     delete $self->{frameset_ok};
5059    
5060 wakaba 1.125 !!!ack ('t388.3');
5061 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
5062 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
5063 wakaba 1.243
5064     delete $self->{frameset_ok};
5065    
5066 wakaba 1.109 if ($self->{insertion_mode} & TABLE_IMS or
5067     $self->{insertion_mode} & BODY_TABLE_IMS or
5068 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5069 wakaba 1.109 !!!cp ('t400.1');
5070     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5071     } else {
5072     !!!cp ('t400.2');
5073     $self->{insertion_mode} = IN_SELECT_IM;
5074     }
5075 wakaba 1.125 !!!nack ('t400.3');
5076 wakaba 1.110 } else {
5077 wakaba 1.125 !!!nack ('t402');
5078 wakaba 1.109 }
5079 wakaba 1.51
5080 wakaba 1.52 !!!next-token;
5081 wakaba 1.126 next B;
5082 wakaba 1.52 }
5083 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5084 wakaba 1.239 if ($token->{tag_name} eq 'body' or $token->{tag_name} eq 'html') {
5085 wakaba 1.225
5086     ## 1. If not "have an element in scope":
5087     ## "has a |body| element in scope"
5088 wakaba 1.107 my $i;
5089 wakaba 1.111 INSCOPE: {
5090     for (reverse @{$self->{open_elements}}) {
5091 wakaba 1.206 if ($_->[1] == BODY_EL) {
5092 wakaba 1.111 !!!cp ('t405');
5093     $i = $_;
5094     last INSCOPE;
5095 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5096 wakaba 1.111 !!!cp ('t405.1');
5097     last;
5098     }
5099 wakaba 1.52 }
5100 wakaba 1.111
5101 wakaba 1.239 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|,
5102     ## and fragment cases.
5103 wakaba 1.200
5104     !!!parse-error (type => 'unmatched end tag',
5105 wakaba 1.153 text => $token->{tag_name}, token => $token);
5106 wakaba 1.239 ## Ignore the token. (</body> or </html>)
5107 wakaba 1.52 !!!next-token;
5108 wakaba 1.126 next B;
5109 wakaba 1.111 } # INSCOPE
5110 wakaba 1.107
5111 wakaba 1.225 ## 2. If unclosed elements:
5112 wakaba 1.107 for (@{$self->{open_elements}}) {
5113 wakaba 1.220 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5114     $_->[1] == OPTGROUP_EL ||
5115     $_->[1] == OPTION_EL ||
5116     $_->[1] == RUBY_COMPONENT_EL) {
5117 wakaba 1.107 !!!cp ('t403');
5118 wakaba 1.122 !!!parse-error (type => 'not closed',
5119 wakaba 1.153 text => $_->[0]->manakai_local_name,
5120 wakaba 1.122 token => $token);
5121 wakaba 1.107 last;
5122     } else {
5123     !!!cp ('t404');
5124     }
5125     }
5126    
5127 wakaba 1.225 ## 3. Switch the insertion mode.
5128 wakaba 1.107 $self->{insertion_mode} = AFTER_BODY_IM;
5129 wakaba 1.239 if ($token->{tag_name} eq 'body') {
5130 wakaba 1.52 !!!next-token;
5131 wakaba 1.239 } else { # html
5132     ## Reprocess.
5133 wakaba 1.51 }
5134 wakaba 1.239 next B;
5135 wakaba 1.52 } elsif ({
5136 wakaba 1.195 ## NOTE: End tags for non-phrasing flow content elements
5137    
5138     ## NOTE: The normal ones
5139     address => 1, article => 1, aside => 1, blockquote => 1,
5140     center => 1, datagrid => 1, details => 1, dialog => 1,
5141     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5142 wakaba 1.237 footer => 1, header => 1, hgroup => 1,
5143     listing => 1, menu => 1, nav => 1,
5144 wakaba 1.195 ol => 1, pre => 1, section => 1, ul => 1,
5145    
5146     ## NOTE: As normal, but ... optional tags
5147 wakaba 1.52 dd => 1, dt => 1, li => 1,
5148 wakaba 1.195
5149 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5150 wakaba 1.52 }->{$token->{tag_name}}) {
5151 wakaba 1.197 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5152     ## Code for <dt> or <dd> start tags includes "as if </dt> or
5153     ## </dd>" code.
5154    
5155 wakaba 1.52 ## has an element in scope
5156     my $i;
5157     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5158     my $node = $self->{open_elements}->[$_];
5159 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5160 wakaba 1.79 !!!cp ('t410');
5161 wakaba 1.52 $i = $_;
5162 wakaba 1.87 last INSCOPE;
5163 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5164 wakaba 1.79 !!!cp ('t411');
5165 wakaba 1.52 last INSCOPE;
5166 wakaba 1.51 }
5167 wakaba 1.52 } # INSCOPE
5168 wakaba 1.89
5169     unless (defined $i) { # has an element in scope
5170     !!!cp ('t413');
5171 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5172     text => $token->{tag_name}, token => $token);
5173 wakaba 1.157 ## NOTE: Ignore the token.
5174 wakaba 1.89 } else {
5175     ## Step 1. generate implied end tags
5176     while ({
5177 wakaba 1.151 ## END_TAG_OPTIONAL_EL
5178 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
5179     dt => ($token->{tag_name} ne 'dt'),
5180     li => ($token->{tag_name} ne 'li'),
5181 wakaba 1.194 option => 1,
5182     optgroup => 1,
5183 wakaba 1.89 p => 1,
5184 wakaba 1.151 rt => 1,
5185     rp => 1,
5186 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5187 wakaba 1.89 !!!cp ('t409');
5188     pop @{$self->{open_elements}};
5189     }
5190    
5191     ## Step 2.
5192 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5193     ne $token->{tag_name}) {
5194 wakaba 1.79 !!!cp ('t412');
5195 wakaba 1.122 !!!parse-error (type => 'not closed',
5196 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5197 wakaba 1.122 ->manakai_local_name,
5198     token => $token);
5199 wakaba 1.51 } else {
5200 wakaba 1.89 !!!cp ('t414');
5201 wakaba 1.51 }
5202 wakaba 1.89
5203     ## Step 3.
5204 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5205 wakaba 1.89
5206     ## Step 4.
5207     $clear_up_to_marker->()
5208     if {
5209 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5210 wakaba 1.89 }->{$token->{tag_name}};
5211 wakaba 1.51 }
5212 wakaba 1.52 !!!next-token;
5213 wakaba 1.126 next B;
5214 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
5215 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
5216    
5217 wakaba 1.92 undef $self->{form_element};
5218    
5219 wakaba 1.52 ## has an element in scope
5220 wakaba 1.92 my $i;
5221 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5222     my $node = $self->{open_elements}->[$_];
5223 wakaba 1.206 if ($node->[1] == FORM_EL) {
5224 wakaba 1.79 !!!cp ('t418');
5225 wakaba 1.92 $i = $_;
5226 wakaba 1.52 last INSCOPE;
5227 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5228 wakaba 1.79 !!!cp ('t419');
5229 wakaba 1.52 last INSCOPE;
5230     }
5231     } # INSCOPE
5232 wakaba 1.92
5233     unless (defined $i) { # has an element in scope
5234 wakaba 1.79 !!!cp ('t421');
5235 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5236     text => $token->{tag_name}, token => $token);
5237 wakaba 1.157 ## NOTE: Ignore the token.
5238 wakaba 1.92 } else {
5239     ## Step 1. generate implied end tags
5240 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5241 wakaba 1.92 !!!cp ('t417');
5242     pop @{$self->{open_elements}};
5243     }
5244    
5245     ## Step 2.
5246 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5247     ne $token->{tag_name}) {
5248 wakaba 1.92 !!!cp ('t417.1');
5249 wakaba 1.122 !!!parse-error (type => 'not closed',
5250 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5251 wakaba 1.122 ->manakai_local_name,
5252     token => $token);
5253 wakaba 1.92 } else {
5254     !!!cp ('t420');
5255     }
5256    
5257     ## Step 3.
5258     splice @{$self->{open_elements}}, $i;
5259 wakaba 1.52 }
5260    
5261     !!!next-token;
5262 wakaba 1.126 next B;
5263 wakaba 1.52 } elsif ({
5264 wakaba 1.195 ## NOTE: As normal, except acts as a closer for any ...
5265 wakaba 1.52 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5266     }->{$token->{tag_name}}) {
5267     ## has an element in scope
5268     my $i;
5269     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5270     my $node = $self->{open_elements}->[$_];
5271 wakaba 1.206 if ($node->[1] == HEADING_EL) {
5272 wakaba 1.79 !!!cp ('t423');
5273 wakaba 1.52 $i = $_;
5274     last INSCOPE;
5275 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5276 wakaba 1.79 !!!cp ('t424');
5277 wakaba 1.52 last INSCOPE;
5278 wakaba 1.51 }
5279 wakaba 1.52 } # INSCOPE
5280 wakaba 1.93
5281     unless (defined $i) { # has an element in scope
5282     !!!cp ('t425.1');
5283 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5284     text => $token->{tag_name}, token => $token);
5285 wakaba 1.157 ## NOTE: Ignore the token.
5286 wakaba 1.79 } else {
5287 wakaba 1.93 ## Step 1. generate implied end tags
5288 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5289 wakaba 1.93 !!!cp ('t422');
5290     pop @{$self->{open_elements}};
5291     }
5292    
5293     ## Step 2.
5294 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5295     ne $token->{tag_name}) {
5296 wakaba 1.93 !!!cp ('t425');
5297 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5298     text => $token->{tag_name}, token => $token);
5299 wakaba 1.93 } else {
5300     !!!cp ('t426');
5301     }
5302    
5303     ## Step 3.
5304     splice @{$self->{open_elements}}, $i;
5305 wakaba 1.36 }
5306 wakaba 1.52
5307     !!!next-token;
5308 wakaba 1.126 next B;
5309 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
5310 wakaba 1.195 ## NOTE: As normal, except </p> implies <p> and ...
5311    
5312 wakaba 1.87 ## has an element in scope
5313 wakaba 1.197 my $non_optional;
5314 wakaba 1.87 my $i;
5315     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5316     my $node = $self->{open_elements}->[$_];
5317 wakaba 1.206 if ($node->[1] == P_EL) {
5318 wakaba 1.87 !!!cp ('t410.1');
5319     $i = $_;
5320 wakaba 1.88 last INSCOPE;
5321 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5322 wakaba 1.87 !!!cp ('t411.1');
5323     last INSCOPE;
5324 wakaba 1.197 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5325     ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5326     !!!cp ('t411.2');
5327     #
5328     } else {
5329     !!!cp ('t411.3');
5330     $non_optional ||= $node;
5331     #
5332 wakaba 1.87 }
5333     } # INSCOPE
5334 wakaba 1.91
5335     if (defined $i) {
5336 wakaba 1.197 ## 1. Generate implied end tags
5337     #
5338    
5339     ## 2. If current node != "p", parse error
5340     if ($non_optional) {
5341 wakaba 1.87 !!!cp ('t412.1');
5342 wakaba 1.122 !!!parse-error (type => 'not closed',
5343 wakaba 1.197 text => $non_optional->[0]->manakai_local_name,
5344 wakaba 1.122 token => $token);
5345 wakaba 1.87 } else {
5346 wakaba 1.91 !!!cp ('t414.1');
5347 wakaba 1.87 }
5348 wakaba 1.91
5349 wakaba 1.197 ## 3. Pop
5350 wakaba 1.87 splice @{$self->{open_elements}}, $i;
5351     } else {
5352 wakaba 1.91 !!!cp ('t413.1');
5353 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5354     text => $token->{tag_name}, token => $token);
5355 wakaba 1.91
5356 wakaba 1.87 !!!cp ('t415.1');
5357     ## As if <p>, then reprocess the current token
5358     my $el;
5359 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
5360 wakaba 1.87 $insert->($el);
5361 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
5362 wakaba 1.87 }
5363 wakaba 1.91
5364 wakaba 1.87 !!!next-token;
5365 wakaba 1.126 next B;
5366 wakaba 1.52 } elsif ({
5367     a => 1,
5368     b => 1, big => 1, em => 1, font => 1, i => 1,
5369 wakaba 1.193 nobr => 1, s => 1, small => 1, strike => 1,
5370 wakaba 1.52 strong => 1, tt => 1, u => 1,
5371     }->{$token->{tag_name}}) {
5372 wakaba 1.79 !!!cp ('t427');
5373 wakaba 1.113 $formatting_end_tag->($token);
5374 wakaba 1.126 next B;
5375 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
5376 wakaba 1.79 !!!cp ('t428');
5377 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5378     text => 'br', token => $token);
5379 wakaba 1.52
5380     ## As if <br>
5381     $reconstruct_active_formatting_elements->($insert_to_current);
5382    
5383     my $el;
5384 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
5385 wakaba 1.52 $insert->($el);
5386    
5387     ## Ignore the token.
5388     !!!next-token;
5389 wakaba 1.126 next B;
5390 wakaba 1.52 } else {
5391 wakaba 1.195 if ($token->{tag_name} eq 'sarcasm') {
5392     sleep 0.001; # take a deep breath
5393     }
5394    
5395 wakaba 1.52 ## Step 1
5396     my $node_i = -1;
5397     my $node = $self->{open_elements}->[$node_i];
5398 wakaba 1.51
5399 wakaba 1.52 ## Step 2
5400     S2: {
5401 wakaba 1.200 my $node_tag_name = $node->[0]->manakai_local_name;
5402     $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5403     if ($node_tag_name eq $token->{tag_name}) {
5404 wakaba 1.52 ## Step 1
5405     ## generate implied end tags
5406 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5407 wakaba 1.79 !!!cp ('t430');
5408 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
5409     ## ISSUE: <ruby><rt></rt> will also take this code path,
5410     ## which seems wrong.
5411 wakaba 1.86 pop @{$self->{open_elements}};
5412 wakaba 1.151 $node_i++;
5413 wakaba 1.52 }
5414    
5415     ## Step 2
5416 wakaba 1.200 my $current_tag_name
5417     = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5418     $current_tag_name =~ tr/A-Z/a-z/;
5419     if ($current_tag_name ne $token->{tag_name}) {
5420 wakaba 1.79 !!!cp ('t431');
5421 wakaba 1.58 ## NOTE: <x><y></x>
5422 wakaba 1.122 !!!parse-error (type => 'not closed',
5423 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5424 wakaba 1.122 ->manakai_local_name,
5425     token => $token);
5426 wakaba 1.79 } else {
5427     !!!cp ('t432');
5428 wakaba 1.52 }
5429    
5430     ## Step 3
5431 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5432 wakaba 1.51
5433 wakaba 1.1 !!!next-token;
5434 wakaba 1.52 last S2;
5435 wakaba 1.1 } else {
5436 wakaba 1.52 ## Step 3
5437 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5438 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5439 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5440     $node->[1] & SCOPING_EL)) {
5441 wakaba 1.79 !!!cp ('t433');
5442 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5443     text => $token->{tag_name}, token => $token);
5444 wakaba 1.52 ## Ignore the token
5445     !!!next-token;
5446     last S2;
5447 wakaba 1.193
5448     ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5449     ## 9.27, "a" is a child of <dd> (conforming). In
5450     ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5451     ## "a" is a child of both <body> and <dd>.
5452 wakaba 1.52 }
5453 wakaba 1.193
5454 wakaba 1.79 !!!cp ('t434');
5455 wakaba 1.1 }
5456 wakaba 1.52
5457     ## Step 4
5458     $node_i--;
5459     $node = $self->{open_elements}->[$node_i];
5460    
5461     ## Step 5;
5462     redo S2;
5463     } # S2
5464 wakaba 1.126 next B;
5465 wakaba 1.1 }
5466     }
5467 wakaba 1.126 next B;
5468     } continue { # B
5469     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5470     ## NOTE: The code below is executed in cases where it does not have
5471     ## to be, but it is harmless even in those cases.
5472     ## has an element in scope
5473     INSCOPE: {
5474     for (reverse 0..$#{$self->{open_elements}}) {
5475     my $node = $self->{open_elements}->[$_];
5476     if ($node->[1] & FOREIGN_EL) {
5477     last INSCOPE;
5478     } elsif ($node->[1] & SCOPING_EL) {
5479     last;
5480     }
5481     }
5482    
5483     ## NOTE: No foreign element in scope.
5484     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5485     } # INSCOPE
5486     }
5487 wakaba 1.1 } # B
5488    
5489     ## Stop parsing # MUST
5490    
5491     ## TODO: script stuffs
5492 wakaba 1.3 } # _tree_construct_main
5493    
5494 wakaba 1.218 ## Class method: sets the content of |$node| (a Document or an Element, the /context/) from the HTML string passed after it; dies for any other node type.
5495     ## XXX: How this method is organized is somewhat out of date, although it still does what the current spec documents.
5496 wakaba 1.177 sub set_inner_html ($$$$;$) {
5497 wakaba 1.3 my $class = shift;
5498 wakaba 1.218 my $node = shift; # /context/
5499 wakaba 1.177 #my $s = \$_[0]; # (the string to parse is used in place as |$_[0]|)
5500 wakaba 1.3 my $onerror = $_[1]; # optional error callback
5501 wakaba 1.162 my $get_wrapper = $_[2] || sub ($) { return $_[0] }; # optional wrapper for the input handle; the default returns the handle unchanged
5502 wakaba 1.3 ## Dispatch on the DOM node type of the context node.
5503     my $nt = $node->node_type;
5504 wakaba 1.218 if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
5505 wakaba 1.3 # MUST
5506    
5507     ## Step 1 # MUST
5508     ## TODO: If the document has an active parser, ...
5509     ## ISSUE: There is an issue in the spec.
5510    
5511     ## Step 2 # MUST
5512     my @cn = @{$node->child_nodes}; # copy the child list first, then remove every child
5513     for (@cn) {
5514     $node->remove_child ($_);
5515     }
5516    
5517     ## Step 3, 4, 5 # MUST
5518 wakaba 1.177 $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
5519 wakaba 1.218 } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
5520 wakaba 1.3 ## TODO: If non-html element
5521    
5522     ## NOTE: Most of this code is copied from |parse_string|
5523    
5524 wakaba 1.162 ## TODO: Support for $get_wrapper (NOTE(review): |$get_wrapper| is applied to the input handle below, so this TODO may be stale -- confirm)
5525    
5526 wakaba 1.218 ## F1. Create an HTML document.
5527 wakaba 1.14 my $this_doc = $node->owner_document;
5528     my $doc = $this_doc->implementation->create_document; # temporary document that receives the parsed nodes
5529 wakaba 1.18 $doc->manakai_is_html (1);
5530 wakaba 1.218
5531     ## F2. Propagate quirkness flag
5532     my $node_doc = $node->owner_document; # NOTE(review): second |owner_document| call on the same node; presumably the same object as |$this_doc|
5533     $doc->manakai_compat_mode ($node_doc->manakai_compat_mode);
5534    
5535     ## F3. Create an HTML parser
5536 wakaba 1.3 my $p = $class->new;
5537     $p->{document} = $doc;
5538    
5539 wakaba 1.84 ## Step 8 # MUST
5540 wakaba 1.3 my $i = 0; # NOTE(review): |$i| is not referenced anywhere else in this method
5541 wakaba 1.121 $p->{line_prev} = $p->{line} = 1;
5542     $p->{column_prev} = $p->{column} = 0;
5543 wakaba 1.177 require Whatpm::Charset::DecodeHandle;
5544     my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0])); # handle over the caller's string (passed by reference)
5545     $input = $get_wrapper->($input);
5546 wakaba 1.183 $p->{set_nc} = sub {
5547 wakaba 1.3 my $self = shift;
5548 wakaba 1.14 ## Callback: stores the code point of the next input character in |$self->{nc}| (-1 when nothing more can be read) and updates the line/column counters.
5549 wakaba 1.178 my $char = '';
5550 wakaba 1.183 if (defined $self->{next_nc}) { # a character read ahead by the CR handling below is pending
5551     $char = $self->{next_nc};
5552     delete $self->{next_nc};
5553     $self->{nc} = ord $char;
5554 wakaba 1.177 } else {
5555 wakaba 1.180 $self->{char_buffer} = ''; # NOTE(review): the buffer is reset before every read; confirm no unread characters can remain in it here
5556     $self->{char_buffer_pos} = 0;
5557     ## First try |manakai_read_until| with a pattern that excludes U+0000, U+000A and U+000D (presumably it fills |char_buffer| with matching characters and returns their count -- confirm); only the first buffered character is taken here.
5558     my $count = $input->manakai_read_until
5559 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
5560     $self->{char_buffer_pos});
5561 wakaba 1.180 if ($count) {
5562     $self->{line_prev} = $self->{line};
5563     $self->{column_prev} = $self->{column};
5564     $self->{column}++;
5565 wakaba 1.183 $self->{nc}
5566 wakaba 1.180 = ord substr ($self->{char_buffer},
5567     $self->{char_buffer_pos}++, 1);
5568     return;
5569     }
5570     ## Otherwise read exactly one character; a failed read is reported as -1.
5571 wakaba 1.178 if ($input->read ($char, 1)) {
5572 wakaba 1.183 $self->{nc} = ord $char;
5573 wakaba 1.178 } else {
5574 wakaba 1.183 $self->{nc} = -1;
5575 wakaba 1.178 return;
5576     }
5577 wakaba 1.177 }
5578 wakaba 1.121 ## Position update for a character taken from |next_nc| or from the single-character read (done through the captured |$p|; presumably the same object as |$self|).
5579     ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
5580     $p->{column}++;
5581 wakaba 1.4 ## Newline normalization and NULL replacement.
5582 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
5583 wakaba 1.121 $p->{line}++;
5584     $p->{column} = 0;
5585 wakaba 1.79 !!!cp ('i1');
5586 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
5587 wakaba 1.177 ## TODO: support for abort/streaming
5588 wakaba 1.178 my $next = '';
5589     if ($input->read ($next, 1) and $next ne "\x0A") { # look one character ahead: an LF after the CR is dropped, anything else is kept for the next call
5590 wakaba 1.183 $self->{next_nc} = $next;
5591 wakaba 1.177 }
5592 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
5593 wakaba 1.121 $p->{line}++;
5594     $p->{column} = 0;
5595 wakaba 1.79 !!!cp ('i2');
5596 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
5597 wakaba 1.79 !!!cp ('i4');
5598 wakaba 1.14 !!!parse-error (type => 'NULL');
5599 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5600 wakaba 1.3 }
5601     };
5602 wakaba 1.171 ## Callback: appends a run of "ordinary" characters to the caller's scalar |$_[0]| and returns how many were taken (0 if none).
5603 wakaba 1.172 $p->{read_until} = sub {
5604 wakaba 1.177 #my ($scalar, $specials_range, $offset) = @_;
5605 wakaba 1.183 return 0 if defined $p->{next_nc}; # a read-ahead character is pending; nothing is consumed here
5606 wakaba 1.180 ## Accepted characters: anything except the caller-supplied specials (|$_[1]|, interpolated into the character class) and U+0000, U+000A, U+000D.
5607 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
5608 wakaba 1.180 my $offset = $_[2] || 0;
5609     ## Unread characters left in |char_buffer| are consumed first; otherwise the input handle is read directly.
5610     if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
5611     pos ($p->{char_buffer}) = $p->{char_buffer_pos};
5612     if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
5613     substr ($_[0], $offset)
5614     = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
5615     my $count = $+[0] - $-[0];
5616     if ($count) {
5617     $p->{column} += $count;
5618     $p->{char_buffer_pos} += $count;
5619     $p->{line_prev} = $p->{line};
5620     $p->{column_prev} = $p->{column} - 1;
5621 wakaba 1.183 $p->{nc} = -1;
5622 wakaba 1.180 }
5623     return $count;
5624     } else {
5625     return 0;
5626     }
5627     } else {
5628     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]); # NOTE(review): passes |$_[2]| rather than |$offset| (which defaults to 0) -- confirm this is intended
5629     if ($count) {
5630     $p->{column} += $count;
5631     $p->{column_prev} += $count;
5632 wakaba 1.183 $p->{nc} = -1;
5633 wakaba 1.180 }
5634     return $count;
5635 wakaba 1.177 }
5636     }; # $p->{read_until}
5637 wakaba 1.171 ## Error reporting: use the caller's |$onerror| or, by default, |warn| a message carrying the position (the token's position when the token has one).
5638 wakaba 1.3 my $ponerror = $onerror || sub {
5639     my (%opt) = @_;
5640 wakaba 1.121 my $line = $opt{line};
5641     my $column = $opt{column};
5642     if (defined $opt{token} and defined $opt{token}->{line}) {
5643     $line = $opt{token}->{line};
5644     $column = $opt{token}->{column};
5645     }
5646     warn "Parse error ($opt{type}) at line $line column $column\n";
5647 wakaba 1.3 };
5648     $p->{parse_error} = sub { # puts the parser's current line and column in front of the supplied arguments
5649 wakaba 1.121 $ponerror->(line => $p->{line}, column => $p->{column}, @_);
5650 wakaba 1.3 };
5651     ## Errors raised by the input handle are forwarded to the same handler with |layer => 'encode'|.
5652 wakaba 1.178 my $char_onerror = sub {
5653     my (undef, $type, %opt) = @_;
5654     $ponerror->(layer => 'encode',
5655     line => $p->{line}, column => $p->{column} + 1,
5656     %opt, type => $type);
5657     }; # $char_onerror
5658     $input->onerror ($char_onerror);
5659    
5660 wakaba 1.3 $p->_initialize_tokenizer;
5661     $p->_initialize_tree_constructor;
5662    
5663 wakaba 1.218 ## F4. If /context/ is not undef...
5664    
5665     ## F4.1. content model flag (chosen by the local name of the context element; PCDATA when the name is not in the table)
5666 wakaba 1.71 my $node_ln = $node->manakai_local_name;
5667 wakaba 1.40 $p->{content_model} = {
5668     title => RCDATA_CONTENT_MODEL,
5669     textarea => RCDATA_CONTENT_MODEL,
5670     style => CDATA_CONTENT_MODEL,
5671     script => CDATA_CONTENT_MODEL,
5672     xmp => CDATA_CONTENT_MODEL,
5673     iframe => CDATA_CONTENT_MODEL,
5674     noembed => CDATA_CONTENT_MODEL,
5675     noframes => CDATA_CONTENT_MODEL,
5676     noscript => CDATA_CONTENT_MODEL,
5677     plaintext => PLAINTEXT_CONTENT_MODEL,
5678     }->{$node_ln};
5679     $p->{content_model} = PCDATA_CONTENT_MODEL
5680     unless defined $p->{content_model};
5681 wakaba 1.3
5682 wakaba 1.123 $p->{inner_html_node} = [$node, $el_category->{$node_ln}]; # the context node paired with the category looked up by its local name
5683     ## TODO: Foreign element OK?
5684 wakaba 1.3
5685 wakaba 1.218 ## F4.2. Root |html| element
5686 wakaba 1.3 my $root = $doc->create_element_ns
5687     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
5688    
5689 wakaba 1.218 ## F4.3. Append the root element to the temporary document.
5690 wakaba 1.3 $doc->append_child ($root);
5691    
5692 wakaba 1.218 ## F4.4. Put the root element on |open_elements|.
5693 wakaba 1.123 push @{$p->{open_elements}}, [$root, $el_category->{html}];
5694 wakaba 1.3
5695     undef $p->{head_element};
5696 wakaba 1.202 undef $p->{head_element_inserted};
5697 wakaba 1.3
5698 wakaba 1.218 ## F4.5. Reset the insertion mode.
5699 wakaba 1.3 $p->_reset_insertion_mode;
5700    
5701 wakaba 1.218 ## F4.6. Set |form_element| to the nearest XHTML-namespace |form| element found by walking from the context node itself up through its parents, if any.
5702 wakaba 1.3 my $anode = $node;
5703     AN: while (defined $anode) {
5704     if ($anode->node_type == 1) {
5705     my $nsuri = $anode->namespace_uri;
5706     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
5707 wakaba 1.71 if ($anode->manakai_local_name eq 'form') {
5708 wakaba 1.79 !!!cp ('i5');
5709 wakaba 1.3 $p->{form_element} = $anode;
5710     last AN;
5711     }
5712     }
5713     }
5714     $anode = $anode->parent_node;
5715     } # AN
5716 wakaba 1.218
5717 wakaba 1.235 ## F.5. Set the input stream.
5718 wakaba 1.236 $p->{confident} = 1; ## Confident: irrelevant.
5719 wakaba 1.235
5720 wakaba 1.218 ## F.6. Start the parser.
5721 wakaba 1.3 {
5722     my $self = $p; # presumably the next-token macro expands to code that refers to |$self| -- confirm in the macro definition
5723     !!!next-token;
5724     }
5725     $p->_tree_construction_main;
5726    
5727 wakaba 1.218 ## F.7. Remove every existing child of the context node.
5728 wakaba 1.3 my @cn = @{$node->child_nodes};
5729     for (@cn) {
5730     $node->remove_child ($_);
5731     }
5732     ## ISSUE: mutation events? read-only?
5733    
5734 wakaba 1.84 ## Step 11 # MUST
5735 wakaba 1.3 @cn = @{$root->child_nodes}; # the parsed nodes are moved from the temporary root to the context node
5736     for (@cn) {
5737 wakaba 1.14 $this_doc->adopt_node ($_);
5738 wakaba 1.3 $node->append_child ($_);
5739     }
5740 wakaba 1.14 ## ISSUE: mutation events?
5741 wakaba 1.3
5742     $p->_terminate_tree_constructor;
5743 wakaba 1.121 ## NOTE(review): the |set_nc| and |read_until| closures also capture |$p|; confirm whether they must be deleted as well to break the reference loop.
5744     delete $p->{parse_error}; # delete loop
5745 wakaba 1.3 } else {
5746     die "$0: |set_inner_html| is not defined for node of type $nt";
5747     }
5748     } # set_inner_html
5749    
5750     } # tree construction stage
5751 wakaba 1.1
5752 wakaba 1.63 package Whatpm::HTML::RestartParser; # NOTE(review): presumably the exception class used to request a parser restart -- confirm at the throw site
5753     push our @ISA, 'Error'; # inherits from |Error| (loaded at the top of the file by |use Error qw(:try)|)
5754    
5755 wakaba 1.1 1; # true value returned when the module file is loaded
5756 wakaba 1.244 # $Date: 2009/09/06 13:52:06 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24