/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.238 - (hide annotations) (download) (as text)
Sun Sep 6 09:53:29 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.237: +68 -139 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	6 Sep 2009 09:53:12 -0000
	* tree-test-1.dat, tree-test-2.dat: Some test results are updated
	as per HTML5 revision 2730's new handling of end tags in the
	|head| area.

2009-09-06  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	6 Sep 2009 09:52:24 -0000
	* HTML.pm.src: Rewrote end tag handling in |head| area (cf. HTML5
	revision 2730, but it was entirely broken, maybe I missed some
	spec changes before rev.2730).

2009-09-06  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.238 our $VERSION=do{my @r=(q$Revision: 1.237 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.208 use Whatpm::HTML::Tokenizer;
7    
8 wakaba 1.182 ## NOTE: This module does not check all HTML5 parse errors; character
9     ## encoding related parse errors are expected to be handled by relevant
10     ## modules.
11     ## Parse errors for control characters that are not allowed in HTML5
12     ## documents, for surrogate code points, and for noncharacter code
13     ## points, as well as U+FFFD substitutions for characters whose code points
14     ## are higher than U+10FFFF may be detected by combining the parser with
15     ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16     ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17     ## WebHACC::Language::HTML module in the WebHACC package).
18    
19 wakaba 1.18 ## ISSUE:
20     ## var doc = implementation.createDocument (null, null, null);
21     ## doc.write ('');
22     ## alert (doc.compatMode);
23 wakaba 1.1
24 wakaba 1.139 require IO::Handle;
25    
26 wakaba 1.208 ## Namespace URLs
27    
28 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30     my $SVG_NS = q<http://www.w3.org/2000/svg>;
31     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34    
35 wakaba 1.208 ## Element categories
##
## Each element category is an integer built from the constants below.
## Bits 3-15 are flag bits that several categories may share; bits 0-2
## hold a small number that tells apart categories whose flag bits are
## the same.  The composite constants further down are bit-ors of flag
## bits and such a number.
36    
37 wakaba 1.206 ## Bits 12-15
38     sub SPECIAL_EL () { 0b1_000000000000000 }
39     sub SCOPING_EL () { 0b1_00000000000000 }
40     sub FORMATTING_EL () { 0b1_0000000000000 }
41     sub PHRASING_EL () { 0b1_000000000000 }
42    
43     ## Bits 10-11
## NOTE: FOREIGN_EL (bit 11) is not defined here; its definition below is
## commented out with a pointer to Whatpm::HTML::Tokenizer.
44 wakaba 1.208 #sub FOREIGN_EL () { 0b1_00000000000 } # see Whatpm::HTML::Tokenizer
45 wakaba 1.206 sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 }
46    
47     ## Bits 6-9
48     sub TABLE_SCOPING_EL () { 0b1_000000000 }
49     sub TABLE_ROWS_SCOPING_EL () { 0b1_00000000 }
50     sub TABLE_ROW_SCOPING_EL () { 0b1_0000000 }
51     sub TABLE_ROWS_EL () { 0b1_000000 }
52    
53     ## Bit 5
54     sub ADDRESS_DIV_P_EL () { 0b1_00000 }
55    
56     ## NOTE: Used in </body> and EOF algorithms.
57     ## Bit 4
58     sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
59 wakaba 1.123
60 wakaba 1.151 ## NOTE: Used in "generate implied end tags" algorithm.
61 wakaba 1.194 ## NOTE: There is code in which a modified version of
62     ## END_TAG_OPTIONAL_EL is used in the "generate implied end tags"
63     ## implementation (search for the algorithm name).
64 wakaba 1.206 ## Bit 3
65     sub END_TAG_OPTIONAL_EL () { 0b1_000 }
66    
67     ## Bits 0-2
68    
## Categories whose only flag bit is SPECIAL_EL.
69     sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
70     sub FORM_EL () { SPECIAL_EL | 0b001 }
71     sub FRAMESET_EL () { SPECIAL_EL | 0b010 }
72     sub HEADING_EL () { SPECIAL_EL | 0b011 }
73     sub SELECT_EL () { SPECIAL_EL | 0b100 }
74     sub SCRIPT_EL () { SPECIAL_EL | 0b101 }
75    
## Categories that combine SPECIAL_EL with further flag bits.
76     sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
77     sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }
78    
79 wakaba 1.207 sub DTDD_EL () {
80 wakaba 1.206 SPECIAL_EL |
81     END_TAG_OPTIONAL_EL |
82     ALL_END_TAG_OPTIONAL_EL |
83     0b010
84     }
85     sub LI_EL () {
86     SPECIAL_EL |
87     END_TAG_OPTIONAL_EL |
88     ALL_END_TAG_OPTIONAL_EL |
89     0b100
90     }
91     sub P_EL () {
92     SPECIAL_EL |
93     ADDRESS_DIV_P_EL |
94     END_TAG_OPTIONAL_EL |
95     ALL_END_TAG_OPTIONAL_EL |
96     0b001
97 wakaba 1.123 }
98    
99 wakaba 1.206 sub TABLE_ROW_EL () {
100     SPECIAL_EL |
101     TABLE_ROWS_EL |
102     TABLE_ROW_SCOPING_EL |
103     ALL_END_TAG_OPTIONAL_EL |
104     0b001
105     }
106     sub TABLE_ROW_GROUP_EL () {
107     SPECIAL_EL |
108     TABLE_ROWS_EL |
109     TABLE_ROWS_SCOPING_EL |
110     ALL_END_TAG_OPTIONAL_EL |
111     0b001
112 wakaba 1.123 }
113    
## Categories based on SCOPING_EL.
114 wakaba 1.206 sub MISC_SCOPING_EL () { SCOPING_EL | 0b000 }
115     sub BUTTON_EL () { SCOPING_EL | 0b001 }
116     sub CAPTION_EL () { SCOPING_EL | 0b010 }
117     sub HTML_EL () {
118     SCOPING_EL |
119     TABLE_SCOPING_EL |
120     TABLE_ROWS_SCOPING_EL |
121     TABLE_ROW_SCOPING_EL |
122     ALL_END_TAG_OPTIONAL_EL |
123     0b001
124 wakaba 1.123 }
125 wakaba 1.206 sub TABLE_EL () {
126     SCOPING_EL |
127     TABLE_ROWS_EL |
128     TABLE_SCOPING_EL |
129     0b001
130 wakaba 1.123 }
131 wakaba 1.206 sub TABLE_CELL_EL () {
132     SCOPING_EL |
133     TABLE_ROW_SCOPING_EL |
134     ALL_END_TAG_OPTIONAL_EL |
135     0b001
136 wakaba 1.123 }
137    
## Categories based on FORMATTING_EL.
138 wakaba 1.206 sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
139     sub A_EL () { FORMATTING_EL | 0b001 }
140     sub NOBR_EL () { FORMATTING_EL | 0b010 }
141    
## Categories based on PHRASING_EL.
142     sub RUBY_EL () { PHRASING_EL | 0b001 }
143    
144     ## ISSUE: ALL_END_TAG_OPTIONAL_EL?
145     sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
146     sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
147     sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }
148 wakaba 1.123
## NOTE: Relies on FOREIGN_EL, which is not defined in this part of the
## file (see the note at "Bits 10-11").
149 wakaba 1.206 sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 0b001 }
150 wakaba 1.123
## Maps the names of HTML elements to their element category values
## (see the category constants defined earlier in this file).
## NOTE: Several formatting elements are mapped to the bare FORMATTING_EL
## flag, which has the same value as MISC_FORMATTING_EL
## (FORMATTING_EL | 0b000).
151     my $el_category = {
152 wakaba 1.206 a => A_EL,
153     address => ADDRESS_DIV_EL,
154 wakaba 1.123 applet => MISC_SCOPING_EL,
155     area => MISC_SPECIAL_EL,
156 wakaba 1.193 article => MISC_SPECIAL_EL,
157     aside => MISC_SPECIAL_EL,
158 wakaba 1.123 b => FORMATTING_EL,
159     base => MISC_SPECIAL_EL,
160     basefont => MISC_SPECIAL_EL,
161     bgsound => MISC_SPECIAL_EL,
162     big => FORMATTING_EL,
163     blockquote => MISC_SPECIAL_EL,
164     body => BODY_EL,
165     br => MISC_SPECIAL_EL,
166     button => BUTTON_EL,
167     caption => CAPTION_EL,
168     center => MISC_SPECIAL_EL,
169     col => MISC_SPECIAL_EL,
170     colgroup => MISC_SPECIAL_EL,
171 wakaba 1.193 command => MISC_SPECIAL_EL,
172     datagrid => MISC_SPECIAL_EL,
173 wakaba 1.207 dd => DTDD_EL,
174 wakaba 1.193 details => MISC_SPECIAL_EL,
175     dialog => MISC_SPECIAL_EL,
176 wakaba 1.123 dir => MISC_SPECIAL_EL,
177 wakaba 1.206 div => ADDRESS_DIV_EL,
178 wakaba 1.123 dl => MISC_SPECIAL_EL,
179 wakaba 1.207 dt => DTDD_EL,
180 wakaba 1.123 em => FORMATTING_EL,
181     embed => MISC_SPECIAL_EL,
182     fieldset => MISC_SPECIAL_EL,
183 wakaba 1.193 figure => MISC_SPECIAL_EL,
184 wakaba 1.123 font => FORMATTING_EL,
185 wakaba 1.193 footer => MISC_SPECIAL_EL,
186 wakaba 1.123 form => FORM_EL,
187     frame => MISC_SPECIAL_EL,
188     frameset => FRAMESET_EL,
189     h1 => HEADING_EL,
190     h2 => HEADING_EL,
191     h3 => HEADING_EL,
192     h4 => HEADING_EL,
193     h5 => HEADING_EL,
194     h6 => HEADING_EL,
195     head => MISC_SPECIAL_EL,
196 wakaba 1.193 header => MISC_SPECIAL_EL,
197 wakaba 1.237 hgroup => MISC_SPECIAL_EL,
198 wakaba 1.123 hr => MISC_SPECIAL_EL,
199     html => HTML_EL,
200     i => FORMATTING_EL,
201     iframe => MISC_SPECIAL_EL,
202     img => MISC_SPECIAL_EL,
203 wakaba 1.193 #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
204 wakaba 1.123 input => MISC_SPECIAL_EL,
205     isindex => MISC_SPECIAL_EL,
206 wakaba 1.232 ## XXX keygen? (Whether a void element is in Special or not does not
207     ## affect to the processing, however.)
208 wakaba 1.123 li => LI_EL,
209     link => MISC_SPECIAL_EL,
210     listing => MISC_SPECIAL_EL,
211     marquee => MISC_SCOPING_EL,
212     menu => MISC_SPECIAL_EL,
213     meta => MISC_SPECIAL_EL,
214 wakaba 1.193 nav => MISC_SPECIAL_EL,
215 wakaba 1.206 nobr => NOBR_EL,
216 wakaba 1.123 noembed => MISC_SPECIAL_EL,
217     noframes => MISC_SPECIAL_EL,
218     noscript => MISC_SPECIAL_EL,
219     object => MISC_SCOPING_EL,
220     ol => MISC_SPECIAL_EL,
221     optgroup => OPTGROUP_EL,
222     option => OPTION_EL,
223     p => P_EL,
224     param => MISC_SPECIAL_EL,
225     plaintext => MISC_SPECIAL_EL,
226     pre => MISC_SPECIAL_EL,
227 wakaba 1.151 rp => RUBY_COMPONENT_EL,
228     rt => RUBY_COMPONENT_EL,
229     ruby => RUBY_EL,
230 wakaba 1.123 s => FORMATTING_EL,
231     script => MISC_SPECIAL_EL,
232     select => SELECT_EL,
233 wakaba 1.193 section => MISC_SPECIAL_EL,
234 wakaba 1.123 small => FORMATTING_EL,
235     spacer => MISC_SPECIAL_EL,
236     strike => FORMATTING_EL,
237     strong => FORMATTING_EL,
238     style => MISC_SPECIAL_EL,
239     table => TABLE_EL,
240     tbody => TABLE_ROW_GROUP_EL,
241     td => TABLE_CELL_EL,
242     textarea => MISC_SPECIAL_EL,
243     tfoot => TABLE_ROW_GROUP_EL,
244     th => TABLE_CELL_EL,
245     thead => TABLE_ROW_GROUP_EL,
246     title => MISC_SPECIAL_EL,
247     tr => TABLE_ROW_EL,
248     tt => FORMATTING_EL,
249     u => FORMATTING_EL,
250     ul => MISC_SPECIAL_EL,
251     wbr => MISC_SPECIAL_EL,
252 wakaba 1.236 xmp => MISC_SPECIAL_EL,
253 wakaba 1.123 };
254    
## Element categories of non-HTML elements that need special handling,
## keyed first by namespace URL (MathML, SVG) and then by element name.
255 wakaba 1.126 my $el_category_f = {
256     $MML_NS => {
257     'annotation-xml' => MML_AXML_EL,
258 wakaba 1.206 mi => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
259     mo => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
260     mn => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
261     ms => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
262     mtext => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
263 wakaba 1.126 },
264     $SVG_NS => {
265 wakaba 1.206 foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
266     desc => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
267     title => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
268 wakaba 1.126 },
269     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
270     };
271    
## Maps all-lowercase SVG attribute names to their mixed-case spellings.
## NOTE(review): The code that consults this table is not in this part of
## the file; presumably it restores the case of attribute names that were
## lowercased during tokenization -- confirm against the user of the table.
272 wakaba 1.131 my $svg_attr_name = {
273 wakaba 1.146 attributename => 'attributeName',
274 wakaba 1.131 attributetype => 'attributeType',
275     basefrequency => 'baseFrequency',
276     baseprofile => 'baseProfile',
277     calcmode => 'calcMode',
278     clippathunits => 'clipPathUnits',
279     contentscripttype => 'contentScriptType',
280     contentstyletype => 'contentStyleType',
281     diffuseconstant => 'diffuseConstant',
282     edgemode => 'edgeMode',
283     externalresourcesrequired => 'externalResourcesRequired',
284     filterres => 'filterRes',
285     filterunits => 'filterUnits',
286     glyphref => 'glyphRef',
287     gradienttransform => 'gradientTransform',
288     gradientunits => 'gradientUnits',
289     kernelmatrix => 'kernelMatrix',
290     kernelunitlength => 'kernelUnitLength',
291     keypoints => 'keyPoints',
292     keysplines => 'keySplines',
293     keytimes => 'keyTimes',
294     lengthadjust => 'lengthAdjust',
295     limitingconeangle => 'limitingConeAngle',
296     markerheight => 'markerHeight',
297     markerunits => 'markerUnits',
298     markerwidth => 'markerWidth',
299     maskcontentunits => 'maskContentUnits',
300     maskunits => 'maskUnits',
301     numoctaves => 'numOctaves',
302     pathlength => 'pathLength',
303     patterncontentunits => 'patternContentUnits',
304     patterntransform => 'patternTransform',
305     patternunits => 'patternUnits',
306     pointsatx => 'pointsAtX',
307     pointsaty => 'pointsAtY',
308     pointsatz => 'pointsAtZ',
309     preservealpha => 'preserveAlpha',
310     preserveaspectratio => 'preserveAspectRatio',
311     primitiveunits => 'primitiveUnits',
312     refx => 'refX',
313     refy => 'refY',
314     repeatcount => 'repeatCount',
315     repeatdur => 'repeatDur',
316     requiredextensions => 'requiredExtensions',
317 wakaba 1.146 requiredfeatures => 'requiredFeatures',
318 wakaba 1.131 specularconstant => 'specularConstant',
319     specularexponent => 'specularExponent',
320     spreadmethod => 'spreadMethod',
321     startoffset => 'startOffset',
322     stddeviation => 'stdDeviation',
323     stitchtiles => 'stitchTiles',
324     surfacescale => 'surfaceScale',
325     systemlanguage => 'systemLanguage',
326     tablevalues => 'tableValues',
327     targetx => 'targetX',
328     targety => 'targetY',
329     textlength => 'textLength',
330     viewbox => 'viewBox',
331     viewtarget => 'viewTarget',
332     xchannelselector => 'xChannelSelector',
333     ychannelselector => 'yChannelSelector',
334     zoomandpan => 'zoomAndPan',
335     };
336    
## Maps the qualified names of namespaced attributes to
## [namespace URL, [namespace prefix, local name]].  The prefix is |undef|
## for the unprefixed |xmlns| attribute.
337     my $foreign_attr_xname = {
338     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
339     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
340     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
341     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
342     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
343     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
344     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
345     'xml:base' => [$XML_NS, ['xml', 'base']],
346     'xml:lang' => [$XML_NS, ['xml', 'lang']],
347     'xml:space' => [$XML_NS, ['xml', 'space']],
348     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
349     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
350     };
351    
352     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
353    
354 wakaba 1.192 ## TODO: Invoke the reset algorithm when a resettable element is
355     ## created (cf. HTML5 revision 2259).
356    
## Parses a byte string as HTML.
##
## Positional arguments: the parser object (or class name) on which the
## method is invoked, the charset name (may be |undef|; it is passed on
## to |parse_byte_stream| unchanged), the byte string or a reference to
## it, and then the arguments that |parse_byte_stream| takes after its
## byte stream ($doc, $onerror, $get_wrapper).
##
## Returns whatever |parse_byte_stream| returns.
357 wakaba 1.63 sub parse_byte_string ($$$$;$) {
358 wakaba 1.138 my $self = shift;
359     my $charset_name = shift;
## Open a read handle on the string itself (via a scalar reference), so
## that the stream-based method can be reused.
## NOTE(review): The result of |open| is not checked; a failure to open
## would go unnoticed here.
360     open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
## $_[0] is the byte string; the remaining arguments are forwarded as is.
361     return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
362     } # parse_byte_string
363    
## Parses a byte stream as HTML, choosing a character encoding first.
##
## Positional arguments (see the commented-out list on the first line of
## the body): the parser object (or the class name, in which case a new
## parser is created), $charset_name (a charset label known in advance,
## or |undef|), $byte_stream (a handle; |getc| is called on it while
## sniffing), $doc (the document handed to |parse_char_stream|), $onerror
## (optional error callback) and $get_wrapper (optional callback that
## receives the decoded character stream handle and returns the handle
## that is actually parsed).
##
## The encoding is chosen by the SNIFFING block.  If the
## |{change_encoding}| callback installed here later throws
## |Whatpm::HTML::RestartParser|, the document is parsed a second time
## with the decoder selected by that callback.
##
## Returns the value returned by |parse_char_stream|.
364 wakaba 1.162 sub parse_byte_stream ($$$$;$$) {
365     # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
366 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
367 wakaba 1.133 my $charset_name = shift;
368 wakaba 1.138 my $byte_stream = $_[0];
369 wakaba 1.133
## Default error handler: warn with the error type only.
370 wakaba 1.134 my $onerror = $_[2] || sub {
371     my (%opt) = @_;
372     warn "Parse error ($opt{type})\n";
373     };
374     $self->{parse_error} = $onerror; # updated later by parse_char_stream
375    
## Default wrapper: use the character stream handle unchanged.
376 wakaba 1.162 my $get_wrapper = $_[3] || sub ($) {
377     return $_[0]; # $_[0] = character stream handle; returned = handle to be parsed
378     };
379    
380 wakaba 1.133 ## HTML5 encoding sniffing algorithm
381     require Message::Charset::Info;
382     my $charset;
383 wakaba 1.136 my $buffer;
384     my ($char_stream, $e_status);
385 wakaba 1.133
386     SNIFFING: {
387 wakaba 1.160 ## NOTE: By setting |allow_fallback| option true when the
388     ## |get_decode_handle| method is invoked, we ignore what the HTML5
389     ## spec requires, i.e. unsupported encoding should be ignored.
390     ## TODO: We should not do this unless the parser is invoked
391     ## in the conformance checking mode, in which this behavior
392     ## would be useful.
393 wakaba 1.133
394     ## Step 1
## A charset name supplied by the caller is used with confidence if a
## decode handle can be obtained for it; otherwise an error is reported
## and sniffing continues.
395     if (defined $charset_name) {
396 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
397     ## TODO: Is this ok? Transfer protocol's parameter should be
398     ## interpreted in its semantics?
399 wakaba 1.133
400 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
401     ($byte_stream, allow_error_reporting => 1,
402 wakaba 1.133 allow_fallback => 1);
403 wakaba 1.136 if ($char_stream) {
404 wakaba 1.133 $self->{confident} = 1;
405     last SNIFFING;
406 wakaba 1.136 } else {
407 wakaba 1.190 !!!parse-error (type => 'charset:not supported',
408     layer => 'encode',
409     line => 1, column => 1,
410     value => $charset_name,
411     level => $self->{level}->{uncertain});
412 wakaba 1.133 }
413     }
414    
415     ## Step 2
## Read up to 1024 bytes from the stream; they are examined by the steps
## below and handed to the decoder through the |byte_buffer| option.
416 wakaba 1.136 my $byte_buffer = '';
417     for (1..1024) {
418     my $char = $byte_stream->getc;
419     last unless defined $char;
420     $byte_buffer .= $char;
421     } ## TODO: timeout
422 wakaba 1.133
423     ## Step 3
## A UTF-16BE, UTF-16LE or UTF-8 byte order mark at the start of the
## input decides the encoding with confidence.
424 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
425 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
426 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
427     ($byte_stream, allow_error_reporting => 1,
428     allow_fallback => 1, byte_buffer => \$byte_buffer);
429 wakaba 1.133 $self->{confident} = 1;
430     last SNIFFING;
431 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
432 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
433 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
434     ($byte_stream, allow_error_reporting => 1,
435     allow_fallback => 1, byte_buffer => \$byte_buffer);
436 wakaba 1.133 $self->{confident} = 1;
437     last SNIFFING;
438 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
439 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
440 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
441     ($byte_stream, allow_error_reporting => 1,
442     allow_fallback => 1, byte_buffer => \$byte_buffer);
443 wakaba 1.133 $self->{confident} = 1;
444     last SNIFFING;
445     }
446    
447     ## Step 4
448     ## TODO: <meta charset>
449    
450     ## Step 5
451     ## TODO: from history
452    
453     ## Step 6
## Let the charset detector guess from the bytes read in Step 2.  The
## guess is used tentatively (|{confident}| is 0) and reported as an
## informational error.  The byte stream is wrapped in a ByteBuffer
## object, whose |{buffer}| member is set to the bytes already read.
454 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
455 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
456 wakaba 1.136 ($byte_buffer);
457 wakaba 1.133 if (defined $charset_name) {
458 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
459 wakaba 1.133
460 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
461     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
462     ($byte_stream);
463     ($char_stream, $e_status) = $charset->get_decode_handle
464     ($buffer, allow_error_reporting => 1,
465     allow_fallback => 1, byte_buffer => \$byte_buffer);
466     if ($char_stream) {
467     $buffer->{buffer} = $byte_buffer;
468 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
469     text => $charset_name,
470     level => $self->{level}->{info},
471     layer => 'encode',
472 wakaba 1.134 line => 1, column => 1);
473 wakaba 1.133 $self->{confident} = 0;
474     last SNIFFING;
475     }
476     }
477    
478     ## Step 7: default
479     ## TODO: Make this configurable.
480 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
481 wakaba 1.133 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
482     ## detectable in the step 6.
483 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
484     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
485     ($byte_stream);
486     ($char_stream, $e_status)
487     = $charset->get_decode_handle ($buffer,
488     allow_error_reporting => 1,
489     allow_fallback => 1,
490     byte_buffer => \$byte_buffer);
491     $buffer->{buffer} = $byte_buffer;
492 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
493     text => 'windows-1252',
494     level => $self->{level}->{info},
495     line => 1, column => 1,
496     layer => 'encode');
497 wakaba 1.63 $self->{confident} = 0;
498 wakaba 1.133 } # SNIFFING
499    
## Record the name of the chosen encoding and report, according to the
## status flags returned by |get_decode_handle|, when a fallback decoder
## is in use or when the decoder does not report errors.
500     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
501 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
502 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
503 wakaba 1.160 #text => $self->{input_encoding},
504 wakaba 1.153 level => $self->{level}->{uncertain},
505     line => 1, column => 1,
506     layer => 'encode');
507 wakaba 1.133 } elsif (not ($e_status &
508 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
509 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
510 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
511     text => $self->{input_encoding},
512     level => $self->{level}->{uncertain},
513     line => 1, column => 1,
514     layer => 'encode');
515 wakaba 1.160 } else {
516     $self->{input_encoding} = $charset->get_iana_name;
517 wakaba 1.63 }
518    
## Callback implementing the "change the encoding" algorithm.  Arguments:
## the parser, the new charset name and a token that is attached to the
## error report.  It overwrites the enclosing $charset_name, $charset,
## $char_stream and $e_status, which the |catch| block at the end of this
## method reads when the parser is restarted.
## NOTE(review): The caller of this callback is not in this part of the
## file; presumably it is invoked when a charset declaration is found
## while the encoding is not confident -- confirm.
519     $self->{change_encoding} = sub {
520     my $self = shift;
521 wakaba 1.134 $charset_name = shift;
522 wakaba 1.114 my $token = shift;
523 wakaba 1.63
524 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
525 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
526     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
527     byte_buffer => \ $buffer->{buffer});
528 wakaba 1.134
529 wakaba 1.136 if ($char_stream) { # if supported
530 wakaba 1.134 ## "Change the encoding" algorithm:
531 wakaba 1.215
532     ## Step 1
533     if (defined $self->{input_encoding} and
534     $self->{input_encoding} eq $charset_name) {
535     !!!parse-error (type => 'charset label:matching',
536     text => $charset_name,
537     level => $self->{level}->{info});
538     $self->{confident} = 1;
539     return;
540     }
541 wakaba 1.63
542 wakaba 1.214 ## Step 2 (HTML5 revision 3205)
543     if (defined $self->{input_encoding} and
544     Message::Charset::Info->get_by_html_name ($self->{input_encoding})
545     ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
546     $self->{confident} = 1;
547     return;
548     }
549    
550     ## Step 3
## NOTE(review): Unlike the other calls in this method, this call passes
## neither |allow_error_reporting| nor |allow_fallback| -- confirm that
## this is intended.
551 wakaba 1.149 if ($charset->{category} &
552     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
553 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
554 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
555     ($byte_stream,
556     byte_buffer => \ $buffer->{buffer});
557 wakaba 1.134 }
558     $charset_name = $charset->get_iana_name;
559 wakaba 1.63
560 wakaba 1.153 !!!parse-error (type => 'charset label detected',
561     text => $self->{input_encoding},
562     value => $charset_name,
563     level => $self->{level}->{warn},
564     token => $token);
565 wakaba 1.134
566 wakaba 1.214 ## Step 4
567 wakaba 1.134 # if (can) {
568     ## change the encoding on the fly.
569     #$self->{confident} = 1;
570     #return;
571     # }
572    
573 wakaba 1.214 ## Step 5
574 wakaba 1.134 throw Whatpm::HTML::RestartParser ();
575 wakaba 1.63 }
576     }; # $self->{change_encoding}
577    
## Error callback installed on the character stream: reports the error
## at the parser's current position and, when the |octets| option is
## given, overwrites the referenced scalar with U+FFFD.
578 wakaba 1.136 my $char_onerror = sub {
579     my (undef, $type, %opt) = @_;
580 wakaba 1.153 !!!parse-error (layer => 'encode',
581 wakaba 1.174 line => $self->{line}, column => $self->{column} + 1,
582     %opt, type => $type);
583 wakaba 1.136 if ($opt{octets}) {
584     ${$opt{octets}} = "\x{FFFD}"; # replacement character
585     }
586     };
587 wakaba 1.162
588     my $wrapped_char_stream = $get_wrapper->($char_stream);
589     $wrapped_char_stream->onerror ($char_onerror);
590 wakaba 1.136
## Only $doc and $onerror are forwarded; the wrapper has already been
## applied above, so |parse_char_stream| must not apply it again.
591 wakaba 1.182 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
592 wakaba 1.63 my $return;
593     try {
594 wakaba 1.162 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
595 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
596 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
597    
## Same reporting as after the SNIFFING block, now for the decoder that
## |{change_encoding}| selected; then parse again from the new stream.
598     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
599 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
600 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
601     level => $self->{level}->{uncertain},
602 wakaba 1.160 #text => $self->{input_encoding},
603 wakaba 1.153 line => 1, column => 1,
604     layer => 'encode');
605 wakaba 1.134 } elsif (not ($e_status &
606 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
607 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
608 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
609     text => $self->{input_encoding},
610     level => $self->{level}->{uncertain},
611     line => 1, column => 1,
612     layer => 'encode');
613 wakaba 1.160 } else {
614     $self->{input_encoding} = $charset->get_iana_name;
615 wakaba 1.134 }
616 wakaba 1.63 $self->{confident} = 1;
617 wakaba 1.162
618     $wrapped_char_stream = $get_wrapper->($char_stream);
619     $wrapped_char_stream->onerror ($char_onerror);
620    
621     $return = $self->parse_char_stream ($wrapped_char_stream, @args);
622 wakaba 1.63 };
623     return $return;
624 wakaba 1.138 } # parse_byte_stream
625 wakaba 1.63
626 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
627     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
628     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
629     ## because the core part of our HTML parser expects a string of characters,
630     ## not a string of bytes or code units or anything which might contain a BOM.
631     ## Therefore, any parser interface that accepts a string of bytes,
632     ## such as |parse_byte_string| in this module, must ensure that it does
633     ## strip the BOM and never strips any ZWNBSP.
634    
## Parses a character string as HTML.
##
## Positional arguments (see the commented-out list on the first line of
## the body): the parser object (or class name), the string or a
## reference to it, and then the arguments that |parse_char_stream| takes
## after its input handle ($doc, $onerror, $get_wrapper).
##
## Returns whatever |parse_char_stream| returns.
635 wakaba 1.162 sub parse_char_string ($$$;$$) {
636     #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
637 wakaba 1.135 my $self = shift;
## Always work on a reference to the string.
638 wakaba 1.139 my $s = ref $_[0] ? $_[0] : \($_[0]);
639 wakaba 1.171 require Whatpm::Charset::DecodeHandle;
## Wrap the string in a handle object so that the stream-based method
## can be reused.
640     my $input = Whatpm::Charset::DecodeHandle::CharString->new ($s);
641 wakaba 1.135 return $self->parse_char_stream ($input, @_[1..$#_]);
642     } # parse_char_string
643 wakaba 1.162 *parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
644 wakaba 1.63
## Parses a character stream as HTML into the given document.
##
## Positional arguments: the parser object (or the class name, in which
## case a new parser is created), the input handle (|read|,
## |manakai_read_until| and |onerror| are called on it), the document
## (its existing child nodes are removed), an optional error callback and
## an optional callback that receives the input handle and returns the
## handle to read from instead.
##
## Installs the |{set_nc}|, |{read_until}| and |{parse_error}| callbacks,
## runs the tokenizer and the tree constructor, and returns the document.
645 wakaba 1.182 sub parse_char_stream ($$$;$$) {
646 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
647 wakaba 1.135 my $input = $_[0];
648 wakaba 1.1 $self->{document} = $_[1];
649 wakaba 1.63 @{$self->{document}->child_nodes} = ();
650 wakaba 1.1
651 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
652    
653 wakaba 1.235 ## Confidence: irrelevant.
654 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
655 wakaba 1.235
656 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
657     if defined $self->{input_encoding};
658 wakaba 1.178 ## TODO: |{input_encoding}| is needless?
659 wakaba 1.63
## Position bookkeeping: |{line}| and |{column}| describe the current
## character, |{line_prev}| and |{column_prev}| the one before it.
660 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
661 wakaba 1.179 $self->{column_prev} = -1;
662     $self->{column} = 0;
## Callback that advances the input by one character and stores its code
## point in |{nc}| (-1 when no character could be read).
663 wakaba 1.183 $self->{set_nc} = sub {
664 wakaba 1.1 my $self = shift;
665 wakaba 1.13
666 wakaba 1.178 my $char = '';
667 wakaba 1.183 if (defined $self->{next_nc}) {
## A character that was read ahead after a CR is pending; use it first.
668     $char = $self->{next_nc};
669     delete $self->{next_nc};
670     $self->{nc} = ord $char;
671 wakaba 1.139 } else {
672 wakaba 1.179 $self->{char_buffer} = '';
673     $self->{char_buffer_pos} = 0;
674    
## Try to read characters other than NUL, LF and CR into the buffer.
## Such a character needs none of the special handling below, so the
## first buffered character is returned right away.
## NOTE(review): |manakai_read_until| is defined elsewhere; it is
## presumably expected to return the number of characters stored -- confirm.
675     my $count = $input->manakai_read_until
676 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
677 wakaba 1.179 if ($count) {
678     $self->{line_prev} = $self->{line};
679     $self->{column_prev} = $self->{column};
680     $self->{column}++;
681 wakaba 1.183 $self->{nc}
682 wakaba 1.179 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
683     return;
684     }
685    
## Otherwise read a single character, whatever it is.
686 wakaba 1.178 if ($input->read ($char, 1)) {
687 wakaba 1.183 $self->{nc} = ord $char;
688 wakaba 1.178 } else {
689 wakaba 1.183 $self->{nc} = -1;
690 wakaba 1.178 return;
691     }
692 wakaba 1.139 }
693 wakaba 1.112
694     ($self->{line_prev}, $self->{column_prev})
695     = ($self->{line}, $self->{column});
696     $self->{column}++;
697 wakaba 1.1
698 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
699 wakaba 1.132 !!!cp ('j1');
700 wakaba 1.112 $self->{line}++;
701     $self->{column} = 0;
702 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
## CR and CR LF are both turned into a single LF.  A character after the
## CR that is not LF is kept in |{next_nc}| for the next call.
703 wakaba 1.132 !!!cp ('j2');
704 wakaba 1.170 ## TODO: support for abort/streaming
705 wakaba 1.178 my $next = '';
706     if ($input->read ($next, 1) and $next ne "\x0A") {
707 wakaba 1.183 $self->{next_nc} = $next;
708 wakaba 1.135 }
709 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
710 wakaba 1.112 $self->{line}++;
711     $self->{column} = 0;
712 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
713 wakaba 1.132 !!!cp ('j4');
714 wakaba 1.8 !!!parse-error (type => 'NULL');
715 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
716 wakaba 1.1 }
717     };
718    
## Callback that copies a run of ordinary characters into the scalar
## $_[0] at offset $_[2].  $_[1] is a character-class fragment naming the
## characters that end the run, in addition to NUL, LF and CR.  Returns
## the number of characters consumed (0 if none, or if a read-ahead
## character is pending).  When characters are consumed, |{nc}| is set
## to -1.
719 wakaba 1.172 $self->{read_until} = sub {
720     #my ($scalar, $specials_range, $offset) = @_;
721 wakaba 1.183 return 0 if defined $self->{next_nc};
722 wakaba 1.180
723 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
724 wakaba 1.180 my $offset = $_[2] || 0;
725    
726     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
## Characters buffered by |{set_nc}| remain; take the run from there.
727     pos ($self->{char_buffer}) = $self->{char_buffer_pos};
728     if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
729     substr ($_[0], $offset)
730     = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
731     my $count = $+[0] - $-[0];
732     if ($count) {
733     $self->{column} += $count;
734     $self->{char_buffer_pos} += $count;
735     $self->{line_prev} = $self->{line};
736     $self->{column_prev} = $self->{column} - 1;
737 wakaba 1.183 $self->{nc} = -1;
738 wakaba 1.180 }
739     return $count;
740     } else {
741     return 0;
742     }
743     } else {
## The buffer is exhausted; read directly from the input handle.
## NOTE(review): This passes $_[2] rather than the defaulted $offset used
## by the other branch -- confirm that an undefined offset is acceptable
## to |manakai_read_until|.
744     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
745     if ($count) {
746     $self->{column} += $count;
747     $self->{line_prev} = $self->{line};
748     $self->{column_prev} = $self->{column} - 1;
749 wakaba 1.183 $self->{nc} = -1;
750 wakaba 1.180 }
751     return $count;
752 wakaba 1.172 }
753     }; # $self->{read_until}
754 wakaba 1.171
## Default error handler: warn with the error type and the position,
## which is taken from the token if one is given.
755 wakaba 1.3 my $onerror = $_[2] || sub {
756     my (%opt) = @_;
757 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
758     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
759     warn "Parse error ($opt{type}) at line $line column $column\n";
760 wakaba 1.3 };
## The current position is passed first so that |line| and |column|
## given by the reporter, which come later in the list, take precedence
## when the handler reads the arguments as a hash.
761     $self->{parse_error} = sub {
762 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
763 wakaba 1.1 };
764    
## Error callback for the input handle: reports the error at the current
## position as an error of the |encode| layer.
765 wakaba 1.182 my $char_onerror = sub {
766     my (undef, $type, %opt) = @_;
767     !!!parse-error (layer => 'encode',
768     line => $self->{line}, column => $self->{column} + 1,
769     %opt, type => $type);
770     }; # $char_onerror
771    
## With a wrapper callback, read from the wrapped handle and always
## install the error callback on it; otherwise keep an error callback
## that the input handle already has.
772     if ($_[3]) {
773     $input = $_[3]->($input);
774     $input->onerror ($char_onerror);
775     } else {
776     $input->onerror ($char_onerror) unless defined $input->onerror;
777     }
778    
779 wakaba 1.1 $self->_initialize_tokenizer;
780     $self->_initialize_tree_constructor;
781     $self->_construct_tree;
782     $self->_terminate_tree_constructor;
783    
784 wakaba 1.112 delete $self->{parse_error}; # remove loop
785    
786 wakaba 1.1 return $self->{document};
787 wakaba 1.135 } # parse_char_stream
788 wakaba 1.1
## Creates a parser object holding the default error level codes and
## placeholder callbacks.  |parse_char_stream| replaces |{set_nc}| and
## |{parse_error}|, and |parse_byte_stream| replaces |{change_encoding}|,
## with working implementations.
789     sub new ($) {
790     my $class = shift;
## Codes passed as the |level| of reported errors.
791 wakaba 1.134 my $self = bless {
792 wakaba 1.153 level => {must => 'm',
793 wakaba 1.159 should => 's',
794 wakaba 1.153 warn => 'w',
795     info => 'i',
796     uncertain => 'u'},
797 wakaba 1.134 }, $class;
## Placeholder input: always at the end of input.
## NOTE(review): This closure refers to $self and is stored in $self, so
## the object references itself through it.
798 wakaba 1.183 $self->{set_nc} = sub {
799     $self->{nc} = -1;
800 wakaba 1.1 };
## Placeholder: errors are ignored.
801     $self->{parse_error} = sub {
802     #
803     };
## Placeholder: requests to change the encoding are ignored.
804 wakaba 1.63 $self->{change_encoding} = sub {
805     # if ($_[0] is a supported encoding) {
806     # run "change the encoding" algorithm;
807     # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
808     # }
809     };
## Placeholder: does nothing.
810 wakaba 1.61 $self->{application_cache_selection} = sub {
811     #
812     };
813 wakaba 1.1 return $self;
814     } # new
815    
## Insertion modes
##
## An insertion mode is an integer bit pattern: one or more "family"
## bits (the |*_IMS| constants below) bit-or'ed with a small number in
## the two lowest bits that tells the members of a family apart.

## Family bits.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS () { 1 << 3 }
sub BODY_IMS () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS () { 1 << 6 }
sub ROW_IMS () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS () { 1 << 9 }
sub SELECT_IMS () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } # see Whatpm::HTML::Tokenizer
## NOTE: "in foreign content" insertion mode is special; it is combined
## with the secondary insertion mode.  In this parser, they are stored
## together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
## combined with the original insertion mode.  In this parser,
## they are stored together in the bit-or'ed form.

## All bits below the two special flags above (bits 0 to 10).
sub IM_MASK () { (1 << 11) - 1 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## The |head| family.
sub IN_HEAD_IM () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM () { HEAD_IMS | 3 }

## The |body| family, including the table parts handled like |body|.
sub IN_BODY_IM () { BODY_IMS }
sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }

## The table family.
sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM () { TABLE_IMS }

sub AFTER_BODY_IM () { BODY_AFTER_IMS }

## The frameset family.
sub IN_FRAMESET_IM () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }

## The |select| family.
sub IN_SELECT_IM () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }

## NOTE: "in column group" carries no family bit at all.
sub IN_COLUMN_GROUP_IM () { 2 }
862    
## Prepares $self->{document} for tree construction: turns strict
## error checking off, flags the document as an HTML document, and
## records line 1, column 1 as its source position.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## NOTE: $self->{document} MUST be specified before this method is called
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
873    
## Counterpart of |_initialize_tree_constructor|: turns strict error
## checking of $self->{document} back on once the tree is built.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
879    
880     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
881    
882 wakaba 1.3 { # tree construction stage
883     my $token;
884    
## Drives the whole tree construction stage: fetches the first token,
## resets the per-parse state, and then runs the "initial" mode, the
## "before html" mode, and finally the main loop, which starts in the
## "before head" mode.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA makes the
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  !!!next-token;

  ## Reset the tree construction state.
  $self->{$_} = undef
      for qw(form_element head_element head_element_inserted);
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(inner_html_node ignore_newline);

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
911    
## Implements the "initial" insertion mode.
##
## Works on the shared |$token| and returns once the mode is left:
##
##   - DOCTYPE token: a document type node is created and appended to
##     $self->{document}, conformance errors are reported, the
##     document's compatibility mode is chosen from the DOCTYPE's name,
##     public identifier and system identifier, and the next token is
##     fetched.
##   - Start tag, end tag, end of file, or non-whitespace characters:
##     a 'no DOCTYPE' error is reported, the document is put into
##     quirks mode, and the token is left to be reprocessed.
##   - Leading whitespace and comments are consumed without leaving
##     the mode (comments are appended to the document).
##
## Dies on a token of an unknown type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not
      ## HTML5" error, switch to a conformance checking mode for
      ## another language.  (We don't support such mode switchings; it
      ## is nonsense to do anything different from what browsers do.)
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      my $doctype = $self->{document}->create_document_type_definition
          ($doctype_name);

      $doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive
      if ($doctype_name ne 'html') {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        !!!cp ('t2');
        ## XXX Obsolete permitted DOCTYPEs
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{sysid}) {
        if ($token->{sysid} eq 'about:legacy-compat') {
          !!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat">
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else { ## <!DOCTYPE HTML>
        !!!cp ('t3');
        #
      }

      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};

      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'html') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## Public identifiers are compared ASCII case-insensitively, so
        ## the identifier is uppercased and the literals below are
        ## written in uppercase.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/;

        ## Public identifier prefixes that select the quirks mode.
        my $quirks_prefixes = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $quirks_prefixes
        my $match;
        for (@$quirks_prefixes) {
          ## Compare against the public identifier itself (not against
          ## the list reference).
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
          ## HTML 4.01 Frameset/Transitional: limited quirks when a
          ## system identifier is given, quirks when it is missing.
          if (defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1121    
## Implements the "before html" insertion mode.
##
## Works on the shared |$token| and returns once the root |html|
## element has been created, appended to $self->{document} and pushed
## onto the stack of open elements:
##
##   - DOCTYPE tokens are reported as errors and ignored; comments
##     are appended to the document; leading whitespace is dropped.
##   - An |html| start tag creates the root element from the token,
##     runs the application cache selection callback with the value of
##     its |manifest| attribute (or |undef| without one), and fetches
##     the next token.
##   - Any other token creates an |html| element without attributes,
##     runs the application cache selection callback once with
##     |undef|, and leaves the token to be reprocessed.
##
## Dies on a token of an unknown type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is run below,
      ## after the root element has been created; it must not be run
      ## here as well, or it would be invoked twice for this token.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## No |html| start tag: create the root element implicitly.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1214    
## Implements "reset the insertion mode appropriately".
##
## Walks the stack of open elements from the current node towards the
## root and sets $self->{insertion_mode} according to the first node
## that determines a mode.  When the walk arrives at the first entry
## of the stack, $self->{inner_html_node} is examined in its place;
## the method dies if that is not defined.
sub _reset_insertion_mode ($) {
  my $self = shift;

  my $stack = $self->{open_elements};

  ## Step 1
  my $is_last;

  ## Step 2
  my $depth = -1;
  my $node = $stack->[$depth];

  ## Step 3
  while (1) {
    if ($stack->[0]->[0] eq $node->[0]) {
      $is_last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $node = $self->{inner_html_node};
    }

    ## Step 4..14
    my $mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML
      ## and SVG elements.  Currently the HTML syntax supports only
      ## MathML and SVG elements as foreigners.
      $mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] == TABLE_CELL_EL) {
      if ($is_last) {
        !!!cp ('t28.2');
        ## No mode is determined by this node.
      } else {
        !!!cp ('t28.3');
        $mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      my $mode_of = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      };
      $mode = $mode_of->{$node->[0]->manakai_local_name};
    }
    if (defined $mode) {
      $self->{insertion_mode} = $mode;
      return if $mode;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    }
    !!!cp ('t31');

    ## Step 16
    if ($is_last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $depth--;
    $node = $stack->[$depth];

    ## Step 18: go back to Step 3.
  }

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1301    
1302     sub _tree_construction_main ($) {
1303     my $self = shift;
1304    
1305 wakaba 1.1 my $active_formatting_elements = [];
1306    
## "Reconstruct the active formatting elements".
##
## Takes one argument, a code reference that is called with each newly
## created element in order to insert it into the tree.  Entries at
## the end of the list of active formatting elements that are neither
## markers nor present in the stack of open elements are cloned; each
## clone is inserted, pushed onto the stack of open elements, and put
## in place of its original entry in the list.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert_node) = @_;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1;
  my $item = $active_formatting_elements->[$pos];

  ## Step 2
  return if $item->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($item->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  ## Rewind to the first entry that has to be recreated.
  S4: {
    ## Step 4
    last S4 if $active_formatting_elements->[0]->[0] eq $item->[0];

    ## Step 5
    $pos--;
    $item = $active_formatting_elements->[$pos];

    ## Step 6
    if ($item->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open;
      OE: for my $open (@{$self->{open_elements}}) {
        if ($item->[0] eq $open->[0]) {
          !!!cp ('t33');
          $is_open = 1;
          last OE;
        }
      }
      unless ($is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      }
      !!!cp ('t34');
      #
    }

    ## Step 7
    $pos++;
    $item = $active_formatting_elements->[$pos];
  } # S4

  ## Recreate the entries from there up to the end of the list.
  while (1) {
    ## Step 8
    my $clone = [$item->[0]->clone_node (0), $item->[1]];

    ## Step 9
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    if ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t37');
      last;
    }

    !!!cp ('t36');
    ## Step 7'
    $pos++;
    $item = $active_formatting_elements->[$pos];
  }
}; # $reconstruct_active_formatting_elements
1386    
## "Clear the list of active formatting elements up to the last
## marker": removes the last marker and every entry after it.  If the
## list contains no marker at all, the whole list is emptied.
my $clear_up_to_marker = sub {
  for (reverse 0..$#$active_formatting_elements) {
    if ($active_formatting_elements->[$_]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $_;
      return;
    }
  }

  !!!cp ('t39');
  ## No marker was found: entries are removed until the list is empty.
  @$active_formatting_elements = ();
  return;
}; # $clear_up_to_marker
1398    
1399 wakaba 1.96 my $insert;
1400    
## Generic CDATA/RCDATA element handling for the current start tag
## token.  The argument is the content model flag to switch to.
my $parse_rcdata = sub ($) {
  my $content_model_flag = shift; # CDATA or RCDATA

  ## Step 1: insert an element for the token.
  ## NOTE: |$start_tag_name| does not appear to be used any more within
  ## this closure.
  my $start_tag_name = $token->{tag_name};
  !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

  ## Step 2: switch the content model and leave any escape state.
  $self->{content_model} = $content_model_flag;
  delete $self->{escape}; # MUST

  ## Step 3, 4: the original insertion mode stays in the lower bits
  ## and the "in CDATA/RCDATA" flag is added on top of it.
  $self->{insertion_mode} = $self->{insertion_mode} | IN_CDATA_RCDATA_IM;

  !!!nack ('t40.1');
  !!!next-token;
}; # $parse_rcdata
1418 wakaba 1.1
## Handles a |script| start tag: creates the element from the current
## token, inserts it by means of |$insert|, pushes it onto the stack
## of open elements, and switches to the CDATA content model.
my $script_start_tag = sub () {
  ## Step 1: create the element.
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);

  ## Step 2
  ## TODO: mark as "parser-inserted"

  ## Step 3
  ## TODO: Mark as "already executed", if ...

  ## Step 4 (HTML5 revision 2702): insert the element and make it the
  ## current node.
  $insert->($script_el);
  my $entry = [$script_el, $el_category->{script}];
  push @{$self->{open_elements}}, $entry;

  ## Step 5: switch the content model and leave any escape state.
  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Step 6-7: the original insertion mode stays in the lower bits
  ## and the "in CDATA/RCDATA" flag is added on top of it.
  $self->{insertion_mode} = $self->{insertion_mode} | IN_CDATA_RCDATA_IM;

  !!!nack ('t40.2');
  !!!next-token;
}; # $script_start_tag
1444    
1445 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1446 wakaba 1.229 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag (OBSOLETE; unused).
1447 wakaba 1.202 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1448 wakaba 1.102 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1449    
## Handles an end tag of a formatting element by means of the
## adoption agency algorithm (AAA).
##
## The argument is the end tag token.  The closure repeats the
## algorithm (|redo FET|) until one of the terminating conditions is
## met; on each of them the next token is fetched before returning:
##
##   - no matching entry in the list of active formatting elements
##     after the last marker (error; token ignored);
##   - the formatting element is in the stack of open elements but not
##     in scope (error; token ignored);
##   - the formatting element is not in the stack of open elements
##     (error; its entry is removed from the list);
##   - there is no furthest block (the stack is popped up to and
##     including the formatting element, and its entry is removed from
##     the list).
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last entry.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
        ## NOTE: The topmost (eldest) node.
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        ## The node is not in the list of active formatting elements:
        ## drop it from the stack and look at the node before it.
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      ## Foster parenting.
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
          !!!cp ('t65.2');
          $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
          $next_sibling = $self->{open_elements}->[$_]->[0];
          undef $next_sibling
              unless $next_sibling->parent_node eq $foster_parent_element;
          last OE;
        }
      } # OE
      $foster_parent_element ||= $self->{open_elements}->[0]->[0];

      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first, because the children are moved
    ## away while it is iterated over.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1681    
## Plain insertion routine: appends the node passed as the first
## argument as the last child of the current node, i.e. the node held
## by the last entry of the stack of open elements.  The same code
## reference is also stored in |$insert|.
$insert = my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
1685    
1686 wakaba 1.234 ## Foster parenting. Note that there are three copies of the
1687     ## "foster parenting" code in the parser: for elements (this one),
1688     ## for texts, and for elements in the AAA code.
## Insertion routine with foster parenting.
##
## Takes the node to insert as its only argument.  If the category
## of the current node (the last entry of the stack of open elements)
## has none of the |TABLE_ROWS_EL| bits set, the node is simply
## appended to the current node.  Otherwise the node is inserted into
## a foster parent: the stack is searched from the top for an entry
## whose category is |TABLE_EL|; the entry just below it supplies the
## foster parent, and the node is inserted before the table if the
## table is a child of that foster parent (otherwise it is inserted
## with an undefined reference child).  If no table entry is found,
## the first entry of the stack is used as the foster parent.  After
## a foster-parented insertion the innermost entry of |$open_tables|
## is marked as tainted.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $stack = $self->{open_elements};

  ## Ordinary case: the current node is not a table-row-ish element.
  unless ($stack->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $stack->[-1]->[0]->append_child ($child);
  }

  ## Foster parenting (MUST).
  my ($foster_parent_element, $next_sibling);
  for my $i (reverse 0..$#$stack) {
    next unless $stack->[$i]->[1] == TABLE_EL;

    !!!cp ('t71');
    $foster_parent_element = $stack->[$i - 1]->[0];
    $next_sibling = $stack->[$i]->[0];
    ## Only insert before the table when the table is actually a
    ## child of the chosen foster parent.
    undef $next_sibling
        unless $next_sibling->parent_node eq $foster_parent_element;
    last;
  }
  ## No table entry in the stack: fall back to its first entry.
  $foster_parent_element ||= $stack->[0]->[0];

  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1714    
1715 wakaba 1.204 ## NOTE: Insert a character (MUST): When a character is inserted, if
1716     ## the last node that was inserted by the parser is a Text node and
1717     ## the character has to be inserted after that node, then the
1718     ## character is appended to the Text node. However, if any other
1719     ## node is inserted by the parser, then a new Text node is created
1720     ## and the character is appended as that Text node. If I'm not
1721     ## wrong, for a parser with scripting disabled, there are only two
1722     ## cases where this occurs. One is the case where an element node
1723     ## is inserted to the |head| element. This is covered by using the
1724 wakaba 1.202 ## |$self->{head_element_inserted}| flag. Another is the case where
1725     ## an element or comment is inserted into the |table| subtree while
1726     ## foster parenting happens. This is covered by using the [2] flag
1727     ## of the |$open_tables| structure. All other cases are handled
1728     ## simply by calling |manakai_append_text| method.
1729    
1730 wakaba 1.204 ## TODO: |<body><script>document.write("a<br>");
1731     ## document.body.removeChild (document.body.lastChild);
1732     ## document.write ("b")</script>|
1733    
1734 wakaba 1.126 B: while (1) {
1735 wakaba 1.230
1736     ## The "in table text" insertion mode.
1737     if ($self->{insertion_mode} & TABLE_IMS and
1738     not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1739     not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1740     C: {
1741     my $s;
1742     if ($token->{type} == CHARACTER_TOKEN) {
1743     !!!cp ('t194');
1744     $self->{pending_chars} ||= [];
1745     push @{$self->{pending_chars}}, $token;
1746     !!!next-token;
1747     next B;
1748     } else {
1749     if ($self->{pending_chars}) {
1750     $s = join '', map { $_->{data} } @{$self->{pending_chars}};
1751     delete $self->{pending_chars};
1752     if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1753     !!!cp ('t195');
1754     #
1755     } else {
1756     !!!cp ('t195.1');
1757     #$self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1758     $self->{open_elements}->[-1]->[0]->append_child
1759     ($self->{document}->create_text_node ($s));
1760     last C;
1761     }
1762     } else {
1763     !!!cp ('t195.2');
1764     last C;
1765     }
1766     }
1767    
1768 wakaba 1.234 ## Foster parenting.
1769 wakaba 1.230 !!!parse-error (type => 'in table:#text', token => $token);
1770    
1771     ## NOTE: As if in body, but insert into the foster parent element.
1772     $reconstruct_active_formatting_elements->($insert_to_foster);
1773    
1774     if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
1775     # MUST
1776     my $foster_parent_element;
1777     my $next_sibling;
1778     OE: for (reverse 0..$#{$self->{open_elements}}) {
1779     if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1780 wakaba 1.234 !!!cp ('t197');
1781     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1782     $next_sibling = $self->{open_elements}->[$_]->[0];
1783     undef $next_sibling
1784     unless $next_sibling->parent_node eq $foster_parent_element;
1785 wakaba 1.230 last OE;
1786     }
1787     } # OE
1788 wakaba 1.234 $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1789    
1790     !!!cp ('t199');
1791     $foster_parent_element->insert_before
1792     ($self->{document}->create_text_node ($s), $next_sibling);
1793    
1794 wakaba 1.230 $open_tables->[-1]->[1] = 1; # tainted
1795     $open_tables->[-1]->[2] = 1; # ~node inserted
1796     } else {
1797     ## NOTE: Fragment case or in a foster parent'ed element
1798     ## (e.g. |<table><span>a|). In fragment case, whether the
1799     ## character is appended to existing node or a new node is
1800     ## created is irrelevant, since the foster parent'ed nodes
1801     ## are discarded and fragment parsing does not invoke any
1802     ## script.
1803     !!!cp ('t200');
1804     $self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1805     }
1806     } # C
1807     } # TABLE_IMS
1808    
1809 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
1810 wakaba 1.79 !!!cp ('t73');
1811 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1812 wakaba 1.52 ## Ignore the token
1813     ## Stay in the phase
1814     !!!next-token;
1815 wakaba 1.126 next B;
1816 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
1817 wakaba 1.52 $token->{tag_name} eq 'html') {
1818 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1819 wakaba 1.79 !!!cp ('t79');
1820 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1821 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
1822     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1823 wakaba 1.79 !!!cp ('t80');
1824 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1825 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1826 wakaba 1.79 } else {
1827     !!!cp ('t81');
1828 wakaba 1.52 }
1829    
1830 wakaba 1.84 !!!cp ('t82');
1831 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
1832 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
1833     for my $attr_name (keys %{$token->{attributes}}) {
1834     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1835 wakaba 1.79 !!!cp ('t84');
1836 wakaba 1.52 $top_el->set_attribute_ns
1837     (undef, [undef, $attr_name],
1838     $token->{attributes}->{$attr_name}->{value});
1839     }
1840     }
1841 wakaba 1.125 !!!nack ('t84.1');
1842 wakaba 1.52 !!!next-token;
1843 wakaba 1.126 next B;
1844 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1845 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
1846 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1847 wakaba 1.79 !!!cp ('t85');
1848 wakaba 1.52 $self->{document}->append_child ($comment);
1849 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1850 wakaba 1.79 !!!cp ('t86');
1851 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
1852     } else {
1853 wakaba 1.79 !!!cp ('t87');
1854 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1855 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1856 wakaba 1.52 }
1857     !!!next-token;
1858 wakaba 1.126 next B;
1859 wakaba 1.205 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1860     if ($token->{type} == CHARACTER_TOKEN) {
1861     $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1862     delete $self->{ignore_newline};
1863    
1864     if (length $token->{data}) {
1865     !!!cp ('t43');
1866     $self->{open_elements}->[-1]->[0]->manakai_append_text
1867     ($token->{data});
1868     } else {
1869     !!!cp ('t43.1');
1870     }
1871     !!!next-token;
1872     next B;
1873     } elsif ($token->{type} == END_TAG_TOKEN) {
1874     delete $self->{ignore_newline};
1875    
1876     if ($token->{tag_name} eq 'script') {
1877     !!!cp ('t50');
1878    
1879     ## Para 1-2
1880     my $script = pop @{$self->{open_elements}};
1881    
1882     ## Para 3
1883     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1884    
1885     ## Para 4
1886     ## TODO: $old_insertion_point = $current_insertion_point;
1887     ## TODO: $current_insertion_point = just before $self->{nc};
1888    
1889     ## Para 5
1890     ## TODO: Run the $script->[0].
1891    
1892     ## Para 6
1893     ## TODO: $current_insertion_point = $old_insertion_point;
1894    
1895     ## Para 7
1896     ## TODO: if ($pending_external_script) {
1897     ## TODO: ...
1898     ## TODO: }
1899    
1900     !!!next-token;
1901     next B;
1902     } else {
1903     !!!cp ('t42');
1904    
1905     pop @{$self->{open_elements}};
1906    
1907     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1908     !!!next-token;
1909     next B;
1910     }
1911     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1912     delete $self->{ignore_newline};
1913    
1914     !!!cp ('t44');
1915     !!!parse-error (type => 'not closed',
1916     text => $self->{open_elements}->[-1]->[0]
1917     ->manakai_local_name,
1918     token => $token);
1919    
1920 wakaba 1.206 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1921 wakaba 1.205 # ## TODO: Mark as "already executed"
1922     #}
1923    
1924     pop @{$self->{open_elements}};
1925    
1926     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1927     ## Reprocess.
1928     next B;
1929     } else {
1930     die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1931     }
1932 wakaba 1.126 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1933     if ($token->{type} == CHARACTER_TOKEN) {
1934     !!!cp ('t87.1');
1935     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1936     !!!next-token;
1937     next B;
1938     } elsif ($token->{type} == START_TAG_TOKEN) {
1939 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1940     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1941 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1942     ($token->{tag_name} eq 'svg' and
1943 wakaba 1.206 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1944 wakaba 1.126 ## NOTE: "using the rules for secondary insertion mode"then"continue"
1945     !!!cp ('t87.2');
1946     #
1947     } elsif ({
1948 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1949 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1950 wakaba 1.223 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1951 wakaba 1.146 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1952     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1953     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1954     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1955     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1956 wakaba 1.223 }->{$token->{tag_name}} or
1957     ($token->{tag_name} eq 'font' and
1958     ($token->{attributes}->{color} or
1959     $token->{attributes}->{face} or
1960     $token->{attributes}->{size}))) {
1961 wakaba 1.126 !!!cp ('t87.2');
1962     !!!parse-error (type => 'not closed',
1963 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1964 wakaba 1.126 ->manakai_local_name,
1965     token => $token);
1966    
1967     pop @{$self->{open_elements}}
1968     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1969    
1970 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1971 wakaba 1.126 ## Reprocess.
1972     next B;
1973     } else {
1974 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1975     my $tag_name = $token->{tag_name};
1976     if ($nsuri eq $SVG_NS) {
1977     $tag_name = {
1978     altglyph => 'altGlyph',
1979     altglyphdef => 'altGlyphDef',
1980     altglyphitem => 'altGlyphItem',
1981     animatecolor => 'animateColor',
1982     animatemotion => 'animateMotion',
1983     animatetransform => 'animateTransform',
1984     clippath => 'clipPath',
1985     feblend => 'feBlend',
1986     fecolormatrix => 'feColorMatrix',
1987     fecomponenttransfer => 'feComponentTransfer',
1988     fecomposite => 'feComposite',
1989     feconvolvematrix => 'feConvolveMatrix',
1990     fediffuselighting => 'feDiffuseLighting',
1991     fedisplacementmap => 'feDisplacementMap',
1992     fedistantlight => 'feDistantLight',
1993     feflood => 'feFlood',
1994     fefunca => 'feFuncA',
1995     fefuncb => 'feFuncB',
1996     fefuncg => 'feFuncG',
1997     fefuncr => 'feFuncR',
1998     fegaussianblur => 'feGaussianBlur',
1999     feimage => 'feImage',
2000     femerge => 'feMerge',
2001     femergenode => 'feMergeNode',
2002     femorphology => 'feMorphology',
2003     feoffset => 'feOffset',
2004     fepointlight => 'fePointLight',
2005     fespecularlighting => 'feSpecularLighting',
2006     fespotlight => 'feSpotLight',
2007     fetile => 'feTile',
2008     feturbulence => 'feTurbulence',
2009     foreignobject => 'foreignObject',
2010     glyphref => 'glyphRef',
2011     lineargradient => 'linearGradient',
2012     radialgradient => 'radialGradient',
2013     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
2014     textpath => 'textPath',
2015     }->{$tag_name} || $tag_name;
2016     }
2017    
2018     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
2019    
2020     ## "adjust foreign attributes" - done in insert-element-f
2021 wakaba 1.126
2022 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
2023 wakaba 1.126
2024     if ($self->{self_closing}) {
2025     pop @{$self->{open_elements}};
2026     !!!ack ('t87.3');
2027     } else {
2028     !!!cp ('t87.4');
2029     }
2030    
2031     !!!next-token;
2032     next B;
2033     }
2034     } elsif ($token->{type} == END_TAG_TOKEN) {
2035     ## NOTE: "using the rules for secondary insertion mode" then "continue"
2036 wakaba 1.219 if ($token->{tag_name} eq 'script') {
2037     !!!cp ('t87.41');
2038     #
2039     ## XXXscript: Execute script here.
2040     } else {
2041     !!!cp ('t87.5');
2042     #
2043     }
2044 wakaba 1.126 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2045     !!!cp ('t87.6');
2046 wakaba 1.146 !!!parse-error (type => 'not closed',
2047 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2048 wakaba 1.146 ->manakai_local_name,
2049     token => $token);
2050    
2051     pop @{$self->{open_elements}}
2052     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
2053    
2054 wakaba 1.200 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
2055    
2056 wakaba 1.146 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
2057     ## Reprocess.
2058     next B;
2059 wakaba 1.126 } else {
2060     die "$0: $token->{type}: Unknown token type";
2061     }
2062     }
2063    
2064     if ($self->{insertion_mode} & HEAD_IMS) {
2065 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2066 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
2067 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2068 wakaba 1.202 if ($self->{head_element_inserted}) {
2069     !!!cp ('t88.3');
2070     $self->{open_elements}->[-1]->[0]->append_child
2071     ($self->{document}->create_text_node ($1));
2072     delete $self->{head_element_inserted};
2073     ## NOTE: |</head> <link> |
2074     #
2075     } else {
2076     !!!cp ('t88.2');
2077     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2078     ## NOTE: |</head> &#x20;|
2079     #
2080     }
2081 wakaba 1.99 } else {
2082     !!!cp ('t88.1');
2083     ## Ignore the token.
2084 wakaba 1.177 #
2085 wakaba 1.99 }
2086 wakaba 1.52 unless (length $token->{data}) {
2087 wakaba 1.79 !!!cp ('t88');
2088 wakaba 1.52 !!!next-token;
2089 wakaba 1.126 next B;
2090 wakaba 1.1 }
2091 wakaba 1.177 ## TODO: set $token->{column} appropriately
2092 wakaba 1.1 }
2093 wakaba 1.52
2094 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2095 wakaba 1.79 !!!cp ('t89');
2096 wakaba 1.52 ## As if <head>
2097 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2098 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2099 wakaba 1.123 push @{$self->{open_elements}},
2100     [$self->{head_element}, $el_category->{head}];
2101 wakaba 1.52
2102     ## Reprocess in the "in head" insertion mode...
2103     pop @{$self->{open_elements}};
2104    
2105     ## Reprocess in the "after head" insertion mode...
2106 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2107 wakaba 1.79 !!!cp ('t90');
2108 wakaba 1.52 ## As if </noscript>
2109     pop @{$self->{open_elements}};
2110 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
2111 wakaba 1.1
2112 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
2113     ## As if </head>
2114     pop @{$self->{open_elements}};
2115    
2116     ## Reprocess in the "after head" insertion mode...
2117 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2118 wakaba 1.79 !!!cp ('t91');
2119 wakaba 1.52 pop @{$self->{open_elements}};
2120    
2121     ## Reprocess in the "after head" insertion mode...
2122 wakaba 1.79 } else {
2123     !!!cp ('t92');
2124 wakaba 1.1 }
2125 wakaba 1.52
2126 wakaba 1.123 ## "after head" insertion mode
2127     ## As if <body>
2128     !!!insert-element ('body',, $token);
2129     $self->{insertion_mode} = IN_BODY_IM;
2130     ## reprocess
2131 wakaba 1.126 next B;
2132 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
2133     if ($token->{tag_name} eq 'head') {
2134     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2135     !!!cp ('t93');
2136 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2137 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
2138     ($self->{head_element});
2139     push @{$self->{open_elements}},
2140     [$self->{head_element}, $el_category->{head}];
2141     $self->{insertion_mode} = IN_HEAD_IM;
2142 wakaba 1.125 !!!nack ('t93.1');
2143 wakaba 1.123 !!!next-token;
2144 wakaba 1.126 next B;
2145 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2146 wakaba 1.139 !!!cp ('t93.2');
2147 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
2148     token => $token);
2149 wakaba 1.139 ## Ignore the token
2150     !!!nack ('t93.3');
2151     !!!next-token;
2152     next B;
2153 wakaba 1.125 } else {
2154     !!!cp ('t95');
2155 wakaba 1.153 !!!parse-error (type => 'in head:head',
2156     token => $token); # or in head noscript
2157 wakaba 1.125 ## Ignore the token
2158     !!!nack ('t95.1');
2159     !!!next-token;
2160 wakaba 1.126 next B;
2161 wakaba 1.125 }
2162     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2163 wakaba 1.126 !!!cp ('t96');
2164     ## As if <head>
2165     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2166     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2167     push @{$self->{open_elements}},
2168     [$self->{head_element}, $el_category->{head}];
2169 wakaba 1.52
2170 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
2171     ## Reprocess in the "in head" insertion mode...
2172     } else {
2173     !!!cp ('t97');
2174     }
2175 wakaba 1.52
2176 wakaba 1.202 if ($token->{tag_name} eq 'base') {
2177     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2178     !!!cp ('t98');
2179     ## As if </noscript>
2180     pop @{$self->{open_elements}};
2181     !!!parse-error (type => 'in noscript', text => 'base',
2182     token => $token);
2183    
2184     $self->{insertion_mode} = IN_HEAD_IM;
2185     ## Reprocess in the "in head" insertion mode...
2186     } else {
2187     !!!cp ('t99');
2188     }
2189 wakaba 1.49
2190 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2191     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2192     !!!cp ('t100');
2193     !!!parse-error (type => 'after head',
2194     text => $token->{tag_name}, token => $token);
2195     push @{$self->{open_elements}},
2196     [$self->{head_element}, $el_category->{head}];
2197     $self->{head_element_inserted} = 1;
2198     } else {
2199     !!!cp ('t101');
2200     }
2201     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2202     pop @{$self->{open_elements}};
2203     pop @{$self->{open_elements}} # <head>
2204     if $self->{insertion_mode} == AFTER_HEAD_IM;
2205     !!!nack ('t101.1');
2206     !!!next-token;
2207     next B;
2208 wakaba 1.194 } elsif ($token->{tag_name} eq 'link') {
2209     ## NOTE: There is a "as if in head" code clone.
2210     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2211     !!!cp ('t102');
2212     !!!parse-error (type => 'after head',
2213     text => $token->{tag_name}, token => $token);
2214     push @{$self->{open_elements}},
2215     [$self->{head_element}, $el_category->{head}];
2216 wakaba 1.202 $self->{head_element_inserted} = 1;
2217 wakaba 1.194 } else {
2218     !!!cp ('t103');
2219     }
2220     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2221     pop @{$self->{open_elements}};
2222     pop @{$self->{open_elements}} # <head>
2223     if $self->{insertion_mode} == AFTER_HEAD_IM;
2224     !!!ack ('t103.1');
2225     !!!next-token;
2226     next B;
2227 wakaba 1.232 } elsif ($token->{tag_name} eq 'command') {
2228 wakaba 1.194 if ($self->{insertion_mode} == IN_HEAD_IM) {
2229     ## NOTE: If the insertion mode at the time of the emission
2230     ## of the token was "before head", $self->{insertion_mode}
2231     ## is already changed to |IN_HEAD_IM|.
2232    
2233     ## NOTE: There is a "as if in head" code clone.
2234     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2235     pop @{$self->{open_elements}};
2236     pop @{$self->{open_elements}} # <head>
2237     if $self->{insertion_mode} == AFTER_HEAD_IM;
2238     !!!ack ('t103.2');
2239     !!!next-token;
2240     next B;
2241     } else {
2242     ## NOTE: "in head noscript" or "after head" insertion mode
2243     ## - in these cases, these tags are treated as same as
2244     ## normal in-body tags.
2245     !!!cp ('t103.3');
2246     #
2247     }
2248 wakaba 1.202 } elsif ($token->{tag_name} eq 'meta') {
2249     ## NOTE: There is a "as if in head" code clone.
2250     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2251     !!!cp ('t104');
2252     !!!parse-error (type => 'after head',
2253     text => $token->{tag_name}, token => $token);
2254     push @{$self->{open_elements}},
2255     [$self->{head_element}, $el_category->{head}];
2256     $self->{head_element_inserted} = 1;
2257     } else {
2258     !!!cp ('t105');
2259     }
2260     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2261     my $meta_el = pop @{$self->{open_elements}};
2262 wakaba 1.34
2263     unless ($self->{confident}) {
2264 wakaba 1.134 if ($token->{attributes}->{charset}) {
2265 wakaba 1.79 !!!cp ('t106');
2266 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2267     ## in the {change_encoding} callback.
2268 wakaba 1.63 $self->{change_encoding}
2269 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
2270     $token);
2271 wakaba 1.66
2272     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2273     ->set_user_data (manakai_has_reference =>
2274     $token->{attributes}->{charset}
2275     ->{has_reference});
2276 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
2277     if ($token->{attributes}->{content}->{value}
2278 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2279 wakaba 1.186 [\x09\x0A\x0C\x0D\x20]*=
2280     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2281     ([^"'\x09\x0A\x0C\x0D\x20]
2282     [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2283 wakaba 1.79 !!!cp ('t107');
2284 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2285     ## in the {change_encoding} callback.
2286 wakaba 1.63 $self->{change_encoding}
2287 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2288     $token);
2289 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2290     ->set_user_data (manakai_has_reference =>
2291     $token->{attributes}->{content}
2292     ->{has_reference});
2293 wakaba 1.79 } else {
2294     !!!cp ('t108');
2295 wakaba 1.63 }
2296 wakaba 1.34 }
2297 wakaba 1.66 } else {
2298     if ($token->{attributes}->{charset}) {
2299 wakaba 1.79 !!!cp ('t109');
2300 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2301     ->set_user_data (manakai_has_reference =>
2302     $token->{attributes}->{charset}
2303     ->{has_reference});
2304     }
2305 wakaba 1.68 if ($token->{attributes}->{content}) {
2306 wakaba 1.79 !!!cp ('t110');
2307 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2308     ->set_user_data (manakai_has_reference =>
2309     $token->{attributes}->{content}
2310     ->{has_reference});
2311     }
2312 wakaba 1.34 }
2313    
2314 wakaba 1.100 pop @{$self->{open_elements}} # <head>
2315 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
2316 wakaba 1.125 !!!ack ('t110.1');
2317 wakaba 1.34 !!!next-token;
2318 wakaba 1.126 next B;
2319 wakaba 1.202 } elsif ($token->{tag_name} eq 'title') {
2320     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2321     !!!cp ('t111');
2322     ## As if </noscript>
2323     pop @{$self->{open_elements}};
2324     !!!parse-error (type => 'in noscript', text => 'title',
2325     token => $token);
2326    
2327     $self->{insertion_mode} = IN_HEAD_IM;
2328     ## Reprocess in the "in head" insertion mode...
2329     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2330     !!!cp ('t112');
2331     !!!parse-error (type => 'after head',
2332     text => $token->{tag_name}, token => $token);
2333     push @{$self->{open_elements}},
2334     [$self->{head_element}, $el_category->{head}];
2335     $self->{head_element_inserted} = 1;
2336     } else {
2337     !!!cp ('t113');
2338     }
2339 wakaba 1.49
2340 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2341     $parse_rcdata->(RCDATA_CONTENT_MODEL);
2342 wakaba 1.225
2343     ## NOTE: At this point the stack of open elements contains
2344     ## the |head| element (index == -2) and the |title| element
2345     ## (index == -1). In the "after head" insertion mode the
2346     ## |head| element is inserted only for the purpose of
2347     ## providing the context for the |title| element, and
2348     ## therefore we can now and have to remove the element from
2349     ## the stack.
2350 wakaba 1.205 splice @{$self->{open_elements}}, -2, 1, () # <head>
2351 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2352 wakaba 1.202 next B;
2353     } elsif ($token->{tag_name} eq 'style' or
2354     $token->{tag_name} eq 'noframes') {
2355     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2356     ## insertion mode IN_HEAD_IM)
2357     ## NOTE: There is a "as if in head" code clone.
2358     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2359     !!!cp ('t114');
2360     !!!parse-error (type => 'after head',
2361     text => $token->{tag_name}, token => $token);
2362     push @{$self->{open_elements}},
2363     [$self->{head_element}, $el_category->{head}];
2364     $self->{head_element_inserted} = 1;
2365     } else {
2366     !!!cp ('t115');
2367     }
2368     $parse_rcdata->(CDATA_CONTENT_MODEL);
2369 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2370     splice @{$self->{open_elements}}, -2, 1, () # <head>
2371 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2372 wakaba 1.202 next B;
2373 wakaba 1.205 } elsif ($token->{tag_name} eq 'noscript') {
2374 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
2375 wakaba 1.79 !!!cp ('t116');
2376 wakaba 1.25 ## NOTE: and scripting is disabled
2377 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2378 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2379 wakaba 1.125 !!!nack ('t116.1');
2380 wakaba 1.1 !!!next-token;
2381 wakaba 1.126 next B;
2382 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2383 wakaba 1.79 !!!cp ('t117');
2384 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
2385     token => $token);
2386 wakaba 1.1 ## Ignore the token
2387 wakaba 1.125 !!!nack ('t117.1');
2388 wakaba 1.41 !!!next-token;
2389 wakaba 1.126 next B;
2390 wakaba 1.1 } else {
2391 wakaba 1.79 !!!cp ('t118');
2392 wakaba 1.25 #
2393 wakaba 1.1 }
2394 wakaba 1.202 } elsif ($token->{tag_name} eq 'script') {
2395     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2396     !!!cp ('t119');
2397     ## As if </noscript>
2398     pop @{$self->{open_elements}};
2399     !!!parse-error (type => 'in noscript', text => 'script',
2400     token => $token);
2401    
2402     $self->{insertion_mode} = IN_HEAD_IM;
2403     ## Reprocess in the "in head" insertion mode...
2404     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2405     !!!cp ('t120');
2406     !!!parse-error (type => 'after head',
2407     text => $token->{tag_name}, token => $token);
2408     push @{$self->{open_elements}},
2409     [$self->{head_element}, $el_category->{head}];
2410     $self->{head_element_inserted} = 1;
2411     } else {
2412     !!!cp ('t121');
2413     }
2414 wakaba 1.49
2415 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2416     $script_start_tag->();
2417 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2418     splice @{$self->{open_elements}}, -2, 1 # <head>
2419 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2420 wakaba 1.202 next B;
2421     } elsif ($token->{tag_name} eq 'body' or
2422     $token->{tag_name} eq 'frameset') {
2423 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2424 wakaba 1.79 !!!cp ('t122');
2425 wakaba 1.49 ## As if </noscript>
2426     pop @{$self->{open_elements}};
2427 wakaba 1.153 !!!parse-error (type => 'in noscript',
2428     text => $token->{tag_name}, token => $token);
2429 wakaba 1.49
2430     ## Reprocess in the "in head" insertion mode...
2431     ## As if </head>
2432     pop @{$self->{open_elements}};
2433    
2434     ## Reprocess in the "after head" insertion mode...
2435 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2436 wakaba 1.79 !!!cp ('t124');
2437 wakaba 1.49 pop @{$self->{open_elements}};
2438    
2439     ## Reprocess in the "after head" insertion mode...
2440 wakaba 1.79 } else {
2441     !!!cp ('t125');
2442 wakaba 1.49 }
2443    
2444     ## "after head" insertion mode
2445 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2446 wakaba 1.54 if ($token->{tag_name} eq 'body') {
2447 wakaba 1.79 !!!cp ('t126');
2448 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2449     } elsif ($token->{tag_name} eq 'frameset') {
2450 wakaba 1.79 !!!cp ('t127');
2451 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
2452     } else {
2453     die "$0: tag name: $self->{tag_name}";
2454     }
2455 wakaba 1.125 !!!nack ('t127.1');
2456 wakaba 1.1 !!!next-token;
2457 wakaba 1.126 next B;
2458 wakaba 1.1 } else {
2459 wakaba 1.79 !!!cp ('t128');
2460 wakaba 1.1 #
2461     }
2462 wakaba 1.49
2463 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2464 wakaba 1.79 !!!cp ('t129');
2465 wakaba 1.49 ## As if </noscript>
2466     pop @{$self->{open_elements}};
2467 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2468     text => $token->{tag_name}, token => $token);
2469 wakaba 1.49
2470     ## Reprocess in the "in head" insertion mode...
2471     ## As if </head>
2472 wakaba 1.25 pop @{$self->{open_elements}};
2473 wakaba 1.49
2474     ## Reprocess in the "after head" insertion mode...
2475 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2476 wakaba 1.79 !!!cp ('t130');
2477 wakaba 1.49 ## As if </head>
2478 wakaba 1.25 pop @{$self->{open_elements}};
2479 wakaba 1.49
2480     ## Reprocess in the "after head" insertion mode...
2481 wakaba 1.79 } else {
2482     !!!cp ('t131');
2483 wakaba 1.49 }
2484    
2485     ## "after head" insertion mode
2486     ## As if <body>
2487 wakaba 1.116 !!!insert-element ('body',, $token);
2488 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2489 wakaba 1.49 ## reprocess
2490 wakaba 1.125 !!!ack-later;
2491 wakaba 1.126 next B;
2492 wakaba 1.238 } elsif ($token->{type} == END_TAG_TOKEN) {
2493     ## "Before head", "in head", and "after head" insertion modes
2494     ## ignore most of end tags. Exceptions are "body", "html",
2495     ## and "br" end tags. "Before head" and "in head" insertion
2496     ## modes also recognize "head" end tag. "In head noscript"
2497     ## insertion modes ignore end tags except for "noscript" and
2498     ## "br".
2499    
2500     if ($token->{tag_name} eq 'head') {
2501     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2502     !!!cp ('t132');
2503     ## As if <head>
2504     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2505     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2506     push @{$self->{open_elements}},
2507     [$self->{head_element}, $el_category->{head}];
2508 wakaba 1.50
2509 wakaba 1.238 ## Reprocess in the "in head" insertion mode...
2510     pop @{$self->{open_elements}};
2511     $self->{insertion_mode} = AFTER_HEAD_IM;
2512     !!!next-token;
2513     next B;
2514     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2515     !!!cp ('t133');
2516     #
2517     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2518     !!!cp ('t134');
2519     pop @{$self->{open_elements}};
2520     $self->{insertion_mode} = AFTER_HEAD_IM;
2521     !!!next-token;
2522     next B;
2523     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2524     !!!cp ('t134.1');
2525     #
2526     } else {
2527     die "$0: $self->{insertion_mode}: Unknown insertion mode";
2528     }
2529     } elsif ($token->{tag_name} eq 'noscript') {
2530     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2531     !!!cp ('t136');
2532     pop @{$self->{open_elements}};
2533     $self->{insertion_mode} = IN_HEAD_IM;
2534     !!!next-token;
2535     next B;
2536     } else {
2537     !!!cp ('t138');
2538     #
2539     }
2540     } elsif ({
2541     body => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
2542     html => ($self->{insertion_mode} != IN_HEAD_NOSCRIPT_IM),
2543     br => 1,
2544     }->{$token->{tag_name}}) {
2545 wakaba 1.224 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2546     !!!cp ('t142.2');
2547     ## (before head) as if <head>, (in head) as if </head>
2548     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2549     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2550     $self->{insertion_mode} = AFTER_HEAD_IM;
2551 wakaba 1.139
2552 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2553     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2554     !!!cp ('t143.2');
2555     ## As if </head>
2556     pop @{$self->{open_elements}};
2557     $self->{insertion_mode} = AFTER_HEAD_IM;
2558 wakaba 1.139
2559 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2560     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2561     !!!cp ('t143.3');
2562     ## NOTE: Two parse errors for <head><noscript></br>
2563     !!!parse-error (type => 'unmatched end tag',
2564 wakaba 1.238 text => $token->{tag_name}, token => $token);
2565 wakaba 1.224 ## As if </noscript>
2566     pop @{$self->{open_elements}};
2567     $self->{insertion_mode} = IN_HEAD_IM;
2568 wakaba 1.50
2569 wakaba 1.224 ## Reprocess in the "in head" insertion mode...
2570     ## As if </head>
2571     pop @{$self->{open_elements}};
2572     $self->{insertion_mode} = AFTER_HEAD_IM;
2573 wakaba 1.139
2574 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2575     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2576     !!!cp ('t143.4');
2577     #
2578     } else {
2579     die "$0: $self->{insertion_mode}: Unknown insertion mode";
2580     }
2581 wakaba 1.50
2582 wakaba 1.238 ## "after head" insertion mode
2583     ## As if <body>
2584     !!!insert-element ('body',, $token);
2585     $self->{insertion_mode} = IN_BODY_IM;
2586     ## Reprocess.
2587     next B;
2588     }
2589 wakaba 1.49
2590 wakaba 1.238 ## End tags are ignored by default.
2591     !!!cp ('t145');
2592     !!!parse-error (type => 'unmatched end tag',
2593     text => $token->{tag_name}, token => $token);
2594     ## Ignore the token.
2595     !!!next-token;
2596 wakaba 1.224 next B;
2597 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2598     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2599     !!!cp ('t149.1');
2600    
2601     ## NOTE: As if <head>
2602 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2603 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
2604     ($self->{head_element});
2605 wakaba 1.123 #push @{$self->{open_elements}},
2606     # [$self->{head_element}, $el_category->{head}];
2607 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
2608     ## NOTE: Reprocess.
2609    
2610     ## NOTE: As if </head>
2611     #pop @{$self->{open_elements}};
2612     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2613     ## NOTE: Reprocess.
2614    
2615     #
2616     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2617     !!!cp ('t149.2');
2618    
2619     ## NOTE: As if </head>
2620     pop @{$self->{open_elements}};
2621     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2622     ## NOTE: Reprocess.
2623    
2624     #
2625     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2626     !!!cp ('t149.3');
2627    
2628 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
2629 wakaba 1.104
2630     ## As if </noscript>
2631     pop @{$self->{open_elements}};
2632     #$self->{insertion_mode} = IN_HEAD_IM;
2633     ## NOTE: Reprocess.
2634    
2635     ## NOTE: As if </head>
2636     pop @{$self->{open_elements}};
2637     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2638     ## NOTE: Reprocess.
2639    
2640     #
2641     } else {
2642     !!!cp ('t149.4');
2643     #
2644     }
2645    
2646     ## NOTE: As if <body>
2647 wakaba 1.116 !!!insert-element ('body',, $token);
2648 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
2649     ## NOTE: Reprocess.
2650 wakaba 1.126 next B;
2651 wakaba 1.104 } else {
2652     die "$0: $token->{type}: Unknown token type";
2653     }
2654 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
2655 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2656 wakaba 1.79 !!!cp ('t150');
2657 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
2658     $reconstruct_active_formatting_elements->($insert_to_current);
2659    
2660     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2661    
2662     !!!next-token;
2663 wakaba 1.126 next B;
2664 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
2665 wakaba 1.52 if ({
2666     caption => 1, col => 1, colgroup => 1, tbody => 1,
2667     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2668     }->{$token->{tag_name}}) {
2669 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2670 wakaba 1.52 ## have an element in table scope
2671 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
2672 wakaba 1.52 my $node = $self->{open_elements}->[$_];
2673 wakaba 1.206 if ($node->[1] == TABLE_CELL_EL) {
2674 wakaba 1.79 !!!cp ('t151');
2675 wakaba 1.108
2676     ## Close the cell
2677 wakaba 1.125 !!!back-token; # <x>
2678 wakaba 1.122 $token = {type => END_TAG_TOKEN,
2679     tag_name => $node->[0]->manakai_local_name,
2680 wakaba 1.114 line => $token->{line},
2681     column => $token->{column}};
2682 wakaba 1.126 next B;
2683 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2684 wakaba 1.79 !!!cp ('t152');
2685 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
2686     last;
2687 wakaba 1.52 }
2688 wakaba 1.108 }
2689    
2690     !!!cp ('t153');
2691     !!!parse-error (type => 'start tag not allowed',
2692 wakaba 1.153 text => $token->{tag_name}, token => $token);
2693 wakaba 1.108 ## Ignore the token
2694 wakaba 1.125 !!!nack ('t153.1');
2695 wakaba 1.108 !!!next-token;
2696 wakaba 1.126 next B;
2697 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2698 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2699     token => $token);
2700 wakaba 1.52
2701 wakaba 1.108 ## NOTE: As if </caption>.
2702 wakaba 1.52 ## have a table element in table scope
2703     my $i;
2704 wakaba 1.108 INSCOPE: {
2705     for (reverse 0..$#{$self->{open_elements}}) {
2706     my $node = $self->{open_elements}->[$_];
2707 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2708 wakaba 1.108 !!!cp ('t155');
2709     $i = $_;
2710     last INSCOPE;
2711 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2712 wakaba 1.108 !!!cp ('t156');
2713     last;
2714     }
2715 wakaba 1.52 }
2716 wakaba 1.108
2717     !!!cp ('t157');
2718     !!!parse-error (type => 'start tag not allowed',
2719 wakaba 1.153 text => $token->{tag_name}, token => $token);
2720 wakaba 1.108 ## Ignore the token
2721 wakaba 1.125 !!!nack ('t157.1');
2722 wakaba 1.108 !!!next-token;
2723 wakaba 1.126 next B;
2724 wakaba 1.52 } # INSCOPE
2725    
2726     ## generate implied end tags
2727 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2728     & END_TAG_OPTIONAL_EL) {
2729 wakaba 1.79 !!!cp ('t158');
2730 wakaba 1.86 pop @{$self->{open_elements}};
2731 wakaba 1.52 }
2732    
2733 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2734 wakaba 1.79 !!!cp ('t159');
2735 wakaba 1.122 !!!parse-error (type => 'not closed',
2736 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2737 wakaba 1.122 ->manakai_local_name,
2738     token => $token);
2739 wakaba 1.79 } else {
2740     !!!cp ('t160');
2741 wakaba 1.52 }
2742    
2743     splice @{$self->{open_elements}}, $i;
2744    
2745     $clear_up_to_marker->();
2746    
2747 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2748 wakaba 1.52
2749     ## reprocess
2750 wakaba 1.125 !!!ack-later;
2751 wakaba 1.126 next B;
2752 wakaba 1.52 } else {
2753 wakaba 1.79 !!!cp ('t161');
2754 wakaba 1.52 #
2755     }
2756     } else {
2757 wakaba 1.79 !!!cp ('t162');
2758 wakaba 1.52 #
2759     }
2760 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2761 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2762 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2763 wakaba 1.43 ## have an element in table scope
2764 wakaba 1.52 my $i;
2765 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2766     my $node = $self->{open_elements}->[$_];
2767 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2768 wakaba 1.79 !!!cp ('t163');
2769 wakaba 1.52 $i = $_;
2770 wakaba 1.43 last INSCOPE;
2771 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2772 wakaba 1.79 !!!cp ('t164');
2773 wakaba 1.43 last INSCOPE;
2774     }
2775     } # INSCOPE
2776 wakaba 1.52 unless (defined $i) {
2777 wakaba 1.79 !!!cp ('t165');
2778 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2779     text => $token->{tag_name},
2780     token => $token);
2781 wakaba 1.43 ## Ignore the token
2782     !!!next-token;
2783 wakaba 1.126 next B;
2784 wakaba 1.43 }
2785    
2786 wakaba 1.52 ## generate implied end tags
2787 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2788     & END_TAG_OPTIONAL_EL) {
2789 wakaba 1.79 !!!cp ('t166');
2790 wakaba 1.86 pop @{$self->{open_elements}};
2791 wakaba 1.52 }
2792 wakaba 1.86
2793 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2794     ne $token->{tag_name}) {
2795 wakaba 1.79 !!!cp ('t167');
2796 wakaba 1.122 !!!parse-error (type => 'not closed',
2797 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2798 wakaba 1.122 ->manakai_local_name,
2799     token => $token);
2800 wakaba 1.79 } else {
2801     !!!cp ('t168');
2802 wakaba 1.52 }
2803    
2804     splice @{$self->{open_elements}}, $i;
2805    
2806     $clear_up_to_marker->();
2807    
2808 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
2809 wakaba 1.52
2810     !!!next-token;
2811 wakaba 1.126 next B;
2812 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2813 wakaba 1.79 !!!cp ('t169');
2814 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2815     text => $token->{tag_name}, token => $token);
2816 wakaba 1.52 ## Ignore the token
2817     !!!next-token;
2818 wakaba 1.126 next B;
2819 wakaba 1.52 } else {
2820 wakaba 1.79 !!!cp ('t170');
2821 wakaba 1.52 #
2822     }
2823     } elsif ($token->{tag_name} eq 'caption') {
2824 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2825 wakaba 1.43 ## have a table element in table scope
2826     my $i;
2827 wakaba 1.108 INSCOPE: {
2828     for (reverse 0..$#{$self->{open_elements}}) {
2829     my $node = $self->{open_elements}->[$_];
2830 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2831 wakaba 1.108 !!!cp ('t171');
2832     $i = $_;
2833     last INSCOPE;
2834 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2835 wakaba 1.108 !!!cp ('t172');
2836     last;
2837     }
2838 wakaba 1.43 }
2839 wakaba 1.108
2840     !!!cp ('t173');
2841     !!!parse-error (type => 'unmatched end tag',
2842 wakaba 1.153 text => $token->{tag_name}, token => $token);
2843 wakaba 1.108 ## Ignore the token
2844     !!!next-token;
2845 wakaba 1.126 next B;
2846 wakaba 1.43 } # INSCOPE
2847    
2848     ## generate implied end tags
2849 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2850     & END_TAG_OPTIONAL_EL) {
2851 wakaba 1.79 !!!cp ('t174');
2852 wakaba 1.86 pop @{$self->{open_elements}};
2853 wakaba 1.43 }
2854 wakaba 1.52
2855 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2856 wakaba 1.79 !!!cp ('t175');
2857 wakaba 1.122 !!!parse-error (type => 'not closed',
2858 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2859 wakaba 1.122 ->manakai_local_name,
2860     token => $token);
2861 wakaba 1.79 } else {
2862     !!!cp ('t176');
2863 wakaba 1.52 }
2864    
2865     splice @{$self->{open_elements}}, $i;
2866    
2867     $clear_up_to_marker->();
2868    
2869 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2870 wakaba 1.52
2871     !!!next-token;
2872 wakaba 1.126 next B;
2873 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2874 wakaba 1.79 !!!cp ('t177');
2875 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2876     text => $token->{tag_name}, token => $token);
2877 wakaba 1.52 ## Ignore the token
2878     !!!next-token;
2879 wakaba 1.126 next B;
2880 wakaba 1.52 } else {
2881 wakaba 1.79 !!!cp ('t178');
2882 wakaba 1.52 #
2883     }
2884     } elsif ({
2885     table => 1, tbody => 1, tfoot => 1,
2886     thead => 1, tr => 1,
2887     }->{$token->{tag_name}} and
2888 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2889 wakaba 1.52 ## have an element in table scope
2890     my $i;
2891     my $tn;
2892 wakaba 1.108 INSCOPE: {
2893     for (reverse 0..$#{$self->{open_elements}}) {
2894     my $node = $self->{open_elements}->[$_];
2895 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2896 wakaba 1.108 !!!cp ('t179');
2897     $i = $_;
2898    
2899     ## Close the cell
2900 wakaba 1.125 !!!back-token; # </x>
2901 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2902     line => $token->{line},
2903     column => $token->{column}};
2904 wakaba 1.126 next B;
2905 wakaba 1.206 } elsif ($node->[1] == TABLE_CELL_EL) {
2906 wakaba 1.108 !!!cp ('t180');
2907 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
2908 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
2909     ## in scope in the stack of open elements by definition.
2910 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2911 wakaba 1.108 ## ISSUE: Can this be reached?
2912     !!!cp ('t181');
2913     last;
2914     }
2915 wakaba 1.52 }
2916 wakaba 1.108
2917 wakaba 1.79 !!!cp ('t182');
2918 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
2919 wakaba 1.153 text => $token->{tag_name}, token => $token);
2920 wakaba 1.52 ## Ignore the token
2921     !!!next-token;
2922 wakaba 1.126 next B;
2923 wakaba 1.108 } # INSCOPE
2924 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
2925 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2926 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2927     token => $token);
2928 wakaba 1.52
2929     ## As if </caption>
2930     ## have a table element in table scope
2931     my $i;
2932     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2933     my $node = $self->{open_elements}->[$_];
2934 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2935 wakaba 1.79 !!!cp ('t184');
2936 wakaba 1.52 $i = $_;
2937     last INSCOPE;
2938 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2939 wakaba 1.79 !!!cp ('t185');
2940 wakaba 1.52 last INSCOPE;
2941     }
2942     } # INSCOPE
2943     unless (defined $i) {
2944 wakaba 1.79 !!!cp ('t186');
2945 wakaba 1.209 ## TODO: Wrong error type?
2946 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2947     text => 'caption', token => $token);
2948 wakaba 1.52 ## Ignore the token
2949     !!!next-token;
2950 wakaba 1.126 next B;
2951 wakaba 1.52 }
2952    
2953     ## generate implied end tags
2954 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
2955 wakaba 1.79 !!!cp ('t187');
2956 wakaba 1.86 pop @{$self->{open_elements}};
2957 wakaba 1.52 }
2958    
2959 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2960 wakaba 1.79 !!!cp ('t188');
2961 wakaba 1.122 !!!parse-error (type => 'not closed',
2962 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2963 wakaba 1.122 ->manakai_local_name,
2964     token => $token);
2965 wakaba 1.79 } else {
2966     !!!cp ('t189');
2967 wakaba 1.52 }
2968    
2969     splice @{$self->{open_elements}}, $i;
2970    
2971     $clear_up_to_marker->();
2972    
2973 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2974 wakaba 1.52
2975     ## reprocess
2976 wakaba 1.126 next B;
2977 wakaba 1.52 } elsif ({
2978     body => 1, col => 1, colgroup => 1, html => 1,
2979     }->{$token->{tag_name}}) {
2980 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
2981 wakaba 1.79 !!!cp ('t190');
2982 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2983     text => $token->{tag_name}, token => $token);
2984 wakaba 1.52 ## Ignore the token
2985     !!!next-token;
2986 wakaba 1.126 next B;
2987 wakaba 1.52 } else {
2988 wakaba 1.79 !!!cp ('t191');
2989 wakaba 1.52 #
2990     }
2991 wakaba 1.210 } elsif ({
2992     tbody => 1, tfoot => 1,
2993     thead => 1, tr => 1,
2994     }->{$token->{tag_name}} and
2995     ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2996     !!!cp ('t192');
2997     !!!parse-error (type => 'unmatched end tag',
2998     text => $token->{tag_name}, token => $token);
2999     ## Ignore the token
3000     !!!next-token;
3001     next B;
3002     } else {
3003     !!!cp ('t193');
3004     #
3005     }
3006 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3007     for my $entry (@{$self->{open_elements}}) {
3008 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
3009 wakaba 1.104 !!!cp ('t75');
3010 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3011 wakaba 1.104 last;
3012     }
3013     }
3014    
3015     ## Stop parsing.
3016     last B;
3017 wakaba 1.52 } else {
3018     die "$0: $token->{type}: Unknown token type";
3019     }
3020    
3021     $insert = $insert_to_current;
3022     #
3023 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3024 wakaba 1.229 if ($token->{type} == START_TAG_TOKEN) {
3025 wakaba 1.153 if ({
3026 wakaba 1.210 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3027 wakaba 1.153 th => 1, td => 1,
3028     }->{$token->{tag_name}}) {
3029 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3030 wakaba 1.153 ## Clear back to table context
3031     while (not ($self->{open_elements}->[-1]->[1]
3032     & TABLE_SCOPING_EL)) {
3033     !!!cp ('t201');
3034     pop @{$self->{open_elements}};
3035     }
3036    
3037     !!!insert-element ('tbody',, $token);
3038     $self->{insertion_mode} = IN_TABLE_BODY_IM;
3039     ## reprocess in the "in table body" insertion mode...
3040     }
3041    
3042 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3043 wakaba 1.153 unless ($token->{tag_name} eq 'tr') {
3044     !!!cp ('t202');
3045     !!!parse-error (type => 'missing start tag:tr', token => $token);
3046     }
3047 wakaba 1.43
3048 wakaba 1.153 ## Clear back to table body context
3049     while (not ($self->{open_elements}->[-1]->[1]
3050     & TABLE_ROWS_SCOPING_EL)) {
3051     !!!cp ('t203');
3052     ## ISSUE: Can this case be reached?
3053     pop @{$self->{open_elements}};
3054     }
3055 wakaba 1.43
3056 wakaba 1.202 $self->{insertion_mode} = IN_ROW_IM;
3057     if ($token->{tag_name} eq 'tr') {
3058     !!!cp ('t204');
3059     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3060     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3061     !!!nack ('t204');
3062     !!!next-token;
3063     next B;
3064     } else {
3065     !!!cp ('t205');
3066     !!!insert-element ('tr',, $token);
3067     ## reprocess in the "in row" insertion mode
3068     }
3069     } else {
3070     !!!cp ('t206');
3071     }
3072 wakaba 1.52
3073     ## Clear back to table row context
3074 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3075     & TABLE_ROW_SCOPING_EL)) {
3076 wakaba 1.79 !!!cp ('t207');
3077 wakaba 1.52 pop @{$self->{open_elements}};
3078 wakaba 1.43 }
3079 wakaba 1.52
3080 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3081     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3082     $self->{insertion_mode} = IN_CELL_IM;
3083 wakaba 1.52
3084 wakaba 1.202 push @$active_formatting_elements, ['#marker', ''];
3085 wakaba 1.52
3086 wakaba 1.202 !!!nack ('t207.1');
3087     !!!next-token;
3088     next B;
3089     } elsif ({
3090     caption => 1, col => 1, colgroup => 1,
3091     tbody => 1, tfoot => 1, thead => 1,
3092     tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3093     }->{$token->{tag_name}}) {
3094 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3095 wakaba 1.202 ## As if </tr>
3096     ## have an element in table scope
3097     my $i;
3098     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3099     my $node = $self->{open_elements}->[$_];
3100 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3101 wakaba 1.202 !!!cp ('t208');
3102     $i = $_;
3103     last INSCOPE;
3104     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3105     !!!cp ('t209');
3106     last INSCOPE;
3107     }
3108     } # INSCOPE
3109     unless (defined $i) {
3110     !!!cp ('t210');
3111     ## TODO: This type is wrong.
3112     !!!parse-error (type => 'unmacthed end tag',
3113     text => $token->{tag_name}, token => $token);
3114     ## Ignore the token
3115     !!!nack ('t210.1');
3116 wakaba 1.52 !!!next-token;
3117 wakaba 1.126 next B;
3118 wakaba 1.202 }
3119 wakaba 1.43
3120 wakaba 1.52 ## Clear back to table row context
3121 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3122     & TABLE_ROW_SCOPING_EL)) {
3123 wakaba 1.79 !!!cp ('t211');
3124 wakaba 1.83 ## ISSUE: Can this case be reached?
3125 wakaba 1.52 pop @{$self->{open_elements}};
3126 wakaba 1.1 }
3127 wakaba 1.43
3128 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3129 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3130 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
3131 wakaba 1.79 !!!cp ('t212');
3132 wakaba 1.52 ## reprocess
3133 wakaba 1.125 !!!ack-later;
3134 wakaba 1.126 next B;
3135 wakaba 1.52 } else {
3136 wakaba 1.79 !!!cp ('t213');
3137 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3138     }
3139 wakaba 1.1 }
3140 wakaba 1.52
3141 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3142 wakaba 1.52 ## have an element in table scope
3143 wakaba 1.43 my $i;
3144     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3145     my $node = $self->{open_elements}->[$_];
3146 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3147 wakaba 1.79 !!!cp ('t214');
3148 wakaba 1.43 $i = $_;
3149     last INSCOPE;
3150 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3151 wakaba 1.79 !!!cp ('t215');
3152 wakaba 1.43 last INSCOPE;
3153     }
3154     } # INSCOPE
3155 wakaba 1.52 unless (defined $i) {
3156 wakaba 1.79 !!!cp ('t216');
3157 wakaba 1.153 ## TODO: This error type is wrong.
3158     !!!parse-error (type => 'unmatched end tag',
3159     text => $token->{tag_name}, token => $token);
3160 wakaba 1.52 ## Ignore the token
3161 wakaba 1.125 !!!nack ('t216.1');
3162 wakaba 1.52 !!!next-token;
3163 wakaba 1.126 next B;
3164 wakaba 1.43 }
3165 wakaba 1.52
3166     ## Clear back to table body context
3167 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3168     & TABLE_ROWS_SCOPING_EL)) {
3169 wakaba 1.79 !!!cp ('t217');
3170 wakaba 1.83 ## ISSUE: Can this state be reached?
3171 wakaba 1.52 pop @{$self->{open_elements}};
3172 wakaba 1.43 }
3173    
3174 wakaba 1.52 ## As if <{current node}>
3175     ## have an element in table scope
3176     ## true by definition
3177 wakaba 1.43
3178 wakaba 1.52 ## Clear back to table body context
3179     ## nop by definition
3180 wakaba 1.43
3181 wakaba 1.52 pop @{$self->{open_elements}};
3182 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3183 wakaba 1.52 ## reprocess in "in table" insertion mode...
3184 wakaba 1.79 } else {
3185     !!!cp ('t218');
3186 wakaba 1.52 }
3187    
3188 wakaba 1.202 if ($token->{tag_name} eq 'col') {
3189     ## Clear back to table context
3190     while (not ($self->{open_elements}->[-1]->[1]
3191     & TABLE_SCOPING_EL)) {
3192     !!!cp ('t219');
3193     ## ISSUE: Can this state be reached?
3194     pop @{$self->{open_elements}};
3195     }
3196    
3197     !!!insert-element ('colgroup',, $token);
3198     $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3199     ## reprocess
3200     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3201     !!!ack-later;
3202     next B;
3203     } elsif ({
3204     caption => 1,
3205     colgroup => 1,
3206     tbody => 1, tfoot => 1, thead => 1,
3207     }->{$token->{tag_name}}) {
3208     ## Clear back to table context
3209 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3210     & TABLE_SCOPING_EL)) {
3211 wakaba 1.79 !!!cp ('t220');
3212 wakaba 1.83 ## ISSUE: Can this state be reached?
3213 wakaba 1.52 pop @{$self->{open_elements}};
3214 wakaba 1.1 }
3215 wakaba 1.52
3216 wakaba 1.202 push @$active_formatting_elements, ['#marker', '']
3217     if $token->{tag_name} eq 'caption';
3218 wakaba 1.52
3219 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3220     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3221     $self->{insertion_mode} = {
3222     caption => IN_CAPTION_IM,
3223     colgroup => IN_COLUMN_GROUP_IM,
3224     tbody => IN_TABLE_BODY_IM,
3225     tfoot => IN_TABLE_BODY_IM,
3226     thead => IN_TABLE_BODY_IM,
3227     }->{$token->{tag_name}};
3228     !!!next-token;
3229     !!!nack ('t220.1');
3230     next B;
3231     } else {
3232     die "$0: in table: <>: $token->{tag_name}";
3233     }
3234 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3235 wakaba 1.122 !!!parse-error (type => 'not closed',
3236 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3237 wakaba 1.122 ->manakai_local_name,
3238     token => $token);
3239 wakaba 1.1
3240 wakaba 1.52 ## As if </table>
3241 wakaba 1.1 ## have a table element in table scope
3242     my $i;
3243 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3244     my $node = $self->{open_elements}->[$_];
3245 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3246 wakaba 1.79 !!!cp ('t221');
3247 wakaba 1.1 $i = $_;
3248     last INSCOPE;
3249 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3250 wakaba 1.79 !!!cp ('t222');
3251 wakaba 1.1 last INSCOPE;
3252     }
3253     } # INSCOPE
3254     unless (defined $i) {
3255 wakaba 1.79 !!!cp ('t223');
3256 wakaba 1.83 ## TODO: The following is wrong, maybe.
3257 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
3258     token => $token);
3259 wakaba 1.52 ## Ignore tokens </table><table>
3260 wakaba 1.125 !!!nack ('t223.1');
3261 wakaba 1.1 !!!next-token;
3262 wakaba 1.126 next B;
3263 wakaba 1.1 }
3264    
3265 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
3266 wakaba 1.1 ## generate implied end tags
3267 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3268 wakaba 1.79 !!!cp ('t224');
3269 wakaba 1.86 pop @{$self->{open_elements}};
3270 wakaba 1.1 }
3271    
3272 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3273 wakaba 1.79 !!!cp ('t225');
3274 wakaba 1.122 ## NOTE: |<table><tr><table>|
3275     !!!parse-error (type => 'not closed',
3276 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3277 wakaba 1.122 ->manakai_local_name,
3278     token => $token);
3279 wakaba 1.79 } else {
3280     !!!cp ('t226');
3281 wakaba 1.1 }
3282    
3283 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3284 wakaba 1.95 pop @{$open_tables};
3285 wakaba 1.1
3286 wakaba 1.52 $self->_reset_insertion_mode;
3287 wakaba 1.1
3288 wakaba 1.125 ## reprocess
3289     !!!ack-later;
3290 wakaba 1.126 next B;
3291 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
3292 wakaba 1.233 !!!cp ('t227.8');
3293     ## NOTE: This is an "as if in head" code clone.
3294     $parse_rcdata->(CDATA_CONTENT_MODEL);
3295     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3296     next B;
3297 wakaba 1.100 } elsif ($token->{tag_name} eq 'script') {
3298 wakaba 1.233 !!!cp ('t227.6');
3299     ## NOTE: This is a "as if in head" code clone.
3300     $script_start_tag->();
3301     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3302     next B;
3303 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
3304 wakaba 1.233 if ($token->{attributes}->{type}) {
3305     my $type = $token->{attributes}->{type}->{value};
3306     $type =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
3307     if ($type eq 'hidden') {
3308     !!!cp ('t227.3');
3309     !!!parse-error (type => 'in table',
3310     text => $token->{tag_name}, token => $token);
3311 wakaba 1.98
3312 wakaba 1.233 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3313     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3314 wakaba 1.98
3315 wakaba 1.233 ## TODO: form element pointer
3316 wakaba 1.98
3317 wakaba 1.233 pop @{$self->{open_elements}};
3318 wakaba 1.98
3319 wakaba 1.233 !!!next-token;
3320     !!!ack ('t227.2.1');
3321     next B;
3322 wakaba 1.98 } else {
3323     !!!cp ('t227.1');
3324     #
3325     }
3326     } else {
3327     !!!cp ('t227.4');
3328     #
3329     }
3330 wakaba 1.58 } else {
3331 wakaba 1.79 !!!cp ('t227');
3332 wakaba 1.58 #
3333     }
3334 wakaba 1.98
3335 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
3336     token => $token);
3337 wakaba 1.98
3338     $insert = $insert_to_foster;
3339     #
3340 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
3341 wakaba 1.210 if ($token->{tag_name} eq 'tr' and
3342     ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3343     ## have an element in table scope
3344 wakaba 1.52 my $i;
3345     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3346     my $node = $self->{open_elements}->[$_];
3347 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3348 wakaba 1.79 !!!cp ('t228');
3349 wakaba 1.52 $i = $_;
3350     last INSCOPE;
3351 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3352 wakaba 1.79 !!!cp ('t229');
3353 wakaba 1.52 last INSCOPE;
3354     }
3355     } # INSCOPE
3356     unless (defined $i) {
3357 wakaba 1.79 !!!cp ('t230');
3358 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3359     text => $token->{tag_name}, token => $token);
3360 wakaba 1.52 ## Ignore the token
3361 wakaba 1.125 !!!nack ('t230.1');
3362 wakaba 1.42 !!!next-token;
3363 wakaba 1.126 next B;
3364 wakaba 1.79 } else {
3365     !!!cp ('t232');
3366 wakaba 1.42 }
3367    
3368 wakaba 1.52 ## Clear back to table row context
3369 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3370     & TABLE_ROW_SCOPING_EL)) {
3371 wakaba 1.79 !!!cp ('t231');
3372 wakaba 1.83 ## ISSUE: Can this state be reached?
3373 wakaba 1.52 pop @{$self->{open_elements}};
3374     }
3375 wakaba 1.42
3376 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3377 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3378 wakaba 1.52 !!!next-token;
3379 wakaba 1.125 !!!nack ('t231.1');
3380 wakaba 1.126 next B;
3381 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3382 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3383 wakaba 1.52 ## As if </tr>
3384     ## have an element in table scope
3385     my $i;
3386     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3387     my $node = $self->{open_elements}->[$_];
3388 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3389 wakaba 1.79 !!!cp ('t233');
3390 wakaba 1.52 $i = $_;
3391     last INSCOPE;
3392 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3393 wakaba 1.79 !!!cp ('t234');
3394 wakaba 1.52 last INSCOPE;
3395 wakaba 1.42 }
3396 wakaba 1.52 } # INSCOPE
3397     unless (defined $i) {
3398 wakaba 1.79 !!!cp ('t235');
3399 wakaba 1.83 ## TODO: The following is wrong.
3400 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3401     text => $token->{type}, token => $token);
3402 wakaba 1.52 ## Ignore the token
3403 wakaba 1.125 !!!nack ('t236.1');
3404 wakaba 1.52 !!!next-token;
3405 wakaba 1.126 next B;
3406 wakaba 1.42 }
3407 wakaba 1.52
3408     ## Clear back to table row context
3409 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3410     & TABLE_ROW_SCOPING_EL)) {
3411 wakaba 1.79 !!!cp ('t236');
3412 wakaba 1.83 ## ISSUE: Can this state be reached?
3413 wakaba 1.46 pop @{$self->{open_elements}};
3414 wakaba 1.1 }
3415 wakaba 1.46
3416 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3417 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3418 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
3419 wakaba 1.1 }
3420    
3421 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3422 wakaba 1.52 ## have an element in table scope
3423     my $i;
3424     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3425     my $node = $self->{open_elements}->[$_];
3426 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3427 wakaba 1.79 !!!cp ('t237');
3428 wakaba 1.52 $i = $_;
3429     last INSCOPE;
3430 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3431 wakaba 1.79 !!!cp ('t238');
3432 wakaba 1.52 last INSCOPE;
3433     }
3434     } # INSCOPE
3435     unless (defined $i) {
3436 wakaba 1.79 !!!cp ('t239');
3437 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3438     text => $token->{tag_name}, token => $token);
3439 wakaba 1.52 ## Ignore the token
3440 wakaba 1.125 !!!nack ('t239.1');
3441 wakaba 1.52 !!!next-token;
3442 wakaba 1.126 next B;
3443 wakaba 1.47 }
3444    
3445     ## Clear back to table body context
3446 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3447     & TABLE_ROWS_SCOPING_EL)) {
3448 wakaba 1.79 !!!cp ('t240');
3449 wakaba 1.47 pop @{$self->{open_elements}};
3450     }
3451    
3452 wakaba 1.52 ## As if <{current node}>
3453     ## have an element in table scope
3454     ## true by definition
3455    
3456     ## Clear back to table body context
3457     ## nop by definition
3458    
3459     pop @{$self->{open_elements}};
3460 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3461 wakaba 1.52 ## reprocess in the "in table" insertion mode...
3462     }
3463    
3464 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
3465     ## When you edit the code fragment below, please ensure that
3466     ## the code for <table> in the "in table" insertion mode
3467     ## is synced with it.
3468    
3469 wakaba 1.52 ## have a table element in table scope
3470     my $i;
3471     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3472     my $node = $self->{open_elements}->[$_];
3473 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3474 wakaba 1.79 !!!cp ('t241');
3475 wakaba 1.52 $i = $_;
3476     last INSCOPE;
3477 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3478 wakaba 1.79 !!!cp ('t242');
3479 wakaba 1.52 last INSCOPE;
3480 wakaba 1.47 }
3481 wakaba 1.52 } # INSCOPE
3482     unless (defined $i) {
3483 wakaba 1.79 !!!cp ('t243');
3484 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3485     text => $token->{tag_name}, token => $token);
3486 wakaba 1.52 ## Ignore the token
3487 wakaba 1.125 !!!nack ('t243.1');
3488 wakaba 1.52 !!!next-token;
3489 wakaba 1.126 next B;
3490 wakaba 1.3 }
3491 wakaba 1.52
3492     splice @{$self->{open_elements}}, $i;
3493 wakaba 1.95 pop @{$open_tables};
3494 wakaba 1.1
3495 wakaba 1.52 $self->_reset_insertion_mode;
3496 wakaba 1.47
3497     !!!next-token;
3498 wakaba 1.126 next B;
3499 wakaba 1.47 } elsif ({
3500 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
3501 wakaba 1.52 }->{$token->{tag_name}} and
3502 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
3503 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3504 wakaba 1.52 ## have an element in table scope
3505     my $i;
3506     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3507     my $node = $self->{open_elements}->[$_];
3508 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3509 wakaba 1.79 !!!cp ('t247');
3510 wakaba 1.52 $i = $_;
3511     last INSCOPE;
3512 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3513 wakaba 1.79 !!!cp ('t248');
3514 wakaba 1.52 last INSCOPE;
3515     }
3516     } # INSCOPE
3517     unless (defined $i) {
3518 wakaba 1.79 !!!cp ('t249');
3519 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3520     text => $token->{tag_name}, token => $token);
3521 wakaba 1.52 ## Ignore the token
3522 wakaba 1.125 !!!nack ('t249.1');
3523 wakaba 1.52 !!!next-token;
3524 wakaba 1.126 next B;
3525 wakaba 1.52 }
3526    
3527 wakaba 1.48 ## As if </tr>
3528     ## have an element in table scope
3529     my $i;
3530     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3531     my $node = $self->{open_elements}->[$_];
3532 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3533 wakaba 1.79 !!!cp ('t250');
3534 wakaba 1.48 $i = $_;
3535     last INSCOPE;
3536 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3537 wakaba 1.79 !!!cp ('t251');
3538 wakaba 1.48 last INSCOPE;
3539     }
3540     } # INSCOPE
3541 wakaba 1.52 unless (defined $i) {
3542 wakaba 1.79 !!!cp ('t252');
3543 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3544     text => 'tr', token => $token);
3545 wakaba 1.52 ## Ignore the token
3546 wakaba 1.125 !!!nack ('t252.1');
3547 wakaba 1.52 !!!next-token;
3548 wakaba 1.126 next B;
3549 wakaba 1.52 }
3550 wakaba 1.48
3551     ## Clear back to table row context
3552 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3553     & TABLE_ROW_SCOPING_EL)) {
3554 wakaba 1.79 !!!cp ('t253');
3555 wakaba 1.83 ## ISSUE: Can this case be reached?
3556 wakaba 1.48 pop @{$self->{open_elements}};
3557     }
3558    
3559     pop @{$self->{open_elements}}; # tr
3560 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3561 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3562     }
3563    
3564     ## have an element in table scope
3565     my $i;
3566     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3567     my $node = $self->{open_elements}->[$_];
3568 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3569 wakaba 1.79 !!!cp ('t254');
3570 wakaba 1.52 $i = $_;
3571     last INSCOPE;
3572 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3573 wakaba 1.79 !!!cp ('t255');
3574 wakaba 1.52 last INSCOPE;
3575     }
3576     } # INSCOPE
3577     unless (defined $i) {
3578 wakaba 1.79 !!!cp ('t256');
3579 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3580     text => $token->{tag_name}, token => $token);
3581 wakaba 1.52 ## Ignore the token
3582 wakaba 1.125 !!!nack ('t256.1');
3583 wakaba 1.52 !!!next-token;
3584 wakaba 1.126 next B;
3585 wakaba 1.52 }
3586    
3587     ## Clear back to table body context
3588 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3589     & TABLE_ROWS_SCOPING_EL)) {
3590 wakaba 1.79 !!!cp ('t257');
3591 wakaba 1.83 ## ISSUE: Can this case be reached?
3592 wakaba 1.52 pop @{$self->{open_elements}};
3593     }
3594    
3595     pop @{$self->{open_elements}};
3596 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3597 wakaba 1.125 !!!nack ('t257.1');
3598 wakaba 1.52 !!!next-token;
3599 wakaba 1.126 next B;
3600 wakaba 1.52 } elsif ({
3601     body => 1, caption => 1, col => 1, colgroup => 1,
3602     html => 1, td => 1, th => 1,
3603 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3604     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3605 wakaba 1.52 }->{$token->{tag_name}}) {
3606 wakaba 1.125 !!!cp ('t258');
3607 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3608     text => $token->{tag_name}, token => $token);
3609 wakaba 1.125 ## Ignore the token
3610     !!!nack ('t258.1');
3611     !!!next-token;
3612 wakaba 1.126 next B;
3613 wakaba 1.58 } else {
3614 wakaba 1.79 !!!cp ('t259');
3615 wakaba 1.153 !!!parse-error (type => 'in table:/',
3616     text => $token->{tag_name}, token => $token);
3617 wakaba 1.52
3618 wakaba 1.58 $insert = $insert_to_foster;
3619     #
3620     }
3621 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3622 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3623 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3624 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3625 wakaba 1.104 !!!cp ('t259.1');
3626 wakaba 1.105 #
3627 wakaba 1.104 } else {
3628     !!!cp ('t259.2');
3629 wakaba 1.105 #
3630 wakaba 1.104 }
3631    
3632     ## Stop parsing
3633     last B;
3634 wakaba 1.58 } else {
3635     die "$0: $token->{type}: Unknown token type";
3636     }
3637 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3638 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3639 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3640 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3641     unless (length $token->{data}) {
3642 wakaba 1.79 !!!cp ('t260');
3643 wakaba 1.52 !!!next-token;
3644 wakaba 1.126 next B;
3645 wakaba 1.52 }
3646     }
3647    
3648 wakaba 1.79 !!!cp ('t261');
3649 wakaba 1.52 #
3650 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3651 wakaba 1.52 if ($token->{tag_name} eq 'col') {
3652 wakaba 1.79 !!!cp ('t262');
3653 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3654 wakaba 1.52 pop @{$self->{open_elements}};
3655 wakaba 1.125 !!!ack ('t262.1');
3656 wakaba 1.52 !!!next-token;
3657 wakaba 1.126 next B;
3658 wakaba 1.52 } else {
3659 wakaba 1.79 !!!cp ('t263');
3660 wakaba 1.52 #
3661     }
3662 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3663 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
3664 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3665 wakaba 1.79 !!!cp ('t264');
3666 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3667     text => 'colgroup', token => $token);
3668 wakaba 1.52 ## Ignore the token
3669     !!!next-token;
3670 wakaba 1.126 next B;
3671 wakaba 1.52 } else {
3672 wakaba 1.79 !!!cp ('t265');
3673 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3674 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3675 wakaba 1.52 !!!next-token;
3676 wakaba 1.126 next B;
3677 wakaba 1.52 }
3678     } elsif ($token->{tag_name} eq 'col') {
3679 wakaba 1.79 !!!cp ('t266');
3680 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3681     text => 'col', token => $token);
3682 wakaba 1.52 ## Ignore the token
3683     !!!next-token;
3684 wakaba 1.126 next B;
3685 wakaba 1.52 } else {
3686 wakaba 1.79 !!!cp ('t267');
3687 wakaba 1.52 #
3688     }
3689 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3690 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3691 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3692     !!!cp ('t270.2');
3693     ## Stop parsing.
3694     last B;
3695     } else {
3696     ## NOTE: As if </colgroup>.
3697     !!!cp ('t270.1');
3698     pop @{$self->{open_elements}}; # colgroup
3699     $self->{insertion_mode} = IN_TABLE_IM;
3700     ## Reprocess.
3701 wakaba 1.126 next B;
3702 wakaba 1.104 }
3703     } else {
3704     die "$0: $token->{type}: Unknown token type";
3705     }
3706 wakaba 1.52
3707     ## As if </colgroup>
3708 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3709 wakaba 1.79 !!!cp ('t269');
3710 wakaba 1.104 ## TODO: Wrong error type?
3711 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3712     text => 'colgroup', token => $token);
3713 wakaba 1.52 ## Ignore the token
3714 wakaba 1.125 !!!nack ('t269.1');
3715 wakaba 1.52 !!!next-token;
3716 wakaba 1.126 next B;
3717 wakaba 1.52 } else {
3718 wakaba 1.79 !!!cp ('t270');
3719 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3720 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3721 wakaba 1.125 !!!ack-later;
3722 wakaba 1.52 ## reprocess
3723 wakaba 1.126 next B;
3724 wakaba 1.52 }
3725 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3726 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3727 wakaba 1.79 !!!cp ('t271');
3728 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3729     !!!next-token;
3730 wakaba 1.126 next B;
3731 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3732 wakaba 1.123 if ($token->{tag_name} eq 'option') {
3733 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3734 wakaba 1.123 !!!cp ('t272');
3735     ## As if </option>
3736     pop @{$self->{open_elements}};
3737     } else {
3738     !!!cp ('t273');
3739     }
3740 wakaba 1.52
3741 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3742 wakaba 1.125 !!!nack ('t273.1');
3743 wakaba 1.123 !!!next-token;
3744 wakaba 1.126 next B;
3745 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
3746 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3747 wakaba 1.123 !!!cp ('t274');
3748     ## As if </option>
3749     pop @{$self->{open_elements}};
3750     } else {
3751     !!!cp ('t275');
3752     }
3753 wakaba 1.52
3754 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3755 wakaba 1.123 !!!cp ('t276');
3756     ## As if </optgroup>
3757     pop @{$self->{open_elements}};
3758     } else {
3759     !!!cp ('t277');
3760     }
3761 wakaba 1.52
3762 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3763 wakaba 1.125 !!!nack ('t277.1');
3764 wakaba 1.123 !!!next-token;
3765 wakaba 1.126 next B;
3766 wakaba 1.146 } elsif ({
3767 wakaba 1.216 select => 1, input => 1, textarea => 1, keygen => 1,
3768 wakaba 1.146 }->{$token->{tag_name}} or
3769 wakaba 1.210 (($self->{insertion_mode} & IM_MASK)
3770     == IN_SELECT_IN_TABLE_IM and
3771 wakaba 1.101 {
3772     caption => 1, table => 1,
3773     tbody => 1, tfoot => 1, thead => 1,
3774     tr => 1, td => 1, th => 1,
3775     }->{$token->{tag_name}})) {
3776 wakaba 1.222
3777     ## 1. Parse error.
3778     if ($token->{tag_name} eq 'select') {
3779     !!!parse-error (type => 'select in select', ## XXX: documentation
3780     token => $token);
3781     } else {
3782     !!!parse-error (type => 'not closed', text => 'select',
3783     token => $token);
3784     }
3785    
3786     ## 2./<select>-1. Unless "have an element in table scope" (select):
3787 wakaba 1.123 my $i;
3788     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3789     my $node = $self->{open_elements}->[$_];
3790 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3791 wakaba 1.123 !!!cp ('t278');
3792     $i = $_;
3793     last INSCOPE;
3794     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3795     !!!cp ('t279');
3796     last INSCOPE;
3797     }
3798     } # INSCOPE
3799     unless (defined $i) {
3800     !!!cp ('t280');
3801 wakaba 1.222 if ($token->{tag_name} eq 'select') {
3802     ## NOTE: This error would be raised when
3803     ## |select.innerHTML = '<select>'| is executed; in this
3804     ## case two errors, "select in select" and "unmatched
3805     ## end tags" are reported to the user, the latter might
3806     ## be confusing but this is what the spec requires.
3807     !!!parse-error (type => 'unmatched end tag',
3808     text => 'select',
3809     token => $token);
3810     }
3811     ## Ignore the token.
3812 wakaba 1.125 !!!nack ('t280.1');
3813 wakaba 1.123 !!!next-token;
3814 wakaba 1.126 next B;
3815 wakaba 1.123 }
3816 wakaba 1.222
3817     ## 3. Otherwise, act as if there were </select>:
3818 wakaba 1.52
3819 wakaba 1.123 !!!cp ('t281');
3820     splice @{$self->{open_elements}}, $i;
3821 wakaba 1.52
3822 wakaba 1.123 $self->_reset_insertion_mode;
3823 wakaba 1.47
3824 wakaba 1.101 if ($token->{tag_name} eq 'select') {
3825 wakaba 1.125 !!!nack ('t281.2');
3826 wakaba 1.101 !!!next-token;
3827 wakaba 1.126 next B;
3828 wakaba 1.101 } else {
3829     !!!cp ('t281.1');
3830 wakaba 1.125 !!!ack-later;
3831 wakaba 1.101 ## Reprocess the token.
3832 wakaba 1.126 next B;
3833 wakaba 1.101 }
3834 wakaba 1.226 } elsif ($token->{tag_name} eq 'script') {
3835     !!!cp ('t281.3');
3836     ## NOTE: This is an "as if in head" code clone
3837     $script_start_tag->();
3838     next B;
3839 wakaba 1.58 } else {
3840 wakaba 1.79 !!!cp ('t282');
3841 wakaba 1.153 !!!parse-error (type => 'in select',
3842     text => $token->{tag_name}, token => $token);
3843 wakaba 1.58 ## Ignore the token
3844 wakaba 1.125 !!!nack ('t282.1');
3845 wakaba 1.58 !!!next-token;
3846 wakaba 1.126 next B;
3847 wakaba 1.58 }
3848     } elsif ($token->{type} == END_TAG_TOKEN) {
3849 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
3850 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3851     $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3852 wakaba 1.123 !!!cp ('t283');
3853     ## As if </option>
3854     splice @{$self->{open_elements}}, -2;
3855 wakaba 1.206 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3856 wakaba 1.123 !!!cp ('t284');
3857     pop @{$self->{open_elements}};
3858     } else {
3859     !!!cp ('t285');
3860 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3861     text => $token->{tag_name}, token => $token);
3862 wakaba 1.123 ## Ignore the token
3863     }
3864 wakaba 1.125 !!!nack ('t285.1');
3865 wakaba 1.123 !!!next-token;
3866 wakaba 1.126 next B;
3867 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
3868 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3869 wakaba 1.123 !!!cp ('t286');
3870     pop @{$self->{open_elements}};
3871     } else {
3872     !!!cp ('t287');
3873 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3874     text => $token->{tag_name}, token => $token);
3875 wakaba 1.123 ## Ignore the token
3876     }
3877 wakaba 1.125 !!!nack ('t287.1');
3878 wakaba 1.123 !!!next-token;
3879 wakaba 1.126 next B;
3880 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
3881     ## have an element in table scope
3882     my $i;
3883     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3884     my $node = $self->{open_elements}->[$_];
3885 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3886 wakaba 1.123 !!!cp ('t288');
3887     $i = $_;
3888     last INSCOPE;
3889     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3890     !!!cp ('t289');
3891     last INSCOPE;
3892     }
3893     } # INSCOPE
3894     unless (defined $i) {
3895     !!!cp ('t290');
3896 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3897     text => $token->{tag_name}, token => $token);
3898 wakaba 1.123 ## Ignore the token
3899 wakaba 1.125 !!!nack ('t290.1');
3900 wakaba 1.123 !!!next-token;
3901 wakaba 1.126 next B;
3902 wakaba 1.123 }
3903 wakaba 1.52
3904 wakaba 1.123 !!!cp ('t291');
3905     splice @{$self->{open_elements}}, $i;
3906 wakaba 1.52
3907 wakaba 1.123 $self->_reset_insertion_mode;
3908 wakaba 1.52
3909 wakaba 1.125 !!!nack ('t291.1');
3910 wakaba 1.123 !!!next-token;
3911 wakaba 1.126 next B;
3912 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK)
3913     == IN_SELECT_IN_TABLE_IM and
3914 wakaba 1.101 {
3915     caption => 1, table => 1, tbody => 1,
3916     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3917     }->{$token->{tag_name}}) {
3918 wakaba 1.83 ## TODO: The following is wrong?
3919 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3920     text => $token->{tag_name}, token => $token);
3921 wakaba 1.52
3922 wakaba 1.123 ## have an element in table scope
3923     my $i;
3924     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3925     my $node = $self->{open_elements}->[$_];
3926     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3927     !!!cp ('t292');
3928     $i = $_;
3929     last INSCOPE;
3930     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3931     !!!cp ('t293');
3932     last INSCOPE;
3933     }
3934     } # INSCOPE
3935     unless (defined $i) {
3936     !!!cp ('t294');
3937     ## Ignore the token
3938 wakaba 1.125 !!!nack ('t294.1');
3939 wakaba 1.123 !!!next-token;
3940 wakaba 1.126 next B;
3941 wakaba 1.123 }
3942 wakaba 1.52
3943 wakaba 1.123 ## As if </select>
3944     ## have an element in table scope
3945     undef $i;
3946     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3947     my $node = $self->{open_elements}->[$_];
3948 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3949 wakaba 1.123 !!!cp ('t295');
3950     $i = $_;
3951     last INSCOPE;
3952     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3953 wakaba 1.83 ## ISSUE: Can this state be reached?
3954 wakaba 1.123 !!!cp ('t296');
3955     last INSCOPE;
3956     }
3957     } # INSCOPE
3958     unless (defined $i) {
3959     !!!cp ('t297');
3960 wakaba 1.83 ## TODO: The following error type is correct?
3961 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3962     text => 'select', token => $token);
3963 wakaba 1.123 ## Ignore the </select> token
3964 wakaba 1.125 !!!nack ('t297.1');
3965 wakaba 1.123 !!!next-token; ## TODO: ok?
3966 wakaba 1.126 next B;
3967 wakaba 1.123 }
3968 wakaba 1.52
3969 wakaba 1.123 !!!cp ('t298');
3970     splice @{$self->{open_elements}}, $i;
3971 wakaba 1.52
3972 wakaba 1.123 $self->_reset_insertion_mode;
3973 wakaba 1.52
3974 wakaba 1.125 !!!ack-later;
3975 wakaba 1.123 ## reprocess
3976 wakaba 1.126 next B;
3977 wakaba 1.58 } else {
3978 wakaba 1.79 !!!cp ('t299');
3979 wakaba 1.153 !!!parse-error (type => 'in select:/',
3980     text => $token->{tag_name}, token => $token);
3981 wakaba 1.52 ## Ignore the token
3982 wakaba 1.125 !!!nack ('t299.3');
3983 wakaba 1.52 !!!next-token;
3984 wakaba 1.126 next B;
3985 wakaba 1.58 }
3986 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3987 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3988 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3989     !!!cp ('t299.1');
3990 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3991 wakaba 1.104 } else {
3992     !!!cp ('t299.2');
3993     }
3994    
3995     ## Stop parsing.
3996     last B;
3997 wakaba 1.58 } else {
3998     die "$0: $token->{type}: Unknown token type";
3999     }
4000 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4001 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4002 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4003 wakaba 1.52 my $data = $1;
4004     ## As if in body
4005     $reconstruct_active_formatting_elements->($insert_to_current);
4006    
4007     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4008    
4009     unless (length $token->{data}) {
4010 wakaba 1.79 !!!cp ('t300');
4011 wakaba 1.52 !!!next-token;
4012 wakaba 1.126 next B;
4013 wakaba 1.52 }
4014     }
4015    
4016 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4017 wakaba 1.79 !!!cp ('t301');
4018 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4019 wakaba 1.188 #
4020 wakaba 1.79 } else {
4021     !!!cp ('t302');
4022 wakaba 1.188 ## "after body" insertion mode
4023     !!!parse-error (type => 'after body:#text', token => $token);
4024     #
4025 wakaba 1.52 }
4026    
4027 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4028 wakaba 1.52 ## reprocess
4029 wakaba 1.126 next B;
4030 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4031 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4032 wakaba 1.79 !!!cp ('t303');
4033 wakaba 1.153 !!!parse-error (type => 'after html',
4034     text => $token->{tag_name}, token => $token);
4035 wakaba 1.188 #
4036 wakaba 1.79 } else {
4037     !!!cp ('t304');
4038 wakaba 1.188 ## "after body" insertion mode
4039     !!!parse-error (type => 'after body',
4040     text => $token->{tag_name}, token => $token);
4041     #
4042 wakaba 1.52 }
4043    
4044 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4045 wakaba 1.125 !!!ack-later;
4046 wakaba 1.52 ## reprocess
4047 wakaba 1.126 next B;
4048 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4049 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4050 wakaba 1.79 !!!cp ('t305');
4051 wakaba 1.153 !!!parse-error (type => 'after html:/',
4052     text => $token->{tag_name}, token => $token);
4053 wakaba 1.52
4054 wakaba 1.188 $self->{insertion_mode} = IN_BODY_IM;
4055     ## Reprocess.
4056     next B;
4057 wakaba 1.79 } else {
4058     !!!cp ('t306');
4059 wakaba 1.52 }
4060    
4061     ## "after body" insertion mode
4062     if ($token->{tag_name} eq 'html') {
4063     if (defined $self->{inner_html_node}) {
4064 wakaba 1.79 !!!cp ('t307');
4065 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4066     text => 'html', token => $token);
4067 wakaba 1.52 ## Ignore the token
4068     !!!next-token;
4069 wakaba 1.126 next B;
4070 wakaba 1.52 } else {
4071 wakaba 1.79 !!!cp ('t308');
4072 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4073 wakaba 1.52 !!!next-token;
4074 wakaba 1.126 next B;
4075 wakaba 1.52 }
4076     } else {
4077 wakaba 1.79 !!!cp ('t309');
4078 wakaba 1.153 !!!parse-error (type => 'after body:/',
4079     text => $token->{tag_name}, token => $token);
4080 wakaba 1.52
4081 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4082 wakaba 1.52 ## reprocess
4083 wakaba 1.126 next B;
4084 wakaba 1.52 }
4085 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4086     !!!cp ('t309.2');
4087     ## Stop parsing
4088     last B;
4089 wakaba 1.52 } else {
4090     die "$0: $token->{type}: Unknown token type";
4091     }
4092 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4093 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4094 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4095 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4096    
4097     unless (length $token->{data}) {
4098 wakaba 1.79 !!!cp ('t310');
4099 wakaba 1.52 !!!next-token;
4100 wakaba 1.126 next B;
4101 wakaba 1.52 }
4102     }
4103    
4104 wakaba 1.188 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4105 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4106 wakaba 1.79 !!!cp ('t311');
4107 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
4108 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4109 wakaba 1.79 !!!cp ('t312');
4110 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
4111 wakaba 1.158 } else { # "after after frameset"
4112 wakaba 1.79 !!!cp ('t313');
4113 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4114 wakaba 1.52 }
4115    
4116     ## Ignore the token.
4117     if (length $token->{data}) {
4118 wakaba 1.79 !!!cp ('t314');
4119 wakaba 1.52 ## reprocess the rest of characters
4120     } else {
4121 wakaba 1.79 !!!cp ('t315');
4122 wakaba 1.52 !!!next-token;
4123     }
4124 wakaba 1.126 next B;
4125 wakaba 1.52 }
4126    
4127     die qq[$0: Character "$token->{data}"];
4128 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4129 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4130 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4131 wakaba 1.79 !!!cp ('t318');
4132 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4133 wakaba 1.125 !!!nack ('t318.1');
4134 wakaba 1.52 !!!next-token;
4135 wakaba 1.126 next B;
4136 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
4137 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4138 wakaba 1.79 !!!cp ('t319');
4139 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4140 wakaba 1.52 pop @{$self->{open_elements}};
4141 wakaba 1.125 !!!ack ('t319.1');
4142 wakaba 1.52 !!!next-token;
4143 wakaba 1.126 next B;
4144 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
4145 wakaba 1.79 !!!cp ('t320');
4146 wakaba 1.148 ## NOTE: As if in head.
4147 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4148 wakaba 1.126 next B;
4149 wakaba 1.158
4150     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4151     ## has no parse error.
4152 wakaba 1.52 } else {
4153 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4154 wakaba 1.79 !!!cp ('t321');
4155 wakaba 1.153 !!!parse-error (type => 'in frameset',
4156     text => $token->{tag_name}, token => $token);
4157 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4158 wakaba 1.79 !!!cp ('t322');
4159 wakaba 1.153 !!!parse-error (type => 'after frameset',
4160     text => $token->{tag_name}, token => $token);
4161 wakaba 1.158 } else { # "after after frameset"
4162     !!!cp ('t322.2');
4163     !!!parse-error (type => 'after after frameset',
4164     text => $token->{tag_name}, token => $token);
4165 wakaba 1.52 }
4166     ## Ignore the token
4167 wakaba 1.125 !!!nack ('t322.1');
4168 wakaba 1.52 !!!next-token;
4169 wakaba 1.126 next B;
4170 wakaba 1.52 }
4171 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4172 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4173 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4174 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4175 wakaba 1.52 @{$self->{open_elements}} == 1) {
4176 wakaba 1.79 !!!cp ('t325');
4177 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4178     text => $token->{tag_name}, token => $token);
4179 wakaba 1.52 ## Ignore the token
4180     !!!next-token;
4181     } else {
4182 wakaba 1.79 !!!cp ('t326');
4183 wakaba 1.52 pop @{$self->{open_elements}};
4184     !!!next-token;
4185     }
4186 wakaba 1.47
4187 wakaba 1.52 if (not defined $self->{inner_html_node} and
4188 wakaba 1.206 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4189 wakaba 1.79 !!!cp ('t327');
4190 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4191 wakaba 1.79 } else {
4192     !!!cp ('t328');
4193 wakaba 1.52 }
4194 wakaba 1.126 next B;
4195 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
4196 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4197 wakaba 1.79 !!!cp ('t329');
4198 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4199 wakaba 1.52 !!!next-token;
4200 wakaba 1.126 next B;
4201 wakaba 1.52 } else {
4202 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4203 wakaba 1.79 !!!cp ('t330');
4204 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
4205     text => $token->{tag_name}, token => $token);
4206 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4207     !!!cp ('t330.1');
4208     !!!parse-error (type => 'after frameset:/',
4209     text => $token->{tag_name}, token => $token);
4210     } else { # "after after html"
4211 wakaba 1.79 !!!cp ('t331');
4212 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
4213 wakaba 1.153 text => $token->{tag_name}, token => $token);
4214 wakaba 1.52 }
4215     ## Ignore the token
4216     !!!next-token;
4217 wakaba 1.126 next B;
4218 wakaba 1.52 }
4219 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4220 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4221 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4222     !!!cp ('t331.1');
4223 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4224 wakaba 1.104 } else {
4225     !!!cp ('t331.2');
4226     }
4227    
4228     ## Stop parsing
4229     last B;
4230 wakaba 1.52 } else {
4231     die "$0: $token->{type}: Unknown token type";
4232     }
4233     } else {
4234     die "$0: $self->{insertion_mode}: Unknown insertion mode";
4235     }
4236 wakaba 1.47
4237 wakaba 1.52 ## "in body" insertion mode
4238 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
4239 wakaba 1.52 if ($token->{tag_name} eq 'script') {
4240 wakaba 1.79 !!!cp ('t332');
4241 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4242 wakaba 1.100 $script_start_tag->();
4243 wakaba 1.126 next B;
4244 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
4245 wakaba 1.79 !!!cp ('t333');
4246 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4247 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4248 wakaba 1.126 next B;
4249 wakaba 1.52 } elsif ({
4250 wakaba 1.232 base => 1, command => 1, link => 1,
4251 wakaba 1.52 }->{$token->{tag_name}}) {
4252 wakaba 1.79 !!!cp ('t334');
4253 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4254 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4255 wakaba 1.194 pop @{$self->{open_elements}};
4256 wakaba 1.125 !!!ack ('t334.1');
4257 wakaba 1.52 !!!next-token;
4258 wakaba 1.126 next B;
4259 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
4260     ## NOTE: This is an "as if in head" code clone, only "-t" differs
4261 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4262 wakaba 1.194 my $meta_el = pop @{$self->{open_elements}};
4263 wakaba 1.46
4264 wakaba 1.52 unless ($self->{confident}) {
4265 wakaba 1.134 if ($token->{attributes}->{charset}) {
4266 wakaba 1.79 !!!cp ('t335');
4267 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4268     ## in the {change_encoding} callback.
4269 wakaba 1.63 $self->{change_encoding}
4270 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
4271 wakaba 1.66
4272     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4273     ->set_user_data (manakai_has_reference =>
4274     $token->{attributes}->{charset}
4275     ->{has_reference});
4276 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4277     if ($token->{attributes}->{content}->{value}
4278 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4279 wakaba 1.189 [\x09\x0A\x0C\x0D\x20]*=
4280     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4281     ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4282     /x) {
4283 wakaba 1.79 !!!cp ('t336');
4284 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4285     ## in the {change_encoding} callback.
4286 wakaba 1.63 $self->{change_encoding}
4287 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4288 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4289     ->set_user_data (manakai_has_reference =>
4290     $token->{attributes}->{content}
4291     ->{has_reference});
4292 wakaba 1.63 }
4293 wakaba 1.52 }
4294 wakaba 1.66 } else {
4295     if ($token->{attributes}->{charset}) {
4296 wakaba 1.79 !!!cp ('t337');
4297 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4298     ->set_user_data (manakai_has_reference =>
4299     $token->{attributes}->{charset}
4300     ->{has_reference});
4301     }
4302 wakaba 1.68 if ($token->{attributes}->{content}) {
4303 wakaba 1.79 !!!cp ('t338');
4304 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4305     ->set_user_data (manakai_has_reference =>
4306     $token->{attributes}->{content}
4307     ->{has_reference});
4308     }
4309 wakaba 1.52 }
4310 wakaba 1.1
4311 wakaba 1.125 !!!ack ('t338.1');
4312 wakaba 1.52 !!!next-token;
4313 wakaba 1.126 next B;
4314 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
4315 wakaba 1.79 !!!cp ('t341');
4316 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4317 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4318 wakaba 1.126 next B;
4319 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
4320 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
4321 wakaba 1.46
4322 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
4323 wakaba 1.206 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4324 wakaba 1.79 !!!cp ('t342');
4325 wakaba 1.52 ## Ignore the token
4326     } else {
4327     my $body_el = $self->{open_elements}->[1]->[0];
4328     for my $attr_name (keys %{$token->{attributes}}) {
4329     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4330 wakaba 1.79 !!!cp ('t343');
4331 wakaba 1.52 $body_el->set_attribute_ns
4332     (undef, [undef, $attr_name],
4333     $token->{attributes}->{$attr_name}->{value});
4334     }
4335     }
4336     }
4337 wakaba 1.125 !!!nack ('t343.1');
4338 wakaba 1.52 !!!next-token;
4339 wakaba 1.126 next B;
4340 wakaba 1.52 } elsif ({
4341 wakaba 1.195 ## NOTE: Start tags for non-phrasing flow content elements
4342    
4343     ## NOTE: The normal one
4344     address => 1, article => 1, aside => 1, blockquote => 1,
4345     center => 1, datagrid => 1, details => 1, dialog => 1,
4346     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4347     footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4348 wakaba 1.237 h6 => 1, header => 1, hgroup => 1,
4349     menu => 1, nav => 1, ol => 1, p => 1,
4350 wakaba 1.195 section => 1, ul => 1,
4351     ## NOTE: As normal, but drops leading newline
4352 wakaba 1.97 pre => 1, listing => 1,
4353 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
4354 wakaba 1.109 form => 1,
4355 wakaba 1.195
4356 wakaba 1.109 table => 1,
4357     hr => 1,
4358 wakaba 1.52 }->{$token->{tag_name}}) {
4359 wakaba 1.225
4360     ## 1. When there is an opening |form| element:
4361 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4362     !!!cp ('t350');
4363 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
4364 wakaba 1.109 ## Ignore the token
4365 wakaba 1.125 !!!nack ('t350.1');
4366 wakaba 1.109 !!!next-token;
4367 wakaba 1.126 next B;
4368 wakaba 1.109 }
4369    
4370 wakaba 1.225 ## 2. Close the |p| element, if any.
4371 wakaba 1.217 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4372     $self->{document}->manakai_compat_mode ne 'quirks') {
4373     ## has a p element in scope
4374     INSCOPE: for (reverse @{$self->{open_elements}}) {
4375     if ($_->[1] == P_EL) {
4376     !!!cp ('t344');
4377     !!!back-token; # <form>
4378     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4379     line => $token->{line}, column => $token->{column}};
4380     next B;
4381     } elsif ($_->[1] & SCOPING_EL) {
4382     !!!cp ('t345');
4383     last INSCOPE;
4384     }
4385     } # INSCOPE
4386     }
4387 wakaba 1.225
4388     ## 3. Close the opening <hn> element, if any.
4389     if ({h1 => 1, h2 => 1, h3 => 1,
4390     h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) {
4391     if ($self->{open_elements}->[-1]->[1] == HEADING_EL) {
4392     !!!parse-error (type => 'not closed',
4393     text => $self->{open_elements}->[-1]->[0]->manakai_local_name,
4394     token => $token);
4395     pop @{$self->{open_elements}};
4396     }
4397     }
4398    
4399     ## 4. Insertion.
4400 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4401 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4402 wakaba 1.125 !!!nack ('t346.1');
4403 wakaba 1.52 !!!next-token;
4404 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4405 wakaba 1.52 $token->{data} =~ s/^\x0A//;
4406     unless (length $token->{data}) {
4407 wakaba 1.79 !!!cp ('t346');
4408 wakaba 1.1 !!!next-token;
4409 wakaba 1.79 } else {
4410     !!!cp ('t349');
4411 wakaba 1.52 }
4412 wakaba 1.79 } else {
4413     !!!cp ('t348');
4414 wakaba 1.52 }
4415 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
4416     !!!cp ('t347.1');
4417     $self->{form_element} = $self->{open_elements}->[-1]->[0];
4418    
4419 wakaba 1.125 !!!nack ('t347.2');
4420 wakaba 1.109 !!!next-token;
4421     } elsif ($token->{tag_name} eq 'table') {
4422     !!!cp ('t382');
4423     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4424    
4425     $self->{insertion_mode} = IN_TABLE_IM;
4426    
4427 wakaba 1.125 !!!nack ('t382.1');
4428 wakaba 1.109 !!!next-token;
4429     } elsif ($token->{tag_name} eq 'hr') {
4430     !!!cp ('t386');
4431     pop @{$self->{open_elements}};
4432    
4433 wakaba 1.125 !!!nack ('t386.1');
4434 wakaba 1.109 !!!next-token;
4435 wakaba 1.52 } else {
4436 wakaba 1.125 !!!nack ('t347.1');
4437 wakaba 1.52 !!!next-token;
4438     }
4439 wakaba 1.126 next B;
4440 wakaba 1.196 } elsif ($token->{tag_name} eq 'li') {
4441     ## NOTE: As normal, but imply </li> when there's another <li> ...
4442 wakaba 1.193
4443 wakaba 1.225 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)::
4444     ## Interpreted as <li><foo/></li><li/> (non-conforming):
4445 wakaba 1.193 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4446     ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4447     ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4448     ## object (Fx)
4449 wakaba 1.225 ## Generate non-tree (non-conforming):
4450 wakaba 1.193 ## basefont (IE7 (where basefont is non-void)), center (IE),
4451     ## form (IE), hn (IE)
4452 wakaba 1.225 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)::
4453     ## Interpreted as <li><foo><li/></foo></li> (non-conforming):
4454 wakaba 1.193 ## div (Fx, S)
4455 wakaba 1.196
4456     my $non_optional;
4457 wakaba 1.52 my $i = -1;
4458 wakaba 1.196
4459     ## 1.
4460     for my $node (reverse @{$self->{open_elements}}) {
4461 wakaba 1.206 if ($node->[1] == LI_EL) {
4462 wakaba 1.196 ## 2. (a) As if </li>
4463     {
4464     ## If no </li> - not applied
4465     #
4466    
4467     ## Otherwise
4468    
4469     ## 1. generate implied end tags, except for </li>
4470     #
4471    
4472     ## 2. If current node != "li", parse error
4473     if ($non_optional) {
4474     !!!parse-error (type => 'not closed',
4475     text => $non_optional->[0]->manakai_local_name,
4476     token => $token);
4477     !!!cp ('t355');
4478     } else {
4479     !!!cp ('t356');
4480     }
4481    
4482     ## 3. Pop
4483     splice @{$self->{open_elements}}, $i;
4484 wakaba 1.52 }
4485 wakaba 1.196
4486     last; ## 2. (b) goto 5.
4487     } elsif (
4488     ## NOTE: not "formatting" and not "phrasing"
4489     ($node->[1] & SPECIAL_EL or
4490     $node->[1] & SCOPING_EL) and
4491     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4492 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4493     ) {
4494 wakaba 1.196 ## 3.
4495 wakaba 1.79 !!!cp ('t357');
4496 wakaba 1.196 last; ## goto 5.
4497     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4498 wakaba 1.79 !!!cp ('t358');
4499 wakaba 1.196 #
4500     } else {
4501     !!!cp ('t359');
4502     $non_optional ||= $node;
4503     #
4504 wakaba 1.52 }
4505 wakaba 1.196 ## 4.
4506     ## goto 2.
4507 wakaba 1.52 $i--;
4508 wakaba 1.196 }
4509    
4510     ## 5. (a) has a |p| element in scope
4511     INSCOPE: for (reverse @{$self->{open_elements}}) {
4512 wakaba 1.206 if ($_->[1] == P_EL) {
4513 wakaba 1.196 !!!cp ('t353');
4514 wakaba 1.198
4515     ## NOTE: |<p><li>|, for example.
4516    
4517 wakaba 1.196 !!!back-token; # <x>
4518     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4519     line => $token->{line}, column => $token->{column}};
4520     next B;
4521     } elsif ($_->[1] & SCOPING_EL) {
4522     !!!cp ('t354');
4523     last INSCOPE;
4524     }
4525     } # INSCOPE
4526    
4527     ## 5. (b) insert
4528 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4529 wakaba 1.125 !!!nack ('t359.1');
4530 wakaba 1.52 !!!next-token;
4531 wakaba 1.126 next B;
4532 wakaba 1.196 } elsif ($token->{tag_name} eq 'dt' or
4533     $token->{tag_name} eq 'dd') {
4534     ## NOTE: As normal, but imply </dt> or </dd> when ...
4535    
4536     my $non_optional;
4537     my $i = -1;
4538    
4539     ## 1.
4540     for my $node (reverse @{$self->{open_elements}}) {
4541 wakaba 1.207 if ($node->[1] == DTDD_EL) {
4542 wakaba 1.196 ## 2. (a) As if </dt> or </dd>
4543     {
4544     ## If no </dt> or </dd> - not applied
4545     #
4546    
4547     ## Otherwise
4548    
4549     ## 1. generate implied end tags, except for </dt> or </dd>
4550     #
4551    
4552     ## 2. If current node != "dt"|"dd", parse error
4553     if ($non_optional) {
4554     !!!parse-error (type => 'not closed',
4555     text => $non_optional->[0]->manakai_local_name,
4556     token => $token);
4557     !!!cp ('t355.1');
4558     } else {
4559     !!!cp ('t356.1');
4560     }
4561    
4562     ## 3. Pop
4563     splice @{$self->{open_elements}}, $i;
4564     }
4565    
4566     last; ## 2. (b) goto 5.
4567     } elsif (
4568     ## NOTE: not "formatting" and not "phrasing"
4569     ($node->[1] & SPECIAL_EL or
4570     $node->[1] & SCOPING_EL) and
4571     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4572    
4573 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4574     ) {
4575 wakaba 1.196 ## 3.
4576     !!!cp ('t357.1');
4577     last; ## goto 5.
4578     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4579     !!!cp ('t358.1');
4580     #
4581     } else {
4582     !!!cp ('t359.1');
4583     $non_optional ||= $node;
4584     #
4585     }
4586     ## 4.
4587     ## goto 2.
4588     $i--;
4589     }
4590    
4591     ## 5. (a) has a |p| element in scope
4592     INSCOPE: for (reverse @{$self->{open_elements}}) {
4593 wakaba 1.206 if ($_->[1] == P_EL) {
4594 wakaba 1.196 !!!cp ('t353.1');
4595     !!!back-token; # <x>
4596     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4597     line => $token->{line}, column => $token->{column}};
4598     next B;
4599     } elsif ($_->[1] & SCOPING_EL) {
4600     !!!cp ('t354.1');
4601     last INSCOPE;
4602     }
4603     } # INSCOPE
4604    
4605     ## 5. (b) insert
4606     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4607     !!!nack ('t359.2');
4608     !!!next-token;
4609     next B;
4610 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
4611 wakaba 1.195 ## NOTE: As normal, but effectively ends parsing
4612    
4613 wakaba 1.52 ## has a p element in scope
4614     INSCOPE: for (reverse @{$self->{open_elements}}) {
4615 wakaba 1.206 if ($_->[1] == P_EL) {
4616 wakaba 1.79 !!!cp ('t367');
4617 wakaba 1.125 !!!back-token; # <plaintext>
4618 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4619     line => $token->{line}, column => $token->{column}};
4620 wakaba 1.126 next B;
4621 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
4622 wakaba 1.79 !!!cp ('t368');
4623 wakaba 1.52 last INSCOPE;
4624 wakaba 1.46 }
4625 wakaba 1.52 } # INSCOPE
4626    
4627 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4628 wakaba 1.52
4629     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4630    
4631 wakaba 1.125 !!!nack ('t368.1');
4632 wakaba 1.52 !!!next-token;
4633 wakaba 1.126 next B;
4634 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
4635     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4636     my $node = $active_formatting_elements->[$i];
4637 wakaba 1.206 if ($node->[1] == A_EL) {
4638 wakaba 1.79 !!!cp ('t371');
4639 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
4640 wakaba 1.52
4641 wakaba 1.125 !!!back-token; # <a>
4642 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4643     line => $token->{line}, column => $token->{column}};
4644 wakaba 1.113 $formatting_end_tag->($token);
4645 wakaba 1.52
4646     AFE2: for (reverse 0..$#$active_formatting_elements) {
4647     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4648 wakaba 1.79 !!!cp ('t372');
4649 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
4650     last AFE2;
4651 wakaba 1.1 }
4652 wakaba 1.52 } # AFE2
4653     OE: for (reverse 0..$#{$self->{open_elements}}) {
4654     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4655 wakaba 1.79 !!!cp ('t373');
4656 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
4657     last OE;
4658 wakaba 1.1 }
4659 wakaba 1.52 } # OE
4660     last AFE;
4661     } elsif ($node->[0] eq '#marker') {
4662 wakaba 1.79 !!!cp ('t374');
4663 wakaba 1.52 last AFE;
4664     }
4665     } # AFE
4666    
4667     $reconstruct_active_formatting_elements->($insert_to_current);
4668 wakaba 1.1
4669 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4670 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4671 wakaba 1.1
4672 wakaba 1.125 !!!nack ('t374.1');
4673 wakaba 1.52 !!!next-token;
4674 wakaba 1.126 next B;
4675 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
4676     $reconstruct_active_formatting_elements->($insert_to_current);
4677 wakaba 1.1
4678 wakaba 1.52 ## has a |nobr| element in scope
4679     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4680     my $node = $self->{open_elements}->[$_];
4681 wakaba 1.206 if ($node->[1] == NOBR_EL) {
4682 wakaba 1.79 !!!cp ('t376');
4683 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
4684 wakaba 1.125 !!!back-token; # <nobr>
4685 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4686     line => $token->{line}, column => $token->{column}};
4687 wakaba 1.126 next B;
4688 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4689 wakaba 1.79 !!!cp ('t377');
4690 wakaba 1.52 last INSCOPE;
4691     }
4692     } # INSCOPE
4693    
4694 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4695 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4696    
4697 wakaba 1.125 !!!nack ('t377.1');
4698 wakaba 1.52 !!!next-token;
4699 wakaba 1.126 next B;
4700 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
4701     ## has a button element in scope
4702     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4703     my $node = $self->{open_elements}->[$_];
4704 wakaba 1.206 if ($node->[1] == BUTTON_EL) {
4705 wakaba 1.79 !!!cp ('t378');
4706 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
4707 wakaba 1.125 !!!back-token; # <button>
4708 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4709     line => $token->{line}, column => $token->{column}};
4710 wakaba 1.126 next B;
4711 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4712 wakaba 1.79 !!!cp ('t379');
4713 wakaba 1.52 last INSCOPE;
4714     }
4715     } # INSCOPE
4716    
4717     $reconstruct_active_formatting_elements->($insert_to_current);
4718    
4719 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4720 wakaba 1.85
4721     ## TODO: associate with $self->{form_element} if defined
4722    
4723 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
4724 wakaba 1.1
4725 wakaba 1.125 !!!nack ('t379.1');
4726 wakaba 1.52 !!!next-token;
4727 wakaba 1.126 next B;
4728 wakaba 1.103 } elsif ({
4729 wakaba 1.109 xmp => 1,
4730     iframe => 1,
4731     noembed => 1,
4732 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4733 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
4734 wakaba 1.103 }->{$token->{tag_name}}) {
4735 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
4736     !!!cp ('t381');
4737     $reconstruct_active_formatting_elements->($insert_to_current);
4738     } else {
4739     !!!cp ('t399');
4740     }
4741     ## NOTE: There is an "as if in body" code clone.
4742 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4743 wakaba 1.126 next B;
4744 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
4745 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
4746 wakaba 1.52
4747     if (defined $self->{form_element}) {
4748 wakaba 1.79 !!!cp ('t389');
4749 wakaba 1.52 ## Ignore the token
4750 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
4751 wakaba 1.52 !!!next-token;
4752 wakaba 1.126 next B;
4753 wakaba 1.52 } else {
4754 wakaba 1.147 !!!ack ('t391.1');
4755    
4756 wakaba 1.52 my $at = $token->{attributes};
4757     my $form_attrs;
4758     $form_attrs->{action} = $at->{action} if $at->{action};
4759     my $prompt_attr = $at->{prompt};
4760     $at->{name} = {name => 'name', value => 'isindex'};
4761     delete $at->{action};
4762     delete $at->{prompt};
4763     my @tokens = (
4764 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
4765 wakaba 1.114 attributes => $form_attrs,
4766     line => $token->{line}, column => $token->{column}},
4767     {type => START_TAG_TOKEN, tag_name => 'hr',
4768     line => $token->{line}, column => $token->{column}},
4769     {type => START_TAG_TOKEN, tag_name => 'label',
4770     line => $token->{line}, column => $token->{column}},
4771 wakaba 1.52 );
4772     if ($prompt_attr) {
4773 wakaba 1.79 !!!cp ('t390');
4774 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4775 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4776     };
4777 wakaba 1.1 } else {
4778 wakaba 1.79 !!!cp ('t391');
4779 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
4780 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
4781 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4782     }; # SHOULD
4783 wakaba 1.52 ## TODO: make this configurable
4784 wakaba 1.1 }
4785 wakaba 1.52 push @tokens,
4786 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4787     line => $token->{line}, column => $token->{column}},
4788 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4789 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
4790     line => $token->{line}, column => $token->{column}},
4791     {type => START_TAG_TOKEN, tag_name => 'hr',
4792     line => $token->{line}, column => $token->{column}},
4793     {type => END_TAG_TOKEN, tag_name => 'form',
4794     line => $token->{line}, column => $token->{column}};
4795 wakaba 1.52 !!!back-token (@tokens);
4796 wakaba 1.125 !!!next-token;
4797 wakaba 1.126 next B;
4798 wakaba 1.52 }
4799     } elsif ($token->{tag_name} eq 'textarea') {
4800 wakaba 1.224 ## 1. Insert
4801 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4802 wakaba 1.52
4803 wakaba 1.224 ## Step 2 # XXX
4804 wakaba 1.52 ## TODO: associate with $self->{form_element} if defined
4805 wakaba 1.205
4806 wakaba 1.224 ## 2. Drop U+000A LINE FEED
4807 wakaba 1.205 $self->{ignore_newline} = 1;
4808    
4809 wakaba 1.224 ## 3. RCDATA
4810 wakaba 1.52 $self->{content_model} = RCDATA_CONTENT_MODEL;
4811     delete $self->{escape}; # MUST
4812 wakaba 1.205
4813 wakaba 1.224 ## 4., 6. Insertion mode
4814 wakaba 1.205 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4815    
4816 wakaba 1.224 ## XXX: 5. frameset-ok flag
4817    
4818 wakaba 1.125 !!!nack ('t392.1');
4819 wakaba 1.52 !!!next-token;
4820 wakaba 1.126 next B;
4821 wakaba 1.201 } elsif ($token->{tag_name} eq 'optgroup' or
4822     $token->{tag_name} eq 'option') {
4823     ## has an |option| element in scope
4824     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4825     my $node = $self->{open_elements}->[$_];
4826 wakaba 1.206 if ($node->[1] == OPTION_EL) {
4827 wakaba 1.201 !!!cp ('t397.1');
4828     ## NOTE: As if </option>
4829     !!!back-token; # <option> or <optgroup>
4830     $token = {type => END_TAG_TOKEN, tag_name => 'option',
4831     line => $token->{line}, column => $token->{column}};
4832     next B;
4833     } elsif ($node->[1] & SCOPING_EL) {
4834     !!!cp ('t397.2');
4835     last INSCOPE;
4836     }
4837     } # INSCOPE
4838    
4839     $reconstruct_active_formatting_elements->($insert_to_current);
4840    
4841     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4842    
4843     !!!nack ('t397.3');
4844     !!!next-token;
4845     redo B;
4846 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
4847     $token->{tag_name} eq 'rp') {
4848     ## has a |ruby| element in scope
4849     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4850     my $node = $self->{open_elements}->[$_];
4851 wakaba 1.206 if ($node->[1] == RUBY_EL) {
4852 wakaba 1.151 !!!cp ('t398.1');
4853     ## generate implied end tags
4854     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4855     !!!cp ('t398.2');
4856     pop @{$self->{open_elements}};
4857     }
4858 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4859 wakaba 1.151 !!!cp ('t398.3');
4860     !!!parse-error (type => 'not closed',
4861 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4862 wakaba 1.151 ->manakai_local_name,
4863     token => $token);
4864     pop @{$self->{open_elements}}
4865 wakaba 1.206 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4866 wakaba 1.151 }
4867     last INSCOPE;
4868     } elsif ($node->[1] & SCOPING_EL) {
4869     !!!cp ('t398.4');
4870     last INSCOPE;
4871     }
4872     } # INSCOPE
4873 wakaba 1.212
4874     ## TODO: <non-ruby><rt> is not allowed.
4875 wakaba 1.151
4876     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4877    
4878     !!!nack ('t398.5');
4879     !!!next-token;
4880     redo B;
4881 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
4882     $token->{tag_name} eq 'svg') {
4883     $reconstruct_active_formatting_elements->($insert_to_current);
4884 wakaba 1.131
4885 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4886    
4887 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4888    
4889     ## "adjust foreign attributes" - done in insert-element-f
4890 wakaba 1.126
4891 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4892 wakaba 1.126
4893     if ($self->{self_closing}) {
4894     pop @{$self->{open_elements}};
4895 wakaba 1.201 !!!ack ('t398.6');
4896 wakaba 1.126 } else {
4897 wakaba 1.201 !!!cp ('t398.7');
4898 wakaba 1.126 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4899     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4900     ## mode, "in body" (not "in foreign content") secondary insertion
4901     ## mode, maybe.
4902     }
4903    
4904     !!!next-token;
4905     next B;
4906 wakaba 1.52 } elsif ({
4907     caption => 1, col => 1, colgroup => 1, frame => 1,
4908 wakaba 1.201 frameset => 1, head => 1,
4909 wakaba 1.52 tbody => 1, td => 1, tfoot => 1, th => 1,
4910     thead => 1, tr => 1,
4911     }->{$token->{tag_name}}) {
4912 wakaba 1.79 !!!cp ('t401');
4913 wakaba 1.153 !!!parse-error (type => 'in body',
4914     text => $token->{tag_name}, token => $token);
4915 wakaba 1.52 ## Ignore the token
4916 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
4917 wakaba 1.52 !!!next-token;
4918 wakaba 1.126 next B;
4919 wakaba 1.198 } elsif ($token->{tag_name} eq 'param' or
4920     $token->{tag_name} eq 'source') {
4921     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4922     pop @{$self->{open_elements}};
4923    
4924     !!!ack ('t398.5');
4925     !!!next-token;
4926     redo B;
4927 wakaba 1.52 } else {
4928 wakaba 1.110 if ($token->{tag_name} eq 'image') {
4929     !!!cp ('t384');
4930 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
4931 wakaba 1.110 $token->{tag_name} = 'img';
4932     } else {
4933     !!!cp ('t385');
4934     }
4935    
4936     ## NOTE: There is an "as if <br>" code clone.
4937 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
4938    
4939 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4940 wakaba 1.109
4941 wakaba 1.110 if ({
4942     applet => 1, marquee => 1, object => 1,
4943     }->{$token->{tag_name}}) {
4944     !!!cp ('t380');
4945     push @$active_formatting_elements, ['#marker', ''];
4946 wakaba 1.125 !!!nack ('t380.1');
4947 wakaba 1.110 } elsif ({
4948     b => 1, big => 1, em => 1, font => 1, i => 1,
4949 wakaba 1.193 s => 1, small => 1, strike => 1,
4950 wakaba 1.110 strong => 1, tt => 1, u => 1,
4951     }->{$token->{tag_name}}) {
4952     !!!cp ('t375');
4953     push @$active_formatting_elements, $self->{open_elements}->[-1];
4954 wakaba 1.125 !!!nack ('t375.1');
4955 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
4956     !!!cp ('t388');
4957     ## TODO: associate with $self->{form_element} if defined
4958     pop @{$self->{open_elements}};
4959 wakaba 1.125 !!!ack ('t388.2');
4960 wakaba 1.110 } elsif ({
4961     area => 1, basefont => 1, bgsound => 1, br => 1,
4962 wakaba 1.198 embed => 1, img => 1, spacer => 1, wbr => 1,
4963 wakaba 1.231 keygen => 1,
4964 wakaba 1.110 }->{$token->{tag_name}}) {
4965     !!!cp ('t388.1');
4966     pop @{$self->{open_elements}};
4967 wakaba 1.125 !!!ack ('t388.3');
4968 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
4969 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
4970    
4971     if ($self->{insertion_mode} & TABLE_IMS or
4972     $self->{insertion_mode} & BODY_TABLE_IMS or
4973 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
4974 wakaba 1.109 !!!cp ('t400.1');
4975     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
4976     } else {
4977     !!!cp ('t400.2');
4978     $self->{insertion_mode} = IN_SELECT_IM;
4979     }
4980 wakaba 1.125 !!!nack ('t400.3');
4981 wakaba 1.110 } else {
4982 wakaba 1.125 !!!nack ('t402');
4983 wakaba 1.109 }
4984 wakaba 1.51
4985 wakaba 1.52 !!!next-token;
4986 wakaba 1.126 next B;
4987 wakaba 1.52 }
4988 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4989 wakaba 1.52 if ($token->{tag_name} eq 'body') {
4990 wakaba 1.225
4991     ## 1. If not "have an element in scope":
4992     ## "has a |body| element in scope"
4993 wakaba 1.107 my $i;
4994 wakaba 1.111 INSCOPE: {
4995     for (reverse @{$self->{open_elements}}) {
4996 wakaba 1.206 if ($_->[1] == BODY_EL) {
4997 wakaba 1.111 !!!cp ('t405');
4998     $i = $_;
4999     last INSCOPE;
5000 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5001 wakaba 1.111 !!!cp ('t405.1');
5002     last;
5003     }
5004 wakaba 1.52 }
5005 wakaba 1.111
5006 wakaba 1.200 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5007    
5008     !!!parse-error (type => 'unmatched end tag',
5009 wakaba 1.153 text => $token->{tag_name}, token => $token);
5010 wakaba 1.107 ## NOTE: Ignore the token.
5011 wakaba 1.52 !!!next-token;
5012 wakaba 1.126 next B;
5013 wakaba 1.111 } # INSCOPE
5014 wakaba 1.107
5015 wakaba 1.225 ## 2. If unclosed elements:
5016 wakaba 1.107 for (@{$self->{open_elements}}) {
5017 wakaba 1.220 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5018     $_->[1] == OPTGROUP_EL ||
5019     $_->[1] == OPTION_EL ||
5020     $_->[1] == RUBY_COMPONENT_EL) {
5021 wakaba 1.107 !!!cp ('t403');
5022 wakaba 1.122 !!!parse-error (type => 'not closed',
5023 wakaba 1.153 text => $_->[0]->manakai_local_name,
5024 wakaba 1.122 token => $token);
5025 wakaba 1.107 last;
5026     } else {
5027     !!!cp ('t404');
5028     }
5029     }
5030    
5031 wakaba 1.225 ## 3. Switch the insertion mode.
5032 wakaba 1.107 $self->{insertion_mode} = AFTER_BODY_IM;
5033     !!!next-token;
5034 wakaba 1.126 next B;
5035 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
5036 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
5037     ## up-to-date, though it has same effect as speced.
5038 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
5039 wakaba 1.206 $self->{open_elements}->[1]->[1] == BODY_EL) {
5040     unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5041 wakaba 1.79 !!!cp ('t406');
5042 wakaba 1.122 !!!parse-error (type => 'not closed',
5043 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
5044 wakaba 1.122 ->manakai_local_name,
5045     token => $token);
5046 wakaba 1.79 } else {
5047     !!!cp ('t407');
5048 wakaba 1.1 }
5049 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5050 wakaba 1.52 ## reprocess
5051 wakaba 1.126 next B;
5052 wakaba 1.51 } else {
5053 wakaba 1.79 !!!cp ('t408');
5054 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5055     text => $token->{tag_name}, token => $token);
5056 wakaba 1.52 ## Ignore the token
5057     !!!next-token;
5058 wakaba 1.126 next B;
5059 wakaba 1.51 }
5060 wakaba 1.52 } elsif ({
5061 wakaba 1.195 ## NOTE: End tags for non-phrasing flow content elements
5062    
5063     ## NOTE: The normal ones
5064     address => 1, article => 1, aside => 1, blockquote => 1,
5065     center => 1, datagrid => 1, details => 1, dialog => 1,
5066     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5067 wakaba 1.237 footer => 1, header => 1, hgroup => 1,
5068     listing => 1, menu => 1, nav => 1,
5069 wakaba 1.195 ol => 1, pre => 1, section => 1, ul => 1,
5070    
5071     ## NOTE: As normal, but ... optional tags
5072 wakaba 1.52 dd => 1, dt => 1, li => 1,
5073 wakaba 1.195
5074 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5075 wakaba 1.52 }->{$token->{tag_name}}) {
5076 wakaba 1.197 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5077     ## Code for <dt> or <dd> start tags includes "as if </dt> or
5078     ## </dd>" code.
5079    
5080 wakaba 1.52 ## has an element in scope
5081     my $i;
5082     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5083     my $node = $self->{open_elements}->[$_];
5084 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5085 wakaba 1.79 !!!cp ('t410');
5086 wakaba 1.52 $i = $_;
5087 wakaba 1.87 last INSCOPE;
5088 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5089 wakaba 1.79 !!!cp ('t411');
5090 wakaba 1.52 last INSCOPE;
5091 wakaba 1.51 }
5092 wakaba 1.52 } # INSCOPE
5093 wakaba 1.89
5094     unless (defined $i) { # has an element in scope
5095     !!!cp ('t413');
5096 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5097     text => $token->{tag_name}, token => $token);
5098 wakaba 1.157 ## NOTE: Ignore the token.
5099 wakaba 1.89 } else {
5100     ## Step 1. generate implied end tags
5101     while ({
5102 wakaba 1.151 ## END_TAG_OPTIONAL_EL
5103 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
5104     dt => ($token->{tag_name} ne 'dt'),
5105     li => ($token->{tag_name} ne 'li'),
5106 wakaba 1.194 option => 1,
5107     optgroup => 1,
5108 wakaba 1.89 p => 1,
5109 wakaba 1.151 rt => 1,
5110     rp => 1,
5111 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5112 wakaba 1.89 !!!cp ('t409');
5113     pop @{$self->{open_elements}};
5114     }
5115    
5116     ## Step 2.
5117 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5118     ne $token->{tag_name}) {
5119 wakaba 1.79 !!!cp ('t412');
5120 wakaba 1.122 !!!parse-error (type => 'not closed',
5121 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5122 wakaba 1.122 ->manakai_local_name,
5123     token => $token);
5124 wakaba 1.51 } else {
5125 wakaba 1.89 !!!cp ('t414');
5126 wakaba 1.51 }
5127 wakaba 1.89
5128     ## Step 3.
5129 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5130 wakaba 1.89
5131     ## Step 4.
5132     $clear_up_to_marker->()
5133     if {
5134 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5135 wakaba 1.89 }->{$token->{tag_name}};
5136 wakaba 1.51 }
5137 wakaba 1.52 !!!next-token;
5138 wakaba 1.126 next B;
5139 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
5140 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
5141    
5142 wakaba 1.92 undef $self->{form_element};
5143    
5144 wakaba 1.52 ## has an element in scope
5145 wakaba 1.92 my $i;
5146 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5147     my $node = $self->{open_elements}->[$_];
5148 wakaba 1.206 if ($node->[1] == FORM_EL) {
5149 wakaba 1.79 !!!cp ('t418');
5150 wakaba 1.92 $i = $_;
5151 wakaba 1.52 last INSCOPE;
5152 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5153 wakaba 1.79 !!!cp ('t419');
5154 wakaba 1.52 last INSCOPE;
5155     }
5156     } # INSCOPE
5157 wakaba 1.92
5158     unless (defined $i) { # has an element in scope
5159 wakaba 1.79 !!!cp ('t421');
5160 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5161     text => $token->{tag_name}, token => $token);
5162 wakaba 1.157 ## NOTE: Ignore the token.
5163 wakaba 1.92 } else {
5164     ## Step 1. generate implied end tags
5165 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5166 wakaba 1.92 !!!cp ('t417');
5167     pop @{$self->{open_elements}};
5168     }
5169    
5170     ## Step 2.
5171 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5172     ne $token->{tag_name}) {
5173 wakaba 1.92 !!!cp ('t417.1');
5174 wakaba 1.122 !!!parse-error (type => 'not closed',
5175 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5176 wakaba 1.122 ->manakai_local_name,
5177     token => $token);
5178 wakaba 1.92 } else {
5179     !!!cp ('t420');
5180     }
5181    
5182     ## Step 3.
5183     splice @{$self->{open_elements}}, $i;
5184 wakaba 1.52 }
5185    
5186     !!!next-token;
5187 wakaba 1.126 next B;
5188 wakaba 1.52 } elsif ({
5189 wakaba 1.195 ## NOTE: As normal, except acts as a closer for any ...
5190 wakaba 1.52 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5191     }->{$token->{tag_name}}) {
5192     ## has an element in scope
5193     my $i;
5194     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5195     my $node = $self->{open_elements}->[$_];
5196 wakaba 1.206 if ($node->[1] == HEADING_EL) {
5197 wakaba 1.79 !!!cp ('t423');
5198 wakaba 1.52 $i = $_;
5199     last INSCOPE;
5200 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5201 wakaba 1.79 !!!cp ('t424');
5202 wakaba 1.52 last INSCOPE;
5203 wakaba 1.51 }
5204 wakaba 1.52 } # INSCOPE
5205 wakaba 1.93
5206     unless (defined $i) { # has an element in scope
5207     !!!cp ('t425.1');
5208 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5209     text => $token->{tag_name}, token => $token);
5210 wakaba 1.157 ## NOTE: Ignore the token.
5211 wakaba 1.79 } else {
5212 wakaba 1.93 ## Step 1. generate implied end tags
5213 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5214 wakaba 1.93 !!!cp ('t422');
5215     pop @{$self->{open_elements}};
5216     }
5217    
5218     ## Step 2.
5219 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5220     ne $token->{tag_name}) {
5221 wakaba 1.93 !!!cp ('t425');
5222 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5223     text => $token->{tag_name}, token => $token);
5224 wakaba 1.93 } else {
5225     !!!cp ('t426');
5226     }
5227    
5228     ## Step 3.
5229     splice @{$self->{open_elements}}, $i;
5230 wakaba 1.36 }
5231 wakaba 1.52
5232     !!!next-token;
5233 wakaba 1.126 next B;
5234 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
5235 wakaba 1.195 ## NOTE: As normal, except </p> implies <p> and ...
5236    
5237 wakaba 1.87 ## has an element in scope
5238 wakaba 1.197 my $non_optional;
5239 wakaba 1.87 my $i;
5240     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5241     my $node = $self->{open_elements}->[$_];
5242 wakaba 1.206 if ($node->[1] == P_EL) {
5243 wakaba 1.87 !!!cp ('t410.1');
5244     $i = $_;
5245 wakaba 1.88 last INSCOPE;
5246 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5247 wakaba 1.87 !!!cp ('t411.1');
5248     last INSCOPE;
5249 wakaba 1.197 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5250     ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5251     !!!cp ('t411.2');
5252     #
5253     } else {
5254     !!!cp ('t411.3');
5255     $non_optional ||= $node;
5256     #
5257 wakaba 1.87 }
5258     } # INSCOPE
5259 wakaba 1.91
5260     if (defined $i) {
5261 wakaba 1.197 ## 1. Generate implied end tags
5262     #
5263    
5264     ## 2. If current node != "p", parse error
5265     if ($non_optional) {
5266 wakaba 1.87 !!!cp ('t412.1');
5267 wakaba 1.122 !!!parse-error (type => 'not closed',
5268 wakaba 1.197 text => $non_optional->[0]->manakai_local_name,
5269 wakaba 1.122 token => $token);
5270 wakaba 1.87 } else {
5271 wakaba 1.91 !!!cp ('t414.1');
5272 wakaba 1.87 }
5273 wakaba 1.91
5274 wakaba 1.197 ## 3. Pop
5275 wakaba 1.87 splice @{$self->{open_elements}}, $i;
5276     } else {
5277 wakaba 1.91 !!!cp ('t413.1');
5278 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5279     text => $token->{tag_name}, token => $token);
5280 wakaba 1.91
5281 wakaba 1.87 !!!cp ('t415.1');
5282     ## As if <p>, then reprocess the current token
5283     my $el;
5284 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
5285 wakaba 1.87 $insert->($el);
5286 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
5287 wakaba 1.87 }
5288 wakaba 1.91
5289 wakaba 1.87 !!!next-token;
5290 wakaba 1.126 next B;
5291 wakaba 1.52 } elsif ({
5292     a => 1,
5293     b => 1, big => 1, em => 1, font => 1, i => 1,
5294 wakaba 1.193 nobr => 1, s => 1, small => 1, strike => 1,
5295 wakaba 1.52 strong => 1, tt => 1, u => 1,
5296     }->{$token->{tag_name}}) {
5297 wakaba 1.79 !!!cp ('t427');
5298 wakaba 1.113 $formatting_end_tag->($token);
5299 wakaba 1.126 next B;
5300 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
5301 wakaba 1.79 !!!cp ('t428');
5302 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5303     text => 'br', token => $token);
5304 wakaba 1.52
5305     ## As if <br>
5306     $reconstruct_active_formatting_elements->($insert_to_current);
5307    
5308     my $el;
5309 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
5310 wakaba 1.52 $insert->($el);
5311    
5312     ## Ignore the token.
5313     !!!next-token;
5314 wakaba 1.126 next B;
5315 wakaba 1.52 } else {
5316 wakaba 1.195 if ($token->{tag_name} eq 'sarcasm') {
5317     sleep 0.001; # take a deep breath
5318     }
5319    
5320 wakaba 1.52 ## Step 1
5321     my $node_i = -1;
5322     my $node = $self->{open_elements}->[$node_i];
5323 wakaba 1.51
5324 wakaba 1.52 ## Step 2
5325     S2: {
5326 wakaba 1.200 my $node_tag_name = $node->[0]->manakai_local_name;
5327     $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5328     if ($node_tag_name eq $token->{tag_name}) {
5329 wakaba 1.52 ## Step 1
5330     ## generate implied end tags
5331 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5332 wakaba 1.79 !!!cp ('t430');
5333 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
5334     ## ISSUE: <ruby><rt></rt> will also take this code path,
5335     ## which seems wrong.
5336 wakaba 1.86 pop @{$self->{open_elements}};
5337 wakaba 1.151 $node_i++;
5338 wakaba 1.52 }
5339    
5340     ## Step 2
5341 wakaba 1.200 my $current_tag_name
5342     = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5343     $current_tag_name =~ tr/A-Z/a-z/;
5344     if ($current_tag_name ne $token->{tag_name}) {
5345 wakaba 1.79 !!!cp ('t431');
5346 wakaba 1.58 ## NOTE: <x><y></x>
5347 wakaba 1.122 !!!parse-error (type => 'not closed',
5348 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5349 wakaba 1.122 ->manakai_local_name,
5350     token => $token);
5351 wakaba 1.79 } else {
5352     !!!cp ('t432');
5353 wakaba 1.52 }
5354    
5355     ## Step 3
5356 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5357 wakaba 1.51
5358 wakaba 1.1 !!!next-token;
5359 wakaba 1.52 last S2;
5360 wakaba 1.1 } else {
5361 wakaba 1.52 ## Step 3
5362 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5363 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5364 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5365     $node->[1] & SCOPING_EL)) {
5366 wakaba 1.79 !!!cp ('t433');
5367 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5368     text => $token->{tag_name}, token => $token);
5369 wakaba 1.52 ## Ignore the token
5370     !!!next-token;
5371     last S2;
5372 wakaba 1.193
5373     ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5374     ## 9.27, "a" is a child of <dd> (conforming). In
5375     ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5376     ## "a" is a child of both <body> and <dd>.
5377 wakaba 1.52 }
5378 wakaba 1.193
5379 wakaba 1.79 !!!cp ('t434');
5380 wakaba 1.1 }
5381 wakaba 1.52
5382     ## Step 4
5383     $node_i--;
5384     $node = $self->{open_elements}->[$node_i];
5385    
5386     ## Step 5;
5387     redo S2;
5388     } # S2
5389 wakaba 1.126 next B;
5390 wakaba 1.1 }
5391     }
5392 wakaba 1.126 next B;
5393     } continue { # B
5394     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5395     ## NOTE: The code below is executed in cases where it does not have
5396     ## to be, but it is harmless even in those cases.
5397     ## has an element in scope
5398     INSCOPE: {
5399     for (reverse 0..$#{$self->{open_elements}}) {
5400     my $node = $self->{open_elements}->[$_];
5401     if ($node->[1] & FOREIGN_EL) {
5402     last INSCOPE;
5403     } elsif ($node->[1] & SCOPING_EL) {
5404     last;
5405     }
5406     }
5407    
5408     ## NOTE: No foreign element in scope.
5409     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5410     } # INSCOPE
5411     }
5412 wakaba 1.1 } # B
5413    
5414     ## Stop parsing # MUST
5415    
5416     ## TODO: script stuffs
5417 wakaba 1.3 } # _tree_construct_main
5418    
5419 wakaba 1.218 ## XXX: How this method is organized is somewhat out of date, although
5420     ## it still does what the current spec documents.
##
## set_inner_html ($class, $node, $string, $onerror, $get_wrapper)
##
## Parses |$string| as HTML and replaces the children of |$node| (the
## /context/) with the parsed result.
##
##   $class       - Invocant; used to call |parse_char_string| and |new|.
##   $node        - The /context/ node.  A Document (node type 9) is
##                  re-parsed as a whole document; an Element (node type 1)
##                  is parsed as a fragment in a temporary document.
##   $string      - The character string to parse.  It is read in place
##                  from |@_| and is not copied into a lexical.
##   $onerror     - Optional error handler.  In the Element case a default
##                  handler that |warn|s is used when this is false.
##   $get_wrapper - Optional code reference applied to the input handle;
##                  defaults to a function returning its argument.
##
## Dies if |$node| is neither a Document nor an Element.
5421 wakaba 1.177 sub set_inner_html ($$$$;$) {
5422 wakaba 1.3 my $class = shift;
5423 wakaba 1.218 my $node = shift; # /context/
5424     #my $s = \$_[0];
## NOTE: After the two |shift|s above, |$_[0]| is the string, |$_[1]| is
## the error handler and |$_[2]| is the wrapper.
5425 wakaba 1.3 my $onerror = $_[1];
5426 wakaba 1.162 my $get_wrapper = $_[2] || sub ($) { return $_[0] };
5427 wakaba 1.3
5428     my $nt = $node->node_type;
5429 wakaba 1.218 if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
5430 wakaba 1.3 # MUST
5431    
5432     ## Step 1 # MUST
5433     ## TODO: If the document has an active parser, ...
5434     ## ISSUE: There is an issue in the spec.
5435    
5436     ## Step 2 # MUST
## NOTE: The child list is copied into |@cn| before any child is removed.
5437     my @cn = @{$node->child_nodes};
5438     for (@cn) {
5439     $node->remove_child ($_);
5440     }
5441    
5442     ## Step 3, 4, 5 # MUST
5443 wakaba 1.177 $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
5444 wakaba 1.218 } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
5445 wakaba 1.3 ## TODO: If non-html element
5446    
5447     ## NOTE: Most of this code is copied from |parse_string|
5448    
5449 wakaba 1.162 ## TODO: Support for $get_wrapper
## NOTE(review): |$get_wrapper| is applied to the input handle below, so
## the TODO above may be stale - confirm.
5450    
5451 wakaba 1.218 ## F1. Create an HTML document.
5452 wakaba 1.14 my $this_doc = $node->owner_document;
5453     my $doc = $this_doc->implementation->create_document;
5454 wakaba 1.18 $doc->manakai_is_html (1);
5455 wakaba 1.218
5456     ## F2. Propagate quirkness flag
5457     my $node_doc = $node->owner_document;
5458     $doc->manakai_compat_mode ($node_doc->manakai_compat_mode);
5459    
5460     ## F3. Create an HTML parser
5461 wakaba 1.3 my $p = $class->new;
5462     $p->{document} = $doc;
5463    
5464 wakaba 1.84 ## Step 8 # MUST
## NOTE(review): |$i| is not referenced again within this sub.
5465 wakaba 1.3 my $i = 0;
5466 wakaba 1.121 $p->{line_prev} = $p->{line} = 1;
5467     $p->{column_prev} = $p->{column} = 0;
5468 wakaba 1.177 require Whatpm::Charset::DecodeHandle;
5469     my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
5470     $input = $get_wrapper->($input);
## Callback that loads the next input character into |$self->{nc}| as a
## code point, or -1 when no more input can be read.  CR and CR LF are
## turned into a single LF, and U+0000 is reported as a parse error and
## replaced by U+FFFD.  Line and column counters are updated here.
5471 wakaba 1.183 $p->{set_nc} = sub {
5472 wakaba 1.3 my $self = shift;
5473 wakaba 1.14
5474 wakaba 1.178 my $char = '';
## A character pushed back by the CR handling below is consumed first.
5475 wakaba 1.183 if (defined $self->{next_nc}) {
5476     $char = $self->{next_nc};
5477     delete $self->{next_nc};
5478     $self->{nc} = ord $char;
5479 wakaba 1.177 } else {
5480 wakaba 1.180 $self->{char_buffer} = '';
5481     $self->{char_buffer_pos} = 0;
5482    
## Fast path: presumably |manakai_read_until| fills the buffer with a run
## of characters other than NUL, LF and CR and returns its length (TODO
## confirm against Whatpm::Charset::DecodeHandle).  The first buffered
## character becomes the next character.
5483     my $count = $input->manakai_read_until
5484 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
5485     $self->{char_buffer_pos});
5486 wakaba 1.180 if ($count) {
5487     $self->{line_prev} = $self->{line};
5488     $self->{column_prev} = $self->{column};
5489     $self->{column}++;
5490 wakaba 1.183 $self->{nc}
5491 wakaba 1.180 = ord substr ($self->{char_buffer},
5492     $self->{char_buffer_pos}++, 1);
5493     return;
5494     }
5495    
## Slow path: read a single character, which may be NUL, LF or CR.
5496 wakaba 1.178 if ($input->read ($char, 1)) {
5497 wakaba 1.183 $self->{nc} = ord $char;
5498 wakaba 1.178 } else {
5499 wakaba 1.183 $self->{nc} = -1;
5500 wakaba 1.178 return;
5501     }
5502 wakaba 1.177 }
5503 wakaba 1.121
5504     ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
5505     $p->{column}++;
5506 wakaba 1.4
5507 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
5508 wakaba 1.121 $p->{line}++;
5509     $p->{column} = 0;
5510 wakaba 1.79 !!!cp ('i1');
5511 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
5512 wakaba 1.177 ## TODO: support for abort/streaming
## Look one character ahead: an LF after the CR is dropped, and any other
## character is saved in |next_nc| for the next call.
5513 wakaba 1.178 my $next = '';
5514     if ($input->read ($next, 1) and $next ne "\x0A") {
5515 wakaba 1.183 $self->{next_nc} = $next;
5516 wakaba 1.177 }
5517 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
5518 wakaba 1.121 $p->{line}++;
5519     $p->{column} = 0;
5520 wakaba 1.79 !!!cp ('i2');
5521 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
5522 wakaba 1.79 !!!cp ('i4');
5523 wakaba 1.14 !!!parse-error (type => 'NULL');
5524 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5525 wakaba 1.3 }
5526     };
5527 wakaba 1.171
## Callback that reads a run of characters into |$_[0]|, starting at
## offset |$_[2]| (default 0).  The run excludes the characters listed in
## |$_[1]| as well as NUL, LF and CR.  Returns the number of characters
## read, or 0 immediately if a pushed-back character is pending.  When
## the count is non-zero, |$p->{nc}| is set to -1.
5528 wakaba 1.172 $p->{read_until} = sub {
5529 wakaba 1.177 #my ($scalar, $specials_range, $offset) = @_;
5530 wakaba 1.183 return 0 if defined $p->{next_nc};
5531 wakaba 1.180
5532 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
5533 wakaba 1.180 my $offset = $_[2] || 0;
5534    
## Characters left over in |char_buffer| are served before the input
## handle is read again.
5535     if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
5536     pos ($p->{char_buffer}) = $p->{char_buffer_pos};
5537     if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
5538     substr ($_[0], $offset)
5539     = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
5540     my $count = $+[0] - $-[0];
5541     if ($count) {
5542     $p->{column} += $count;
5543     $p->{char_buffer_pos} += $count;
5544     $p->{line_prev} = $p->{line};
5545     $p->{column_prev} = $p->{column} - 1;
5546 wakaba 1.183 $p->{nc} = -1;
5547 wakaba 1.180 }
5548     return $count;
5549     } else {
5550     return 0;
5551     }
5552     } else {
5553     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
5554     if ($count) {
5555     $p->{column} += $count;
5556     $p->{column_prev} += $count;
5557 wakaba 1.183 $p->{nc} = -1;
5558 wakaba 1.180 }
5559     return $count;
5560 wakaba 1.177 }
5561     }; # $p->{read_until}
5562 wakaba 1.171
## Error reporting: the caller's handler if given, otherwise a handler
## that |warn|s.  The default handler prefers the token's line and column
## over the parser's current position when the token has a line.
5563 wakaba 1.3 my $ponerror = $onerror || sub {
5564     my (%opt) = @_;
5565 wakaba 1.121 my $line = $opt{line};
5566     my $column = $opt{column};
5567     if (defined $opt{token} and defined $opt{token}->{line}) {
5568     $line = $opt{token}->{line};
5569     $column = $opt{token}->{column};
5570     }
5571     warn "Parse error ($opt{type}) at line $line column $column\n";
5572 wakaba 1.3 };
5573     $p->{parse_error} = sub {
5574 wakaba 1.121 $ponerror->(line => $p->{line}, column => $p->{column}, @_);
5575 wakaba 1.3 };
5576    
## Errors raised by the input handle are forwarded to the same handler
## with |layer => 'encode'|.
5577 wakaba 1.178 my $char_onerror = sub {
5578     my (undef, $type, %opt) = @_;
5579     $ponerror->(layer => 'encode',
5580     line => $p->{line}, column => $p->{column} + 1,
5581     %opt, type => $type);
5582     }; # $char_onerror
5583     $input->onerror ($char_onerror);
5584    
5585 wakaba 1.3 $p->_initialize_tokenizer;
5586     $p->_initialize_tree_constructor;
5587    
5588 wakaba 1.218 ## F4. If /context/ is not undef...
5589    
5590     ## F4.1. content model flag
## The tokenizer's content model is chosen from the local name of the
## /context/ element; names not listed below get PCDATA.
5591 wakaba 1.71 my $node_ln = $node->manakai_local_name;
5592 wakaba 1.40 $p->{content_model} = {
5593     title => RCDATA_CONTENT_MODEL,
5594     textarea => RCDATA_CONTENT_MODEL,
5595     style => CDATA_CONTENT_MODEL,
5596     script => CDATA_CONTENT_MODEL,
5597     xmp => CDATA_CONTENT_MODEL,
5598     iframe => CDATA_CONTENT_MODEL,
5599     noembed => CDATA_CONTENT_MODEL,
5600     noframes => CDATA_CONTENT_MODEL,
5601     noscript => CDATA_CONTENT_MODEL,
5602     plaintext => PLAINTEXT_CONTENT_MODEL,
5603     }->{$node_ln};
5604     $p->{content_model} = PCDATA_CONTENT_MODEL
5605     unless defined $p->{content_model};
5606 wakaba 1.3
5607 wakaba 1.123 $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
5608     ## TODO: Foreign element OK?
5609 wakaba 1.3
5610 wakaba 1.218 ## F4.2. Root |html| element
5611 wakaba 1.3 my $root = $doc->create_element_ns
5612     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
5613    
5614 wakaba 1.218 ## F4.3.
5615 wakaba 1.3 $doc->append_child ($root);
5616    
5617 wakaba 1.218 ## F4.4.
5618 wakaba 1.123 push @{$p->{open_elements}}, [$root, $el_category->{html}];
5619 wakaba 1.3
5620     undef $p->{head_element};
5621 wakaba 1.202 undef $p->{head_element_inserted};
5622 wakaba 1.3
5623 wakaba 1.218 ## F4.5.
5624 wakaba 1.3 $p->_reset_insertion_mode;
5625    
5626 wakaba 1.218 ## F4.6.
## Walk from /context/ up through its ancestors and use the nearest HTML
## |form| element, if any, as the parser's form element pointer.
5627 wakaba 1.3 my $anode = $node;
5628     AN: while (defined $anode) {
5629     if ($anode->node_type == 1) {
5630     my $nsuri = $anode->namespace_uri;
5631     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
5632 wakaba 1.71 if ($anode->manakai_local_name eq 'form') {
5633 wakaba 1.79 !!!cp ('i5');
5634 wakaba 1.3 $p->{form_element} = $anode;
5635     last AN;
5636     }
5637     }
5638     }
5639     $anode = $anode->parent_node;
5640     } # AN
5641 wakaba 1.218
5642 wakaba 1.235 ## F.5. Set the input stream.
5643 wakaba 1.236 $p->{confident} = 1; ## Confident: irrelevant.
5644 wakaba 1.235
5645 wakaba 1.218 ## F.6. Start the parser.
## NOTE(review): The |!!!next-token| macro presumably expands to code
## that refers to |$self|, hence the local alias - confirm against the
## macro definition.
5646 wakaba 1.3 {
5647     my $self = $p;
5648     !!!next-token;
5649     }
5650     $p->_tree_construction_main;
5651    
5652 wakaba 1.218 ## F.7.
## Remove the existing children of /context/ ...
5653 wakaba 1.3 my @cn = @{$node->child_nodes};
5654     for (@cn) {
5655     $node->remove_child ($_);
5656     }
5657     ## ISSUE: mutation events? read-only?
5658    
5659 wakaba 1.84 ## Step 11 # MUST
## ... and move the children of the temporary root |html| element into
## /context/, adopting each into the /context/'s document first.
5660 wakaba 1.3 @cn = @{$root->child_nodes};
5661     for (@cn) {
5662 wakaba 1.14 $this_doc->adopt_node ($_);
5663 wakaba 1.3 $node->append_child ($_);
5664     }
5665 wakaba 1.14 ## ISSUE: mutation events?
5666 wakaba 1.3
5667     $p->_terminate_tree_constructor;
5668 wakaba 1.121
## NOTE(review): The |set_nc| and |read_until| closures above also refer
## to |$p|, but only |parse_error| is deleted here - confirm whether the
## other two are released elsewhere.
5669     delete $p->{parse_error}; # delete loop
5670 wakaba 1.3 } else {
5671     die "$0: |set_inner_html| is not defined for node of type $nt";
5672     }
5673     } # set_inner_html
5674    
5675     } # tree construction stage
5676 wakaba 1.1
## Whatpm::HTML::RestartParser: a subclass of |Error| with no methods of
## its own.
## NOTE(review): Presumably thrown to request that parsing be restarted -
## confirm at the throw and catch sites.
5677 wakaba 1.63 package Whatpm::HTML::RestartParser;
5678     push our @ISA, 'Error';
5679    
## True value returned to |require| / |use| at the end of the module file.
5680 wakaba 1.1 1;
5681 wakaba 1.238 # $Date: 2009/09/06 08:29:32 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24