/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.236 - (hide annotations) (download) (as text)
Sun Sep 6 08:15:37 2009 UTC (15 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.235: +4 -3 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	6 Sep 2009 08:15:17 -0000
	* HTML.pm.src: Added |xmp| to the list of Special elements (HTML5
	revision 3689).  It should make no difference since the |xmp|
	element has the PCDATA content and cannot be the non-bottommost
	element in the stack of open elements.

2009-09-06  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.236 our $VERSION=do{my @r=(q$Revision: 1.235 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.208 use Whatpm::HTML::Tokenizer;
7    
8 wakaba 1.182 ## NOTE: This module does not check all HTML5 parse errors; character
9     ## encoding related parse errors are expected to be handled by relevant
10     ## modules.
11     ## Parse errors for control characters that are not allowed in HTML5
12     ## documents, for surrogate code points, and for noncharacter code
13     ## points, as well as U+FFFD substitutions for characters whose code point
14     ## is higher than U+10FFFF may be detected by combining the parser with
15     ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16     ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17     ## WebHACC::Language::HTML module in the WebHACC package).
18    
19 wakaba 1.18 ## ISSUE:
20     ## var doc = implementation.createDocument (null, null, null);
21     ## doc.write ('');
22     ## alert (doc.compatMode);
23 wakaba 1.1
24 wakaba 1.139 require IO::Handle;
25    
26 wakaba 1.208 ## Namespace URLs
27    
28 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30     my $SVG_NS = q<http://www.w3.org/2000/svg>;
31     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34    
35 wakaba 1.208 ## Element categories
36    
37 wakaba 1.206 ## Bits 12-15
38     sub SPECIAL_EL () { 0b1_000000000000000 }
39     sub SCOPING_EL () { 0b1_00000000000000 }
40     sub FORMATTING_EL () { 0b1_0000000000000 }
41     sub PHRASING_EL () { 0b1_000000000000 }
42    
43     ## Bits 10-11
44 wakaba 1.208 #sub FOREIGN_EL () { 0b1_00000000000 } # see Whatpm::HTML::Tokenizer
45 wakaba 1.206 sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 }
46    
47     ## Bits 6-9
48     sub TABLE_SCOPING_EL () { 0b1_000000000 }
49     sub TABLE_ROWS_SCOPING_EL () { 0b1_00000000 }
50     sub TABLE_ROW_SCOPING_EL () { 0b1_0000000 }
51     sub TABLE_ROWS_EL () { 0b1_000000 }
52    
53     ## Bit 5
54     sub ADDRESS_DIV_P_EL () { 0b1_00000 }
55    
56     ## NOTE: Used in </body> and EOF algorithms.
57     ## Bit 4
58     sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
59 wakaba 1.123
60 wakaba 1.151 ## NOTE: Used in "generate implied end tags" algorithm.
61 wakaba 1.194 ## NOTE: There is a code where a modified version of
62     ## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
63     ## implementation (search for the algorithm name).
64 wakaba 1.206 ## Bit 3
65     sub END_TAG_OPTIONAL_EL () { 0b1_000 }
66    
67     ## Bits 0-2
68    
69     sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
70     sub FORM_EL () { SPECIAL_EL | 0b001 }
71     sub FRAMESET_EL () { SPECIAL_EL | 0b010 }
72     sub HEADING_EL () { SPECIAL_EL | 0b011 }
73     sub SELECT_EL () { SPECIAL_EL | 0b100 }
74     sub SCRIPT_EL () { SPECIAL_EL | 0b101 }
75    
76     sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
77     sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }
78    
79 wakaba 1.207 sub DTDD_EL () {
80 wakaba 1.206 SPECIAL_EL |
81     END_TAG_OPTIONAL_EL |
82     ALL_END_TAG_OPTIONAL_EL |
83     0b010
84     }
85     sub LI_EL () {
86     SPECIAL_EL |
87     END_TAG_OPTIONAL_EL |
88     ALL_END_TAG_OPTIONAL_EL |
89     0b100
90     }
91     sub P_EL () {
92     SPECIAL_EL |
93     ADDRESS_DIV_P_EL |
94     END_TAG_OPTIONAL_EL |
95     ALL_END_TAG_OPTIONAL_EL |
96     0b001
97 wakaba 1.123 }
98    
99 wakaba 1.206 sub TABLE_ROW_EL () {
100     SPECIAL_EL |
101     TABLE_ROWS_EL |
102     TABLE_ROW_SCOPING_EL |
103     ALL_END_TAG_OPTIONAL_EL |
104     0b001
105     }
106     sub TABLE_ROW_GROUP_EL () {
107     SPECIAL_EL |
108     TABLE_ROWS_EL |
109     TABLE_ROWS_SCOPING_EL |
110     ALL_END_TAG_OPTIONAL_EL |
111     0b001
112 wakaba 1.123 }
113    
114 wakaba 1.206 sub MISC_SCOPING_EL () { SCOPING_EL | 0b000 }
115     sub BUTTON_EL () { SCOPING_EL | 0b001 }
116     sub CAPTION_EL () { SCOPING_EL | 0b010 }
117     sub HTML_EL () {
118     SCOPING_EL |
119     TABLE_SCOPING_EL |
120     TABLE_ROWS_SCOPING_EL |
121     TABLE_ROW_SCOPING_EL |
122     ALL_END_TAG_OPTIONAL_EL |
123     0b001
124 wakaba 1.123 }
125 wakaba 1.206 sub TABLE_EL () {
126     SCOPING_EL |
127     TABLE_ROWS_EL |
128     TABLE_SCOPING_EL |
129     0b001
130 wakaba 1.123 }
131 wakaba 1.206 sub TABLE_CELL_EL () {
132     SCOPING_EL |
133     TABLE_ROW_SCOPING_EL |
134     ALL_END_TAG_OPTIONAL_EL |
135     0b001
136 wakaba 1.123 }
137    
138 wakaba 1.206 sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
139     sub A_EL () { FORMATTING_EL | 0b001 }
140     sub NOBR_EL () { FORMATTING_EL | 0b010 }
141    
142     sub RUBY_EL () { PHRASING_EL | 0b001 }
143    
144     ## ISSUE: ALL_END_TAG_OPTIONAL_EL?
145     sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
146     sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
147     sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }
148 wakaba 1.123
149 wakaba 1.206 sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 0b001 }
150 wakaba 1.123
151     my $el_category = {
152 wakaba 1.206 a => A_EL,
153     address => ADDRESS_DIV_EL,
154 wakaba 1.123 applet => MISC_SCOPING_EL,
155     area => MISC_SPECIAL_EL,
156 wakaba 1.193 article => MISC_SPECIAL_EL,
157     aside => MISC_SPECIAL_EL,
158 wakaba 1.123 b => FORMATTING_EL,
159     base => MISC_SPECIAL_EL,
160     basefont => MISC_SPECIAL_EL,
161     bgsound => MISC_SPECIAL_EL,
162     big => FORMATTING_EL,
163     blockquote => MISC_SPECIAL_EL,
164     body => BODY_EL,
165     br => MISC_SPECIAL_EL,
166     button => BUTTON_EL,
167     caption => CAPTION_EL,
168     center => MISC_SPECIAL_EL,
169     col => MISC_SPECIAL_EL,
170     colgroup => MISC_SPECIAL_EL,
171 wakaba 1.193 command => MISC_SPECIAL_EL,
172     datagrid => MISC_SPECIAL_EL,
173 wakaba 1.207 dd => DTDD_EL,
174 wakaba 1.193 details => MISC_SPECIAL_EL,
175     dialog => MISC_SPECIAL_EL,
176 wakaba 1.123 dir => MISC_SPECIAL_EL,
177 wakaba 1.206 div => ADDRESS_DIV_EL,
178 wakaba 1.123 dl => MISC_SPECIAL_EL,
179 wakaba 1.207 dt => DTDD_EL,
180 wakaba 1.123 em => FORMATTING_EL,
181     embed => MISC_SPECIAL_EL,
182     fieldset => MISC_SPECIAL_EL,
183 wakaba 1.193 figure => MISC_SPECIAL_EL,
184 wakaba 1.123 font => FORMATTING_EL,
185 wakaba 1.193 footer => MISC_SPECIAL_EL,
186 wakaba 1.123 form => FORM_EL,
187     frame => MISC_SPECIAL_EL,
188     frameset => FRAMESET_EL,
189     h1 => HEADING_EL,
190     h2 => HEADING_EL,
191     h3 => HEADING_EL,
192     h4 => HEADING_EL,
193     h5 => HEADING_EL,
194     h6 => HEADING_EL,
195     head => MISC_SPECIAL_EL,
196 wakaba 1.193 header => MISC_SPECIAL_EL,
197 wakaba 1.123 hr => MISC_SPECIAL_EL,
198     html => HTML_EL,
199     i => FORMATTING_EL,
200     iframe => MISC_SPECIAL_EL,
201     img => MISC_SPECIAL_EL,
202 wakaba 1.193 #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
203 wakaba 1.123 input => MISC_SPECIAL_EL,
204     isindex => MISC_SPECIAL_EL,
205 wakaba 1.232 ## XXX keygen? (Whether a void element is in Special or not does not
206     ## affect to the processing, however.)
207 wakaba 1.123 li => LI_EL,
208     link => MISC_SPECIAL_EL,
209     listing => MISC_SPECIAL_EL,
210     marquee => MISC_SCOPING_EL,
211     menu => MISC_SPECIAL_EL,
212     meta => MISC_SPECIAL_EL,
213 wakaba 1.193 nav => MISC_SPECIAL_EL,
214 wakaba 1.206 nobr => NOBR_EL,
215 wakaba 1.123 noembed => MISC_SPECIAL_EL,
216     noframes => MISC_SPECIAL_EL,
217     noscript => MISC_SPECIAL_EL,
218     object => MISC_SCOPING_EL,
219     ol => MISC_SPECIAL_EL,
220     optgroup => OPTGROUP_EL,
221     option => OPTION_EL,
222     p => P_EL,
223     param => MISC_SPECIAL_EL,
224     plaintext => MISC_SPECIAL_EL,
225     pre => MISC_SPECIAL_EL,
226 wakaba 1.151 rp => RUBY_COMPONENT_EL,
227     rt => RUBY_COMPONENT_EL,
228     ruby => RUBY_EL,
229 wakaba 1.123 s => FORMATTING_EL,
230     script => MISC_SPECIAL_EL,
231     select => SELECT_EL,
232 wakaba 1.193 section => MISC_SPECIAL_EL,
233 wakaba 1.123 small => FORMATTING_EL,
234     spacer => MISC_SPECIAL_EL,
235     strike => FORMATTING_EL,
236     strong => FORMATTING_EL,
237     style => MISC_SPECIAL_EL,
238     table => TABLE_EL,
239     tbody => TABLE_ROW_GROUP_EL,
240     td => TABLE_CELL_EL,
241     textarea => MISC_SPECIAL_EL,
242     tfoot => TABLE_ROW_GROUP_EL,
243     th => TABLE_CELL_EL,
244     thead => TABLE_ROW_GROUP_EL,
245     title => MISC_SPECIAL_EL,
246     tr => TABLE_ROW_EL,
247     tt => FORMATTING_EL,
248     u => FORMATTING_EL,
249     ul => MISC_SPECIAL_EL,
250     wbr => MISC_SPECIAL_EL,
251 wakaba 1.236 xmp => MISC_SPECIAL_EL,
252 wakaba 1.123 };
253    
254 wakaba 1.126 my $el_category_f = {
255     $MML_NS => {
256     'annotation-xml' => MML_AXML_EL,
257 wakaba 1.206 mi => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
258     mo => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
259     mn => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
260     ms => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
261     mtext => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
262 wakaba 1.126 },
263     $SVG_NS => {
264 wakaba 1.206 foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
265     desc => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
266     title => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
267 wakaba 1.126 },
268     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
269     };
270    
271 wakaba 1.131 my $svg_attr_name = {
272 wakaba 1.146 attributename => 'attributeName',
273 wakaba 1.131 attributetype => 'attributeType',
274     basefrequency => 'baseFrequency',
275     baseprofile => 'baseProfile',
276     calcmode => 'calcMode',
277     clippathunits => 'clipPathUnits',
278     contentscripttype => 'contentScriptType',
279     contentstyletype => 'contentStyleType',
280     diffuseconstant => 'diffuseConstant',
281     edgemode => 'edgeMode',
282     externalresourcesrequired => 'externalResourcesRequired',
283     filterres => 'filterRes',
284     filterunits => 'filterUnits',
285     glyphref => 'glyphRef',
286     gradienttransform => 'gradientTransform',
287     gradientunits => 'gradientUnits',
288     kernelmatrix => 'kernelMatrix',
289     kernelunitlength => 'kernelUnitLength',
290     keypoints => 'keyPoints',
291     keysplines => 'keySplines',
292     keytimes => 'keyTimes',
293     lengthadjust => 'lengthAdjust',
294     limitingconeangle => 'limitingConeAngle',
295     markerheight => 'markerHeight',
296     markerunits => 'markerUnits',
297     markerwidth => 'markerWidth',
298     maskcontentunits => 'maskContentUnits',
299     maskunits => 'maskUnits',
300     numoctaves => 'numOctaves',
301     pathlength => 'pathLength',
302     patterncontentunits => 'patternContentUnits',
303     patterntransform => 'patternTransform',
304     patternunits => 'patternUnits',
305     pointsatx => 'pointsAtX',
306     pointsaty => 'pointsAtY',
307     pointsatz => 'pointsAtZ',
308     preservealpha => 'preserveAlpha',
309     preserveaspectratio => 'preserveAspectRatio',
310     primitiveunits => 'primitiveUnits',
311     refx => 'refX',
312     refy => 'refY',
313     repeatcount => 'repeatCount',
314     repeatdur => 'repeatDur',
315     requiredextensions => 'requiredExtensions',
316 wakaba 1.146 requiredfeatures => 'requiredFeatures',
317 wakaba 1.131 specularconstant => 'specularConstant',
318     specularexponent => 'specularExponent',
319     spreadmethod => 'spreadMethod',
320     startoffset => 'startOffset',
321     stddeviation => 'stdDeviation',
322     stitchtiles => 'stitchTiles',
323     surfacescale => 'surfaceScale',
324     systemlanguage => 'systemLanguage',
325     tablevalues => 'tableValues',
326     targetx => 'targetX',
327     targety => 'targetY',
328     textlength => 'textLength',
329     viewbox => 'viewBox',
330     viewtarget => 'viewTarget',
331     xchannelselector => 'xChannelSelector',
332     ychannelselector => 'yChannelSelector',
333     zoomandpan => 'zoomAndPan',
334     };
335    
336     my $foreign_attr_xname = {
337     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
338     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
339     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
340     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
341     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
342     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
343     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
344     'xml:base' => [$XML_NS, ['xml', 'base']],
345     'xml:lang' => [$XML_NS, ['xml', 'lang']],
346     'xml:space' => [$XML_NS, ['xml', 'space']],
347     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
348     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
349     };
350    
351     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
352    
353 wakaba 1.192 ## TODO: Invoke the reset algorithm when a resettable element is
354     ## created (cf. HTML5 revision 2259).
355    
## Parses an HTML document supplied as an in-memory byte string.
##
## After the invocant and the charset name, the next argument is the
## byte string itself or a reference to it.  A read-only in-memory
## filehandle is opened on it and everything else is delegated to
## |parse_byte_stream|, which receives the charset name, the handle and
## all remaining arguments unchanged.  Returns whatever
## |parse_byte_stream| returns.
356 wakaba 1.63 sub parse_byte_string ($$$$;$) {
357 wakaba 1.138 my $self = shift;
358     my $charset_name = shift;
    ## NOTE(review): the return value of |open| is not checked here.
359     open my $input, '<', ref $_[0] ? $_[0] : \($_[0]);
360     return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
361     } # parse_byte_string
362    
## Parses an HTML document read from a byte stream handle.
##
## Arguments (see the commented-out signature below): the parser object
## or class name, an optional charset name, the byte stream handle, the
## document object, an optional error callback and an optional
## |$get_wrapper| callback that maps the decoded character stream to
## the object actually handed to |parse_char_stream|.  When invoked on
## a class name, a new parser object is created.
##
## The character encoding is chosen in the SNIFFING block: an explicitly
## given charset name, then a BOM in the first (at most) 1024 bytes,
## then Whatpm::Charset::UniversalCharDet, then |windows-1252|.  The
## result is recorded in |$self->{input_encoding}| and
## |$self->{confident}|.  If |parse_char_stream| throws
## Whatpm::HTML::RestartParser (thrown from |$self->{change_encoding}|),
## the character stream is wrapped again and parsing is run again.
##
## Returns the value returned by |parse_char_stream|.
363 wakaba 1.162 sub parse_byte_stream ($$$$;$$) {
364     # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
365 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
366 wakaba 1.133 my $charset_name = shift;
367 wakaba 1.138 my $byte_stream = $_[0];
368 wakaba 1.133
369 wakaba 1.134 my $onerror = $_[2] || sub {
370     my (%opt) = @_;
371     warn "Parse error ($opt{type})\n";
372     };
373     $self->{parse_error} = $onerror; # updated later by parse_char_string
374    
375 wakaba 1.162 my $get_wrapper = $_[3] || sub ($) {
376     return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
377     };
378    
379 wakaba 1.133 ## HTML5 encoding sniffing algorithm
380     require Message::Charset::Info;
381     my $charset;
382 wakaba 1.136 my $buffer;
383     my ($char_stream, $e_status);
384 wakaba 1.133
385     SNIFFING: {
386 wakaba 1.160 ## NOTE: By setting |allow_fallback| option true when the
387     ## |get_decode_handle| method is invoked, we ignore what the HTML5
388     ## spec requires, i.e. unsupported encoding should be ignored.
389     ## TODO: We should not do this unless the parser is invoked
390     ## in the conformance checking mode, in which this behavior
391     ## would be useful.
392 wakaba 1.133
393     ## Step 1
394     if (defined $charset_name) {
395 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
396     ## TODO: Is this ok? Transfer protocol's parameter should be
397     ## interpreted in its semantics?
398 wakaba 1.133
    ## NOTE(review): |$charset| is used without a definedness check;
    ## confirm that |get_by_html_name| always returns an object.
399 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
400     ($byte_stream, allow_error_reporting => 1,
401 wakaba 1.133 allow_fallback => 1);
402 wakaba 1.136 if ($char_stream) {
403 wakaba 1.133 $self->{confident} = 1;
404     last SNIFFING;
405 wakaba 1.136 } else {
406 wakaba 1.190 !!!parse-error (type => 'charset:not supported',
407     layer => 'encode',
408     line => 1, column => 1,
409     value => $charset_name,
410     level => $self->{level}->{uncertain});
411 wakaba 1.133 }
412     }
413    
414     ## Step 2
    ## Read up to 1024 bytes, one |getc| call at a time, into
    ## |$byte_buffer| for use by the following sniffing steps.
415 wakaba 1.136 my $byte_buffer = '';
416     for (1..1024) {
417     my $char = $byte_stream->getc;
418     last unless defined $char;
419     $byte_buffer .= $char;
420     } ## TODO: timeout
421 wakaba 1.133
422     ## Step 3
    ## NOTE(review): unlike Steps 1 and 6, the BOM branches below do
    ## not test |$char_stream| before leaving the SNIFFING block.
423 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
424 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
425 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
426     ($byte_stream, allow_error_reporting => 1,
427     allow_fallback => 1, byte_buffer => \$byte_buffer);
428 wakaba 1.133 $self->{confident} = 1;
429     last SNIFFING;
430 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
431 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
432 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
433     ($byte_stream, allow_error_reporting => 1,
434     allow_fallback => 1, byte_buffer => \$byte_buffer);
435 wakaba 1.133 $self->{confident} = 1;
436     last SNIFFING;
437 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
438 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
439 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
440     ($byte_stream, allow_error_reporting => 1,
441     allow_fallback => 1, byte_buffer => \$byte_buffer);
442 wakaba 1.133 $self->{confident} = 1;
443     last SNIFFING;
444     }
445    
446     ## Step 4
447     ## TODO: <meta charset>
448    
449     ## Step 5
450     ## TODO: from history
451    
452     ## Step 6
453 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
454 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
455 wakaba 1.136 ($byte_buffer);
456 wakaba 1.133 if (defined $charset_name) {
457 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
458 wakaba 1.133
459 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
460     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
461     ($byte_stream);
462     ($char_stream, $e_status) = $charset->get_decode_handle
463     ($buffer, allow_error_reporting => 1,
464     allow_fallback => 1, byte_buffer => \$byte_buffer);
465     if ($char_stream) {
466     $buffer->{buffer} = $byte_buffer;
467 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
468     text => $charset_name,
469     level => $self->{level}->{info},
470     layer => 'encode',
471 wakaba 1.134 line => 1, column => 1);
472 wakaba 1.133 $self->{confident} = 0;
473     last SNIFFING;
474     }
475     }
476    
477     ## Step 7: default
478     ## TODO: Make this configurable.
479 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
480 wakaba 1.133 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
481     ## detectable in the step 6.
482 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
483     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
484     ($byte_stream);
485     ($char_stream, $e_status)
486     = $charset->get_decode_handle ($buffer,
487     allow_error_reporting => 1,
488     allow_fallback => 1,
489     byte_buffer => \$byte_buffer);
490     $buffer->{buffer} = $byte_buffer;
491 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
492     text => 'windows-1252',
493     level => $self->{level}->{info},
494     line => 1, column => 1,
495     layer => 'encode');
496 wakaba 1.63 $self->{confident} = 0;
497 wakaba 1.133 } # SNIFFING
498    
    ## Record the chosen encoding name and report how the decoder was
    ## obtained, based on the flags in |$e_status|.
499     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
500 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
501 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
502 wakaba 1.160 #text => $self->{input_encoding},
503 wakaba 1.153 level => $self->{level}->{uncertain},
504     line => 1, column => 1,
505     layer => 'encode');
506 wakaba 1.133 } elsif (not ($e_status &
507 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
508 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
509 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
510     text => $self->{input_encoding},
511     level => $self->{level}->{uncertain},
512     line => 1, column => 1,
513     layer => 'encode');
514 wakaba 1.160 } else {
515     $self->{input_encoding} = $charset->get_iana_name;
516 wakaba 1.63 }
517    
    ## Callback taking (parser, new charset name, token).  It rebinds
    ## the enclosing |$charset_name|, |$charset|, |$char_stream| and
    ## |$e_status| so that the restart handler below sees the new
    ## decoder, and throws Whatpm::HTML::RestartParser when a re-parse
    ## is required.
    ## NOTE(review): |$buffer| is assigned only in Steps 6 and 7 above;
    ## confirm the behaviour of |\ $buffer->{buffer}| on the other paths.
518     $self->{change_encoding} = sub {
519     my $self = shift;
520 wakaba 1.134 $charset_name = shift;
521 wakaba 1.114 my $token = shift;
522 wakaba 1.63
523 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
524 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
525     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
526     byte_buffer => \ $buffer->{buffer});
527 wakaba 1.134
528 wakaba 1.136 if ($char_stream) { # if supported
529 wakaba 1.134 ## "Change the encoding" algorithm:
530 wakaba 1.215
531     ## Step 1
532     if (defined $self->{input_encoding} and
533     $self->{input_encoding} eq $charset_name) {
534     !!!parse-error (type => 'charset label:matching',
535     text => $charset_name,
536     level => $self->{level}->{info});
537     $self->{confident} = 1;
538     return;
539     }
540 wakaba 1.63
541 wakaba 1.214 ## Step 2 (HTML5 revision 3205)
542     if (defined $self->{input_encoding} and
543     Message::Charset::Info->get_by_html_name ($self->{input_encoding})
544     ->{category} & Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
545     $self->{confident} = 1;
546     return;
547     }
548    
549     ## Step 3
550 wakaba 1.149 if ($charset->{category} &
551     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
552 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
553 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
554     ($byte_stream,
555     byte_buffer => \ $buffer->{buffer});
556 wakaba 1.134 }
557     $charset_name = $charset->get_iana_name;
558 wakaba 1.63
559 wakaba 1.153 !!!parse-error (type => 'charset label detected',
560     text => $self->{input_encoding},
561     value => $charset_name,
562     level => $self->{level}->{warn},
563     token => $token);
564 wakaba 1.134
565 wakaba 1.214 ## Step 4
566 wakaba 1.134 # if (can) {
567     ## change the encoding on the fly.
568     #$self->{confident} = 1;
569     #return;
570     # }
571    
572 wakaba 1.214 ## Step 5
573 wakaba 1.134 throw Whatpm::HTML::RestartParser ();
574 wakaba 1.63 }
575     }; # $self->{change_encoding}
576    
    ## Error callback installed on the (wrapped) character stream: it
    ## reports the error in the 'encode' layer at the current position
    ## and, when an |octets| reference is supplied, overwrites the
    ## referenced value with U+FFFD.
577 wakaba 1.136 my $char_onerror = sub {
578     my (undef, $type, %opt) = @_;
579 wakaba 1.153 !!!parse-error (layer => 'encode',
580 wakaba 1.174 line => $self->{line}, column => $self->{column} + 1,
581     %opt, type => $type);
582 wakaba 1.136 if ($opt{octets}) {
583     ${$opt{octets}} = "\x{FFFD}"; # replacement character
584     }
585     };
586 wakaba 1.162
587     my $wrapped_char_stream = $get_wrapper->($char_stream);
588     $wrapped_char_stream->onerror ($char_onerror);
589 wakaba 1.136
590 wakaba 1.182 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
591 wakaba 1.63 my $return;
592     try {
593 wakaba 1.162 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
594 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
595 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
596    
597     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
598 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
599 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
600     level => $self->{level}->{uncertain},
601 wakaba 1.160 #text => $self->{input_encoding},
602 wakaba 1.153 line => 1, column => 1,
603     layer => 'encode');
604 wakaba 1.134 } elsif (not ($e_status &
605 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
606 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
607 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
608     text => $self->{input_encoding},
609     level => $self->{level}->{uncertain},
610     line => 1, column => 1,
611     layer => 'encode');
612 wakaba 1.160 } else {
613     $self->{input_encoding} = $charset->get_iana_name;
614 wakaba 1.134 }
615 wakaba 1.63 $self->{confident} = 1;
616 wakaba 1.162
    ## Re-wrap the character stream chosen by {change_encoding} and
    ## parse again.  This second call is not inside a |try| block.
617     $wrapped_char_stream = $get_wrapper->($char_stream);
618     $wrapped_char_stream->onerror ($char_onerror);
619    
620     $return = $self->parse_char_stream ($wrapped_char_stream, @args);
621 wakaba 1.63 };
622     return $return;
623 wakaba 1.138 } # parse_byte_stream
624 wakaba 1.63
625 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
626     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
627     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
628     ## because the core part of our HTML parser expects a string of characters,
629     ## not a string of bytes or code units or anything which might contain a BOM.
630     ## Therefore, any parser interface that accepts a string of bytes,
631     ## such as |parse_byte_string| in this module, must ensure that it does
632     ## strip the BOM and never strip any ZWNBSP.
633    
## Parses an HTML document supplied as a character string.
##
## After the invocant, the first argument is the string itself or a
## reference to it.  It is wrapped in a
## Whatpm::Charset::DecodeHandle::CharString object, which is passed to
## |parse_char_stream| together with all remaining arguments.  Returns
## whatever |parse_char_stream| returns.  Also available under the
## older name |parse_string|.
634 wakaba 1.162 sub parse_char_string ($$$;$$) {
635     #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
636 wakaba 1.135 my $self = shift;
637 wakaba 1.139 my $s = ref $_[0] ? $_[0] : \($_[0]);
638 wakaba 1.171 require Whatpm::Charset::DecodeHandle;
639     my $input = Whatpm::Charset::DecodeHandle::CharString->new ($s);
640 wakaba 1.135 return $self->parse_char_stream ($input, @_[1..$#_]);
641     } # parse_char_string
642 wakaba 1.162 *parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
643 wakaba 1.63
## Parses an HTML document read from a character stream.
##
## After the invocant, |$_[0]| is the input character stream, |$_[1]|
## the document object (its child node list is emptied before parsing),
## |$_[2]| an optional error callback and |$_[3]| an optional callback
## that wraps the input stream.  When invoked on a class name, a new
## parser object is created.
##
## Installs the |set_nc|, |read_until| and |parse_error| callbacks on
## the parser, runs the tokenizer and tree constructor, and returns
## |$self->{document}|.
644 wakaba 1.182 sub parse_char_stream ($$$;$$) {
645 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
646 wakaba 1.135 my $input = $_[0];
647 wakaba 1.1 $self->{document} = $_[1];
648 wakaba 1.63 @{$self->{document}->child_nodes} = ();
649 wakaba 1.1
650 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
651    
652 wakaba 1.235 ## Confidence: irrelevant.
653 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
654 wakaba 1.235
655 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
656     if defined $self->{input_encoding};
657 wakaba 1.178 ## TODO: |{input_encoding}| is needless?
658 wakaba 1.63
659 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
660 wakaba 1.179 $self->{column_prev} = -1;
661     $self->{column} = 0;
    ## |set_nc| stores the code point of the next input character in
    ## |$self->{nc}| (-1 at end of input) and updates the line/column
    ## counters.  CR and CR LF are both delivered as a single LF, and
    ## U+0000 is reported as a parse error and replaced by U+FFFD.
662 wakaba 1.183 $self->{set_nc} = sub {
663 wakaba 1.1 my $self = shift;
664 wakaba 1.13
665 wakaba 1.178 my $char = '';
666 wakaba 1.183 if (defined $self->{next_nc}) {
    ## A character read ahead while handling CR is pending; use it.
667     $char = $self->{next_nc};
668     delete $self->{next_nc};
669     $self->{nc} = ord $char;
670 wakaba 1.139 } else {
671 wakaba 1.179 $self->{char_buffer} = '';
672     $self->{char_buffer_pos} = 0;
673    
    ## Fast path: fetch a run of characters other than NUL, LF and CR
    ## into |char_buffer| and return its first character directly.
674     my $count = $input->manakai_read_until
675 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
676 wakaba 1.179 if ($count) {
677     $self->{line_prev} = $self->{line};
678     $self->{column_prev} = $self->{column};
679     $self->{column}++;
680 wakaba 1.183 $self->{nc}
681 wakaba 1.179 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
682     return;
683     }
684    
685 wakaba 1.178 if ($input->read ($char, 1)) {
686 wakaba 1.183 $self->{nc} = ord $char;
687 wakaba 1.178 } else {
688 wakaba 1.183 $self->{nc} = -1;
689 wakaba 1.178 return;
690     }
691 wakaba 1.139 }
692 wakaba 1.112
693     ($self->{line_prev}, $self->{column_prev})
694     = ($self->{line}, $self->{column});
695     $self->{column}++;
696 wakaba 1.1
697 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
698 wakaba 1.132 !!!cp ('j1');
699 wakaba 1.112 $self->{line}++;
700     $self->{column} = 0;
701 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
702 wakaba 1.132 !!!cp ('j2');
703 wakaba 1.170 ## TODO: support for abort/streaming
    ## Look one character ahead: an LF following the CR is consumed,
    ## any other character is kept in |next_nc| for the next call.
704 wakaba 1.178 my $next = '';
705     if ($input->read ($next, 1) and $next ne "\x0A") {
706 wakaba 1.183 $self->{next_nc} = $next;
707 wakaba 1.135 }
708 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
709 wakaba 1.112 $self->{line}++;
710     $self->{column} = 0;
711 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
712 wakaba 1.132 !!!cp ('j4');
713 wakaba 1.8 !!!parse-error (type => 'NULL');
714 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
715 wakaba 1.1 }
716     };
717    
    ## |read_until| copies a run of characters that are neither listed
    ## in |$specials_range| nor NUL, LF or CR into |$scalar| and returns
    ## the number of characters copied.  It returns 0 without reading
    ## while a |next_nc| character is pending.  When the count is
    ## non-zero, the column counters are advanced and |$self->{nc}| is
    ## set to -1.
718 wakaba 1.172 $self->{read_until} = sub {
719     #my ($scalar, $specials_range, $offset) = @_;
720 wakaba 1.183 return 0 if defined $self->{next_nc};
721 wakaba 1.180
722 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
723 wakaba 1.180 my $offset = $_[2] || 0;
724    
725     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
    ## Unconsumed characters remain in |char_buffer|; serve them first.
726     pos ($self->{char_buffer}) = $self->{char_buffer_pos};
727     if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
728     substr ($_[0], $offset)
729     = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
730     my $count = $+[0] - $-[0];
731     if ($count) {
732     $self->{column} += $count;
733     $self->{char_buffer_pos} += $count;
734     $self->{line_prev} = $self->{line};
735     $self->{column_prev} = $self->{column} - 1;
736 wakaba 1.183 $self->{nc} = -1;
737 wakaba 1.180 }
738     return $count;
739     } else {
740     return 0;
741     }
742     } else {
    ## NOTE(review): this call passes |$_[2]| rather than the
    ## defaulted |$offset| computed above; confirm that is intended.
743     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
744     if ($count) {
745     $self->{column} += $count;
746     $self->{line_prev} = $self->{line};
747     $self->{column_prev} = $self->{column} - 1;
748 wakaba 1.183 $self->{nc} = -1;
749 wakaba 1.180 }
750     return $count;
751 wakaba 1.172 }
752     }; # $self->{read_until}
753 wakaba 1.171
754 wakaba 1.3 my $onerror = $_[2] || sub {
755     my (%opt) = @_;
756 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
757     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
758     warn "Parse error ($opt{type}) at line $line column $column\n";
759 wakaba 1.3 };
    ## The current line and column are supplied first, so that
    ## |line| / |column| values given by the caller take precedence.
760     $self->{parse_error} = sub {
761 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
762 wakaba 1.1 };
763    
764 wakaba 1.182 my $char_onerror = sub {
765     my (undef, $type, %opt) = @_;
766     !!!parse-error (layer => 'encode',
767     line => $self->{line}, column => $self->{column} + 1,
768     %opt, type => $type);
769     }; # $char_onerror
770    
    ## With a wrapper callback the wrapped stream always receives
    ## |$char_onerror|; otherwise an error handler already set on the
    ## input stream is left in place.
771     if ($_[3]) {
772     $input = $_[3]->($input);
773     $input->onerror ($char_onerror);
774     } else {
775     $input->onerror ($char_onerror) unless defined $input->onerror;
776     }
777    
778 wakaba 1.1 $self->_initialize_tokenizer;
779     $self->_initialize_tree_constructor;
780     $self->_construct_tree;
781     $self->_terminate_tree_constructor;
782    
    ## NOTE(review): the |read_until| closure above also refers to the
    ## enclosing |$self| and is not deleted here; confirm whether that
    ## reference loop is broken elsewhere.
783 wakaba 1.112 delete $self->{parse_error}; # remove loop
784    
785 wakaba 1.1 return $self->{document};
786 wakaba 1.135 } # parse_char_stream
787 wakaba 1.1
## Constructs a new parser object blessed into the given class.
##
## The object starts with a |level| table (its values are what the
## parse-error reports in this file pass as |level|) and with
## placeholder callbacks: |set_nc| sets |$self->{nc}| to -1, while
## |parse_error|, |change_encoding| and |application_cache_selection|
## do nothing.  Returns the new object.
788     sub new ($) {
789     my $class = shift;
790 wakaba 1.134 my $self = bless {
791 wakaba 1.153 level => {must => 'm',
792 wakaba 1.159 should => 's',
793 wakaba 1.153 warn => 'w',
794     info => 'i',
795     uncertain => 'u'},
796 wakaba 1.134 }, $class;
    ## NOTE(review): this closure refers to |$self| and is stored in
    ## |$self|, forming a reference loop until |set_nc| is replaced.
797 wakaba 1.183 $self->{set_nc} = sub {
798     $self->{nc} = -1;
799 wakaba 1.1 };
800     $self->{parse_error} = sub {
801     #
802     };
803 wakaba 1.63 $self->{change_encoding} = sub {
804     # if ($_[0] is a supported encoding) {
805     # run "change the encoding" algorithm;
806     # throw Whatpm::HTML::RestartParser (charset => $new_encoding);
807     # }
808     };
809 wakaba 1.61 $self->{application_cache_selection} = sub {
810     #
811     };
812 wakaba 1.1 return $self;
813     } # new
814    
## Insertion modes

## Insertion mode groups.  Each group is a single bit; a concrete
## insertion mode is one or more group bits, optionally bit-or'ed with
## a small discriminator value in the two lowest bits.
sub AFTER_HTML_IMS () { 1 << 2 }
sub HEAD_IMS () { 1 << 3 }
sub BODY_IMS () { 1 << 4 }
sub BODY_TABLE_IMS () { 1 << 5 }
sub TABLE_IMS () { 1 << 6 }
sub ROW_IMS () { 1 << 7 }
sub BODY_AFTER_IMS () { 1 << 8 }
sub FRAME_IMS () { 1 << 9 }
sub SELECT_IMS () { 1 << 10 }
#sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } # see Whatpm::HTML::Tokenizer
  ## NOTE: "in foreign content" insertion mode is special; it is combined
  ## with the secondary insertion mode.  In this parser, they are stored
  ## together in the bit-or'ed form.
sub IN_CDATA_RCDATA_IM () { 1 << 12 }
  ## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
  ## combined with the original insertion mode.  In this parser,
  ## they are stored together in the bit-or'ed form.

## Mask covering the bits used by the group bits and discriminators
## above (i.e. everything below the |IN_FOREIGN_CONTENT_IM| bit).
sub IM_MASK () { 0b11111111111 }

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { BODY_AFTER_IMS | AFTER_HTML_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { FRAME_IMS | AFTER_HTML_IMS }

## Concrete insertion modes.
sub IN_HEAD_IM () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM () { HEAD_IMS | 3 }
sub IN_BODY_IM () { BODY_IMS }
sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM () { TABLE_IMS }
sub AFTER_BODY_IM () { BODY_AFTER_IMS }
sub IN_FRAMESET_IM () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM () { FRAME_IMS | 2 }
sub IN_SELECT_IM () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
sub IN_COLUMN_GROUP_IM () { 2 }
861    
## Prepares |$self->{document}| for tree construction: turns strict
## error checking off, marks the document as an HTML document and
## records the initial source position (line 1, column 1) as user data.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## NOTE: $self->{document} MUST be specified before this method is called
  my $doc = $self->{document};

  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->manakai_is_html (1); # MUST
  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
872    
## Restores the document state changed for tree construction: turns
## strict error checking of |$self->{document}| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
878    
879     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
880    
881 wakaba 1.3 { # tree construction stage
882     my $token;
883    
## Entry point of the tree construction stage.  Fetches the first
## token, resets the per-parse tree construction state and then runs
## the "initial", "before html" and remaining insertion modes in turn.
sub _construct_tree ($) {
  my $self = shift;

  ## When an interactive UA renders the $self->{document} available
  ## to the user, or when it begins accepting user input, is
  ## not defined.

  !!!next-token;

  ## Reset the tree construction state.
  $self->{open_elements} = [];
  $self->{$_} = undef
      for qw(form_element head_element head_element_inserted
             inner_html_node ignore_newline);

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
910    
## Processes tokens in the "initial" insertion mode.
##
## Leading white space is dropped and comments are appended to the
## document.  A DOCTYPE token creates the document type node and
## selects the document's compatibility mode (no quirks, limited
## quirks, or quirks); any other token implies a missing DOCTYPE and
## therefore quirks mode.
##
## The method returns when the "before html" insertion mode should be
## entered.  At that time the shared |$token| holds the token to be
## processed in that mode.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not
      ## HTML5" error, switch to a conformance checking mode for
      ## another language.  (We don't support such mode switchings; it
      ## is nonsense to do anything different from what browsers do.)
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      my $doctype = $self->{document}->create_document_type_definition
          ($doctype_name);

      $doctype_name =~ tr/A-Z/a-z/; # ASCII case-insensitive
      if ($doctype_name ne 'html') {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{pubid}) {
        !!!cp ('t2');
        ## XXX Obsolete permitted DOCTYPEs
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{sysid}) {
        if ($token->{sysid} eq 'about:legacy-compat') {
          !!!cp ('t1.2'); ## <!DOCTYPE HTML SYSTEM "about:legacy-compat">
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else { ## <!DOCTYPE HTML>
        !!!cp ('t3');
        #
      }

      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
      $doctype->system_id ($token->{sysid}) if defined $token->{sysid};

      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## In Firefox3, |internalSubset| attribute is set to the empty
      ## string, while |null| is an allowed value for the attribute
      ## according to DOM3 Core.
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'html') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{pubid}) {
        ## Public identifiers are compared ASCII case-insensitively,
        ## so the identifier is uppercased and all the literals below
        ## are written in uppercase.
        my $pubid = $token->{pubid};
        $pubid =~ tr/a-z/A-Z/;

        ## Public identifier prefixes that trigger the quirks mode.
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        my $match;
        for (@$prefix) {
          ## Compare the beginning of the public identifier (not of
          ## the list itself) with each known prefix.
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4\.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4\.01 TRANSITIONAL//]) {
          ## HTML 4.01 Frameset/Transitional: quirks if the system
          ## identifier is missing, limited quirks otherwise.
          if (not defined $token->{sysid}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1\.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1\.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{sysid}) {
        ## System identifiers are compared ASCII case-insensitively.
        my $sysid = $token->{sysid};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      ## Non-white-space characters remain: there is no DOCTYPE.
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
1120    
## Processes tokens in the "before html" insertion mode.
##
## DOCTYPE tokens are reported and ignored, comments are appended to
## the document and leading white space is dropped.  An |html| start
## tag creates the root element from the token; any other token
## creates an implied |html| element and is left in |$token| to be
## reprocessed.  In both cases the root element is pushed onto the
## stack of open elements and the |application_cache_selection|
## callback is invoked exactly once (with the |manifest| attribute
## value, or |undef| if there is no manifest) before the method
## returns.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection algorithm is run by the
      ## "anything else" steps below, after the root element has been
      ## created, so it is not invoked here.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Anything else: create an implied |html| element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    ## Run the application cache selection algorithm with no manifest.
    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
1213    
## Resets |$self->{insertion_mode}| appropriately ("reset the insertion
## mode appropriately" algorithm): walks the stack of open elements
## from the current node upward until a node that determines the
## insertion mode is found.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Insertion modes selected by the local name of the node
  ## (used for nodes that are neither foreign elements nor table cells).
  my %mode_for_local_name = (
    select => IN_SELECT_IM,
    ## NOTE: |option| and |optgroup| do not set
    ## insertion mode to "in select" by themselves.
    tr => IN_ROW_IM,
    tbody => IN_TABLE_BODY_IM,
    thead => IN_TABLE_BODY_IM,
    tfoot => IN_TABLE_BODY_IM,
    caption => IN_CAPTION_IM,
    colgroup => IN_COLUMN_GROUP_IM,
    table => IN_TABLE_IM,
    head => IN_BODY_IM, # not in head!
    body => IN_BODY_IM,
    frameset => IN_FRAMESET_IM,
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $depth = -1;

  while (1) {
    my $node = $self->{open_elements}->[$depth];

    ## Step 3
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      ## The topmost node has been reached; only the fragment case
      ## (|inner_html_node| is set) is expected here.
      $is_last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $node = $self->{inner_html_node};
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] == TABLE_CELL_EL) {
      if ($is_last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = $mode_for_local_name{$node->[0]->manakai_local_name};
    }
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] == HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($is_last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17 (the node is fetched at the top of the loop)
    $depth--;

    ## Step 18: return to Step 3.
  }

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
1300    
1301     sub _tree_construction_main ($) {
1302     my $self = shift;
1303    
1304 wakaba 1.1 my $active_formatting_elements = [];
1305    
## Reconstructs the active formatting elements.  The argument is a
## code reference used to insert each newly cloned element node into
## the tree.  Entries of the list of active formatting elements that
## follow the last marker (or the last entry still in the stack of
## open elements) are cloned, inserted, pushed onto the stack of open
## elements and replaced in the list by their clones.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;
  my $afe = $active_formatting_elements;

  ## Step 1
  return unless @$afe;

  ## Step 3
  my $index = -1;
  my $entry = $afe->[$index];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($entry->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  ## Step 4: Stop rewinding at the first entry of the list.
  REWIND: until ($afe->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $index--;
    $entry = $afe->[$index];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $in_open_elements;
      for my $open (@{$self->{open_elements}}) {
        if ($entry->[0] eq $open->[0]) {
          !!!cp ('t33');
          $in_open_elements = 1;
          last;
        }
      }
      if ($in_open_elements) {
        !!!cp ('t34');
        #
      } else {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        next REWIND; ## Back to Step 4.
      }
    }

    ## Step 7
    $index++;
    $entry = $afe->[$index];
    last REWIND;
  } # REWIND

  CLONE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $afe->[$index] = $self->{open_elements}->[-1];

    ## Step 11
    last CLONE if $clone->[0] eq $afe->[-1]->[0];
    !!!cp ('t36');

    ## Step 7'
    $index++;
    $entry = $afe->[$index];
  } # CLONE

  !!!cp ('t37');
}; # $reconstruct_active_formatting_elements
1385    
## Clears the list of active formatting elements up to the last
## marker: removes the last marker entry and everything after it.
## The list is left unchanged if it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
1397    
1398 wakaba 1.96 my $insert;
1399    
## Generic CDATA/RCDATA element parsing algorithm.  Inserts an element
## for the current start tag token, switches the content model to the
## given flag, adds the |IN_CDATA_RCDATA_IM| bit to the insertion mode
## and fetches the next token.
##
## The argument is the content model flag to be set (CDATA or RCDATA).
my $parse_rcdata = sub ($) {
  my ($content_model_flag) = @_;

  ## Step 1
  !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);

  ## Step 2
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 3, 4
  $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;

  !!!nack ('t40.1');
  !!!next-token;
}; # $parse_rcdata
1417 wakaba 1.1
## Processes a |script| start tag: creates the element from the
## current token, inserts it via |$insert|, pushes it onto the stack
## of open elements, switches the content model to CDATA, adds the
## |IN_CDATA_RCDATA_IM| bit to the insertion mode and fetches the next
## token.
my $script_start_tag = sub () {
  ## Step 1
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);

  ## Step 2
  ## TODO: mark as "parser-inserted"

  ## Step 3
  ## TODO: Mark as "already executed", if ...

  ## Step 4 (HTML5 revision 2702)
  $insert->($script_el);
  my $stack_entry = [$script_el, $el_category->{script}];
  push @{$self->{open_elements}}, $stack_entry;

  ## Step 5
  delete $self->{escape}; # MUST
  $self->{content_model} = CDATA_CONTENT_MODEL;

  ## Step 6-7
  $self->{insertion_mode} = $self->{insertion_mode} | IN_CDATA_RCDATA_IM;

  !!!nack ('t40.2');
  !!!next-token;
}; # $script_start_tag
1443    
1444 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1445 wakaba 1.229 ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag (OBSOLETE; unused).
1446 wakaba 1.202 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1447 wakaba 1.102 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1448    
## Processes an end tag of a formatting element by the adoption agency
## algorithm (AAA).
##
## The argument is the end tag token.  If no matching formatting
## element can be processed, an "unmatched end tag" error is reported.
## The closure fetches the next token before returning.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1: Find the formatting element, i.e. the last entry after
    ## the last marker in the list of active formatting elements whose
    ## local name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last entry of the list.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1; # $formatting_element
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2: Find the furthest block.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
        ## NOTE: The topmost (eldest) node.
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3: No furthest block; pop the formatting element and
    ## everything below it, and remove it from the list.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: If the node is not in the list of active formatting
      ## elements, remove it from the stack and restart the loop.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      ## Foster parenting.
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
          !!!cp ('t65.2');
          $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
          $next_sibling = $self->{open_elements}->[$_]->[0];
          undef $next_sibling
              unless $next_sibling->parent_node eq $foster_parent_element;
          last OE;
        }
      } # OE
      $foster_parent_element ||= $self->{open_elements}->[0]->[0];

      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1680    
## Plain insertion routine: appends the given node as the last child of
## the node held by the last entry of the stack of open elements.
## |$insert| is pointed at this routine here.
my $insert_to_current = sub {
  my ($new_node) = @_;
  $self->{open_elements}->[-1]->[0]->append_child ($new_node);
}; # $insert_to_current
$insert = $insert_to_current;
1684    
## Foster parenting for element nodes.  Note that the parser carries
## three copies of the "foster parenting" code: this one (for
## elements), one for texts, and one for elements in the AAA code.
##
## Argument: the node to insert.  When the last entry of the stack of
## open elements is flagged |TABLE_ROWS_EL|, the node is inserted into
## the foster parent element and the innermost open table is marked as
## tainted; otherwise the node is simply appended to the current node.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $open_els = $self->{open_elements};

  unless ($open_els->[-1]->[1] & TABLE_ROWS_EL) {
    ## No foster parenting necessary: append to the current node.
    !!!cp ('t72');
    return $open_els->[-1]->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent_element, $next_sibling);

  ## Scan the stack from its last entry towards the first one, looking
  ## for the nearest |table| entry.
  my $i = $#{$open_els};
  while ($i >= 0) {
    if ($open_els->[$i]->[1] == TABLE_EL) {
      !!!cp ('t71');
      my $table_el = $open_els->[$i]->[0];
      ## The stack entry just before the |table| is the foster parent.
      $foster_parent_element = $open_els->[$i - 1]->[0];
      ## Insert right before the |table| only if the |table| is
      ## actually a child of the foster parent; otherwise, append.
      $next_sibling = ($table_el->parent_node eq $foster_parent_element)
          ? $table_el : undef;
      last;
    }
    $i--;
  }

  ## No |table| found in the stack: fall back to the first entry.
  $foster_parent_element = $open_els->[0]->[0]
      unless $foster_parent_element;

  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1713    
## NOTE: Insert a character (MUST): When a character is inserted, if
## the last node that was inserted by the parser is a Text node and
## the character has to be inserted after that node, then the
## character is appended to that Text node.  However, if any other
## node is inserted by the parser, then a new Text node is created
## and the character is stored in that new Text node.  If I am not
## mistaken, for a parser with scripting disabled, there are only two
## cases where this occurs.  One is the case where an element node
## is inserted into the |head| element.  This is covered by using the
## |$self->{head_element_inserted}| flag.  The other is the case where
## an element or comment is inserted into the |table| subtree while
## foster parenting happens.  This is covered by using the [2] flag
## of the |$open_tables| structure.  All other cases are handled
## simply by calling the |manakai_append_text| method.
1728    
1729 wakaba 1.204 ## TODO: |<body><script>document.write("a<br>");
1730     ## document.body.removeChild (document.body.lastChild);
1731     ## document.write ("b")</script>|
1732    
1733 wakaba 1.126 B: while (1) {
1734 wakaba 1.230
1735     ## The "in table text" insertion mode.
1736     if ($self->{insertion_mode} & TABLE_IMS and
1737     not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1738     not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1739     C: {
1740     my $s;
1741     if ($token->{type} == CHARACTER_TOKEN) {
1742     !!!cp ('t194');
1743     $self->{pending_chars} ||= [];
1744     push @{$self->{pending_chars}}, $token;
1745     !!!next-token;
1746     next B;
1747     } else {
1748     if ($self->{pending_chars}) {
1749     $s = join '', map { $_->{data} } @{$self->{pending_chars}};
1750     delete $self->{pending_chars};
1751     if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) {
1752     !!!cp ('t195');
1753     #
1754     } else {
1755     !!!cp ('t195.1');
1756     #$self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1757     $self->{open_elements}->[-1]->[0]->append_child
1758     ($self->{document}->create_text_node ($s));
1759     last C;
1760     }
1761     } else {
1762     !!!cp ('t195.2');
1763     last C;
1764     }
1765     }
1766    
1767 wakaba 1.234 ## Foster parenting.
1768 wakaba 1.230 !!!parse-error (type => 'in table:#text', token => $token);
1769    
1770     ## NOTE: As if in body, but insert into the foster parent element.
1771     $reconstruct_active_formatting_elements->($insert_to_foster);
1772    
1773     if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
1774     # MUST
1775     my $foster_parent_element;
1776     my $next_sibling;
1777     OE: for (reverse 0..$#{$self->{open_elements}}) {
1778     if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1779 wakaba 1.234 !!!cp ('t197');
1780     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
1781     $next_sibling = $self->{open_elements}->[$_]->[0];
1782     undef $next_sibling
1783     unless $next_sibling->parent_node eq $foster_parent_element;
1784 wakaba 1.230 last OE;
1785     }
1786     } # OE
1787 wakaba 1.234 $foster_parent_element ||= $self->{open_elements}->[0]->[0];
1788    
1789     !!!cp ('t199');
1790     $foster_parent_element->insert_before
1791     ($self->{document}->create_text_node ($s), $next_sibling);
1792    
1793 wakaba 1.230 $open_tables->[-1]->[1] = 1; # tainted
1794     $open_tables->[-1]->[2] = 1; # ~node inserted
1795     } else {
1796     ## NOTE: Fragment case or in a foster parent'ed element
1797     ## (e.g. |<table><span>a|). In fragment case, whether the
1798     ## character is appended to existing node or a new node is
1799     ## created is irrelevant, since the foster parent'ed nodes
1800     ## are discarded and fragment parsing does not invoke any
1801     ## script.
1802     !!!cp ('t200');
1803     $self->{open_elements}->[-1]->[0]->manakai_append_text ($s);
1804     }
1805     } # C
1806     } # TABLE_IMS
1807    
1808 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
1809 wakaba 1.79 !!!cp ('t73');
1810 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1811 wakaba 1.52 ## Ignore the token
1812     ## Stay in the phase
1813     !!!next-token;
1814 wakaba 1.126 next B;
1815 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
1816 wakaba 1.52 $token->{tag_name} eq 'html') {
1817 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1818 wakaba 1.79 !!!cp ('t79');
1819 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1820 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
1821     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1822 wakaba 1.79 !!!cp ('t80');
1823 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1824 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1825 wakaba 1.79 } else {
1826     !!!cp ('t81');
1827 wakaba 1.52 }
1828    
1829 wakaba 1.84 !!!cp ('t82');
1830 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
1831 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
1832     for my $attr_name (keys %{$token->{attributes}}) {
1833     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1834 wakaba 1.79 !!!cp ('t84');
1835 wakaba 1.52 $top_el->set_attribute_ns
1836     (undef, [undef, $attr_name],
1837     $token->{attributes}->{$attr_name}->{value});
1838     }
1839     }
1840 wakaba 1.125 !!!nack ('t84.1');
1841 wakaba 1.52 !!!next-token;
1842 wakaba 1.126 next B;
1843 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1844 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
1845 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1846 wakaba 1.79 !!!cp ('t85');
1847 wakaba 1.52 $self->{document}->append_child ($comment);
1848 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1849 wakaba 1.79 !!!cp ('t86');
1850 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
1851     } else {
1852 wakaba 1.79 !!!cp ('t87');
1853 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1854 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1855 wakaba 1.52 }
1856     !!!next-token;
1857 wakaba 1.126 next B;
1858 wakaba 1.205 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1859     if ($token->{type} == CHARACTER_TOKEN) {
1860     $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1861     delete $self->{ignore_newline};
1862    
1863     if (length $token->{data}) {
1864     !!!cp ('t43');
1865     $self->{open_elements}->[-1]->[0]->manakai_append_text
1866     ($token->{data});
1867     } else {
1868     !!!cp ('t43.1');
1869     }
1870     !!!next-token;
1871     next B;
1872     } elsif ($token->{type} == END_TAG_TOKEN) {
1873     delete $self->{ignore_newline};
1874    
1875     if ($token->{tag_name} eq 'script') {
1876     !!!cp ('t50');
1877    
1878     ## Para 1-2
1879     my $script = pop @{$self->{open_elements}};
1880    
1881     ## Para 3
1882     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1883    
1884     ## Para 4
1885     ## TODO: $old_insertion_point = $current_insertion_point;
1886     ## TODO: $current_insertion_point = just before $self->{nc};
1887    
1888     ## Para 5
1889     ## TODO: Run the $script->[0].
1890    
1891     ## Para 6
1892     ## TODO: $current_insertion_point = $old_insertion_point;
1893    
1894     ## Para 7
1895     ## TODO: if ($pending_external_script) {
1896     ## TODO: ...
1897     ## TODO: }
1898    
1899     !!!next-token;
1900     next B;
1901     } else {
1902     !!!cp ('t42');
1903    
1904     pop @{$self->{open_elements}};
1905    
1906     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1907     !!!next-token;
1908     next B;
1909     }
1910     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1911     delete $self->{ignore_newline};
1912    
1913     !!!cp ('t44');
1914     !!!parse-error (type => 'not closed',
1915     text => $self->{open_elements}->[-1]->[0]
1916     ->manakai_local_name,
1917     token => $token);
1918    
1919 wakaba 1.206 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1920 wakaba 1.205 # ## TODO: Mark as "already executed"
1921     #}
1922    
1923     pop @{$self->{open_elements}};
1924    
1925     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1926     ## Reprocess.
1927     next B;
1928     } else {
1929     die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1930     }
1931 wakaba 1.126 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1932     if ($token->{type} == CHARACTER_TOKEN) {
1933     !!!cp ('t87.1');
1934     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1935     !!!next-token;
1936     next B;
1937     } elsif ($token->{type} == START_TAG_TOKEN) {
1938 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1939     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1940 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1941     ($token->{tag_name} eq 'svg' and
1942 wakaba 1.206 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1943 wakaba 1.126 ## NOTE: "using the rules for secondary insertion mode"then"continue"
1944     !!!cp ('t87.2');
1945     #
1946     } elsif ({
1947 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1948 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1949 wakaba 1.223 em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1,
1950 wakaba 1.146 h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1951     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1952     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1953     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1954     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1955 wakaba 1.223 }->{$token->{tag_name}} or
1956     ($token->{tag_name} eq 'font' and
1957     ($token->{attributes}->{color} or
1958     $token->{attributes}->{face} or
1959     $token->{attributes}->{size}))) {
1960 wakaba 1.126 !!!cp ('t87.2');
1961     !!!parse-error (type => 'not closed',
1962 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1963 wakaba 1.126 ->manakai_local_name,
1964     token => $token);
1965    
1966     pop @{$self->{open_elements}}
1967     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1968    
1969 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1970 wakaba 1.126 ## Reprocess.
1971     next B;
1972     } else {
1973 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1974     my $tag_name = $token->{tag_name};
1975     if ($nsuri eq $SVG_NS) {
1976     $tag_name = {
1977     altglyph => 'altGlyph',
1978     altglyphdef => 'altGlyphDef',
1979     altglyphitem => 'altGlyphItem',
1980     animatecolor => 'animateColor',
1981     animatemotion => 'animateMotion',
1982     animatetransform => 'animateTransform',
1983     clippath => 'clipPath',
1984     feblend => 'feBlend',
1985     fecolormatrix => 'feColorMatrix',
1986     fecomponenttransfer => 'feComponentTransfer',
1987     fecomposite => 'feComposite',
1988     feconvolvematrix => 'feConvolveMatrix',
1989     fediffuselighting => 'feDiffuseLighting',
1990     fedisplacementmap => 'feDisplacementMap',
1991     fedistantlight => 'feDistantLight',
1992     feflood => 'feFlood',
1993     fefunca => 'feFuncA',
1994     fefuncb => 'feFuncB',
1995     fefuncg => 'feFuncG',
1996     fefuncr => 'feFuncR',
1997     fegaussianblur => 'feGaussianBlur',
1998     feimage => 'feImage',
1999     femerge => 'feMerge',
2000     femergenode => 'feMergeNode',
2001     femorphology => 'feMorphology',
2002     feoffset => 'feOffset',
2003     fepointlight => 'fePointLight',
2004     fespecularlighting => 'feSpecularLighting',
2005     fespotlight => 'feSpotLight',
2006     fetile => 'feTile',
2007     feturbulence => 'feTurbulence',
2008     foreignobject => 'foreignObject',
2009     glyphref => 'glyphRef',
2010     lineargradient => 'linearGradient',
2011     radialgradient => 'radialGradient',
2012     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
2013     textpath => 'textPath',
2014     }->{$tag_name} || $tag_name;
2015     }
2016    
2017     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
2018    
2019     ## "adjust foreign attributes" - done in insert-element-f
2020 wakaba 1.126
2021 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
2022 wakaba 1.126
2023     if ($self->{self_closing}) {
2024     pop @{$self->{open_elements}};
2025     !!!ack ('t87.3');
2026     } else {
2027     !!!cp ('t87.4');
2028     }
2029    
2030     !!!next-token;
2031     next B;
2032     }
2033     } elsif ($token->{type} == END_TAG_TOKEN) {
2034     ## NOTE: "using the rules for secondary insertion mode" then "continue"
2035 wakaba 1.219 if ($token->{tag_name} eq 'script') {
2036     !!!cp ('t87.41');
2037     #
2038     ## XXXscript: Execute script here.
2039     } else {
2040     !!!cp ('t87.5');
2041     #
2042     }
2043 wakaba 1.126 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2044     !!!cp ('t87.6');
2045 wakaba 1.146 !!!parse-error (type => 'not closed',
2046 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2047 wakaba 1.146 ->manakai_local_name,
2048     token => $token);
2049    
2050     pop @{$self->{open_elements}}
2051     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
2052    
2053 wakaba 1.200 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
2054    
2055 wakaba 1.146 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
2056     ## Reprocess.
2057     next B;
2058 wakaba 1.126 } else {
2059     die "$0: $token->{type}: Unknown token type";
2060     }
2061     }
2062    
2063     if ($self->{insertion_mode} & HEAD_IMS) {
2064 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2065 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
2066 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2067 wakaba 1.202 if ($self->{head_element_inserted}) {
2068     !!!cp ('t88.3');
2069     $self->{open_elements}->[-1]->[0]->append_child
2070     ($self->{document}->create_text_node ($1));
2071     delete $self->{head_element_inserted};
2072     ## NOTE: |</head> <link> |
2073     #
2074     } else {
2075     !!!cp ('t88.2');
2076     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2077     ## NOTE: |</head> &#x20;|
2078     #
2079     }
2080 wakaba 1.99 } else {
2081     !!!cp ('t88.1');
2082     ## Ignore the token.
2083 wakaba 1.177 #
2084 wakaba 1.99 }
2085 wakaba 1.52 unless (length $token->{data}) {
2086 wakaba 1.79 !!!cp ('t88');
2087 wakaba 1.52 !!!next-token;
2088 wakaba 1.126 next B;
2089 wakaba 1.1 }
2090 wakaba 1.177 ## TODO: set $token->{column} appropriately
2091 wakaba 1.1 }
2092 wakaba 1.52
2093 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2094 wakaba 1.79 !!!cp ('t89');
2095 wakaba 1.52 ## As if <head>
2096 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2097 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2098 wakaba 1.123 push @{$self->{open_elements}},
2099     [$self->{head_element}, $el_category->{head}];
2100 wakaba 1.52
2101     ## Reprocess in the "in head" insertion mode...
2102     pop @{$self->{open_elements}};
2103    
2104     ## Reprocess in the "after head" insertion mode...
2105 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2106 wakaba 1.79 !!!cp ('t90');
2107 wakaba 1.52 ## As if </noscript>
2108     pop @{$self->{open_elements}};
2109 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
2110 wakaba 1.1
2111 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
2112     ## As if </head>
2113     pop @{$self->{open_elements}};
2114    
2115     ## Reprocess in the "after head" insertion mode...
2116 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2117 wakaba 1.79 !!!cp ('t91');
2118 wakaba 1.52 pop @{$self->{open_elements}};
2119    
2120     ## Reprocess in the "after head" insertion mode...
2121 wakaba 1.79 } else {
2122     !!!cp ('t92');
2123 wakaba 1.1 }
2124 wakaba 1.52
2125 wakaba 1.123 ## "after head" insertion mode
2126     ## As if <body>
2127     !!!insert-element ('body',, $token);
2128     $self->{insertion_mode} = IN_BODY_IM;
2129     ## reprocess
2130 wakaba 1.126 next B;
2131 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
2132     if ($token->{tag_name} eq 'head') {
2133     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2134     !!!cp ('t93');
2135 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2136 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
2137     ($self->{head_element});
2138     push @{$self->{open_elements}},
2139     [$self->{head_element}, $el_category->{head}];
2140     $self->{insertion_mode} = IN_HEAD_IM;
2141 wakaba 1.125 !!!nack ('t93.1');
2142 wakaba 1.123 !!!next-token;
2143 wakaba 1.126 next B;
2144 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2145 wakaba 1.139 !!!cp ('t93.2');
2146 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
2147     token => $token);
2148 wakaba 1.139 ## Ignore the token
2149     !!!nack ('t93.3');
2150     !!!next-token;
2151     next B;
2152 wakaba 1.125 } else {
2153     !!!cp ('t95');
2154 wakaba 1.153 !!!parse-error (type => 'in head:head',
2155     token => $token); # or in head noscript
2156 wakaba 1.125 ## Ignore the token
2157     !!!nack ('t95.1');
2158     !!!next-token;
2159 wakaba 1.126 next B;
2160 wakaba 1.125 }
2161     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2162 wakaba 1.126 !!!cp ('t96');
2163     ## As if <head>
2164     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2165     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2166     push @{$self->{open_elements}},
2167     [$self->{head_element}, $el_category->{head}];
2168 wakaba 1.52
2169 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
2170     ## Reprocess in the "in head" insertion mode...
2171     } else {
2172     !!!cp ('t97');
2173     }
2174 wakaba 1.52
2175 wakaba 1.202 if ($token->{tag_name} eq 'base') {
2176     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2177     !!!cp ('t98');
2178     ## As if </noscript>
2179     pop @{$self->{open_elements}};
2180     !!!parse-error (type => 'in noscript', text => 'base',
2181     token => $token);
2182    
2183     $self->{insertion_mode} = IN_HEAD_IM;
2184     ## Reprocess in the "in head" insertion mode...
2185     } else {
2186     !!!cp ('t99');
2187     }
2188 wakaba 1.49
2189 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2190     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2191     !!!cp ('t100');
2192     !!!parse-error (type => 'after head',
2193     text => $token->{tag_name}, token => $token);
2194     push @{$self->{open_elements}},
2195     [$self->{head_element}, $el_category->{head}];
2196     $self->{head_element_inserted} = 1;
2197     } else {
2198     !!!cp ('t101');
2199     }
2200     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2201     pop @{$self->{open_elements}};
2202     pop @{$self->{open_elements}} # <head>
2203     if $self->{insertion_mode} == AFTER_HEAD_IM;
2204     !!!nack ('t101.1');
2205     !!!next-token;
2206     next B;
2207 wakaba 1.194 } elsif ($token->{tag_name} eq 'link') {
2208     ## NOTE: There is a "as if in head" code clone.
2209     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2210     !!!cp ('t102');
2211     !!!parse-error (type => 'after head',
2212     text => $token->{tag_name}, token => $token);
2213     push @{$self->{open_elements}},
2214     [$self->{head_element}, $el_category->{head}];
2215 wakaba 1.202 $self->{head_element_inserted} = 1;
2216 wakaba 1.194 } else {
2217     !!!cp ('t103');
2218     }
2219     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2220     pop @{$self->{open_elements}};
2221     pop @{$self->{open_elements}} # <head>
2222     if $self->{insertion_mode} == AFTER_HEAD_IM;
2223     !!!ack ('t103.1');
2224     !!!next-token;
2225     next B;
2226 wakaba 1.232 } elsif ($token->{tag_name} eq 'command') {
2227 wakaba 1.194 if ($self->{insertion_mode} == IN_HEAD_IM) {
2228     ## NOTE: If the insertion mode at the time of the emission
2229     ## of the token was "before head", $self->{insertion_mode}
2230     ## is already changed to |IN_HEAD_IM|.
2231    
2232     ## NOTE: There is a "as if in head" code clone.
2233     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2234     pop @{$self->{open_elements}};
2235     pop @{$self->{open_elements}} # <head>
2236     if $self->{insertion_mode} == AFTER_HEAD_IM;
2237     !!!ack ('t103.2');
2238     !!!next-token;
2239     next B;
2240     } else {
2241     ## NOTE: "in head noscript" or "after head" insertion mode
2242     ## - in these cases, these tags are treated as same as
2243     ## normal in-body tags.
2244     !!!cp ('t103.3');
2245     #
2246     }
2247 wakaba 1.202 } elsif ($token->{tag_name} eq 'meta') {
2248     ## NOTE: There is a "as if in head" code clone.
2249     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2250     !!!cp ('t104');
2251     !!!parse-error (type => 'after head',
2252     text => $token->{tag_name}, token => $token);
2253     push @{$self->{open_elements}},
2254     [$self->{head_element}, $el_category->{head}];
2255     $self->{head_element_inserted} = 1;
2256     } else {
2257     !!!cp ('t105');
2258     }
2259     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2260     my $meta_el = pop @{$self->{open_elements}};
2261 wakaba 1.34
2262     unless ($self->{confident}) {
2263 wakaba 1.134 if ($token->{attributes}->{charset}) {
2264 wakaba 1.79 !!!cp ('t106');
2265 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2266     ## in the {change_encoding} callback.
2267 wakaba 1.63 $self->{change_encoding}
2268 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
2269     $token);
2270 wakaba 1.66
2271     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2272     ->set_user_data (manakai_has_reference =>
2273     $token->{attributes}->{charset}
2274     ->{has_reference});
2275 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
2276     if ($token->{attributes}->{content}->{value}
2277 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2278 wakaba 1.186 [\x09\x0A\x0C\x0D\x20]*=
2279     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2280     ([^"'\x09\x0A\x0C\x0D\x20]
2281     [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2282 wakaba 1.79 !!!cp ('t107');
2283 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2284     ## in the {change_encoding} callback.
2285 wakaba 1.63 $self->{change_encoding}
2286 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2287     $token);
2288 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2289     ->set_user_data (manakai_has_reference =>
2290     $token->{attributes}->{content}
2291     ->{has_reference});
2292 wakaba 1.79 } else {
2293     !!!cp ('t108');
2294 wakaba 1.63 }
2295 wakaba 1.34 }
2296 wakaba 1.66 } else {
2297     if ($token->{attributes}->{charset}) {
2298 wakaba 1.79 !!!cp ('t109');
2299 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2300     ->set_user_data (manakai_has_reference =>
2301     $token->{attributes}->{charset}
2302     ->{has_reference});
2303     }
2304 wakaba 1.68 if ($token->{attributes}->{content}) {
2305 wakaba 1.79 !!!cp ('t110');
2306 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2307     ->set_user_data (manakai_has_reference =>
2308     $token->{attributes}->{content}
2309     ->{has_reference});
2310     }
2311 wakaba 1.34 }
2312    
2313 wakaba 1.100 pop @{$self->{open_elements}} # <head>
2314 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
2315 wakaba 1.125 !!!ack ('t110.1');
2316 wakaba 1.34 !!!next-token;
2317 wakaba 1.126 next B;
2318 wakaba 1.202 } elsif ($token->{tag_name} eq 'title') {
2319     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2320     !!!cp ('t111');
2321     ## As if </noscript>
2322     pop @{$self->{open_elements}};
2323     !!!parse-error (type => 'in noscript', text => 'title',
2324     token => $token);
2325    
2326     $self->{insertion_mode} = IN_HEAD_IM;
2327     ## Reprocess in the "in head" insertion mode...
2328     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2329     !!!cp ('t112');
2330     !!!parse-error (type => 'after head',
2331     text => $token->{tag_name}, token => $token);
2332     push @{$self->{open_elements}},
2333     [$self->{head_element}, $el_category->{head}];
2334     $self->{head_element_inserted} = 1;
2335     } else {
2336     !!!cp ('t113');
2337     }
2338 wakaba 1.49
2339 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2340     $parse_rcdata->(RCDATA_CONTENT_MODEL);
2341 wakaba 1.225
2342     ## NOTE: At this point the stack of open elements contain
2343     ## the |head| element (index == -2) and the |script| element
2344     ## (index == -1). In the "after head" insertion mode the
2345     ## |head| element is inserted only for the purpose of
2346     ## providing the context for the |script| element, and
2347     ## therefore we can now and have to remove the element from
2348     ## the stack.
2349 wakaba 1.205 splice @{$self->{open_elements}}, -2, 1, () # <head>
2350 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2351 wakaba 1.202 next B;
2352     } elsif ($token->{tag_name} eq 'style' or
2353     $token->{tag_name} eq 'noframes') {
2354     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2355     ## insertion mode IN_HEAD_IM)
2356     ## NOTE: There is a "as if in head" code clone.
2357     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2358     !!!cp ('t114');
2359     !!!parse-error (type => 'after head',
2360     text => $token->{tag_name}, token => $token);
2361     push @{$self->{open_elements}},
2362     [$self->{head_element}, $el_category->{head}];
2363     $self->{head_element_inserted} = 1;
2364     } else {
2365     !!!cp ('t115');
2366     }
2367     $parse_rcdata->(CDATA_CONTENT_MODEL);
2368 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2369     splice @{$self->{open_elements}}, -2, 1, () # <head>
2370 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2371 wakaba 1.202 next B;
2372 wakaba 1.205 } elsif ($token->{tag_name} eq 'noscript') {
2373 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
2374 wakaba 1.79 !!!cp ('t116');
2375 wakaba 1.25 ## NOTE: and scripting is disalbed
2376 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2377 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2378 wakaba 1.125 !!!nack ('t116.1');
2379 wakaba 1.1 !!!next-token;
2380 wakaba 1.126 next B;
2381 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2382 wakaba 1.79 !!!cp ('t117');
2383 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
2384     token => $token);
2385 wakaba 1.1 ## Ignore the token
2386 wakaba 1.125 !!!nack ('t117.1');
2387 wakaba 1.41 !!!next-token;
2388 wakaba 1.126 next B;
2389 wakaba 1.1 } else {
2390 wakaba 1.79 !!!cp ('t118');
2391 wakaba 1.25 #
2392 wakaba 1.1 }
2393 wakaba 1.202 } elsif ($token->{tag_name} eq 'script') {
2394     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2395     !!!cp ('t119');
2396     ## As if </noscript>
2397     pop @{$self->{open_elements}};
2398     !!!parse-error (type => 'in noscript', text => 'script',
2399     token => $token);
2400    
2401     $self->{insertion_mode} = IN_HEAD_IM;
2402     ## Reprocess in the "in head" insertion mode...
2403     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2404     !!!cp ('t120');
2405     !!!parse-error (type => 'after head',
2406     text => $token->{tag_name}, token => $token);
2407     push @{$self->{open_elements}},
2408     [$self->{head_element}, $el_category->{head}];
2409     $self->{head_element_inserted} = 1;
2410     } else {
2411     !!!cp ('t121');
2412     }
2413 wakaba 1.49
2414 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2415     $script_start_tag->();
2416 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2417     splice @{$self->{open_elements}}, -2, 1 # <head>
2418 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2419 wakaba 1.202 next B;
2420     } elsif ($token->{tag_name} eq 'body' or
2421     $token->{tag_name} eq 'frameset') {
2422 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2423 wakaba 1.79 !!!cp ('t122');
2424 wakaba 1.49 ## As if </noscript>
2425     pop @{$self->{open_elements}};
2426 wakaba 1.153 !!!parse-error (type => 'in noscript',
2427     text => $token->{tag_name}, token => $token);
2428 wakaba 1.49
2429     ## Reprocess in the "in head" insertion mode...
2430     ## As if </head>
2431     pop @{$self->{open_elements}};
2432    
2433     ## Reprocess in the "after head" insertion mode...
2434 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2435 wakaba 1.79 !!!cp ('t124');
2436 wakaba 1.49 pop @{$self->{open_elements}};
2437    
2438     ## Reprocess in the "after head" insertion mode...
2439 wakaba 1.79 } else {
2440     !!!cp ('t125');
2441 wakaba 1.49 }
2442    
2443     ## "after head" insertion mode
2444 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2445 wakaba 1.54 if ($token->{tag_name} eq 'body') {
2446 wakaba 1.79 !!!cp ('t126');
2447 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2448     } elsif ($token->{tag_name} eq 'frameset') {
2449 wakaba 1.79 !!!cp ('t127');
2450 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
2451     } else {
2452     die "$0: tag name: $self->{tag_name}";
2453     }
2454 wakaba 1.125 !!!nack ('t127.1');
2455 wakaba 1.1 !!!next-token;
2456 wakaba 1.126 next B;
2457 wakaba 1.1 } else {
2458 wakaba 1.79 !!!cp ('t128');
2459 wakaba 1.1 #
2460     }
2461 wakaba 1.49
2462 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2463 wakaba 1.79 !!!cp ('t129');
2464 wakaba 1.49 ## As if </noscript>
2465     pop @{$self->{open_elements}};
2466 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2467     text => $token->{tag_name}, token => $token);
2468 wakaba 1.49
2469     ## Reprocess in the "in head" insertion mode...
2470     ## As if </head>
2471 wakaba 1.25 pop @{$self->{open_elements}};
2472 wakaba 1.49
2473     ## Reprocess in the "after head" insertion mode...
2474 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2475 wakaba 1.79 !!!cp ('t130');
2476 wakaba 1.49 ## As if </head>
2477 wakaba 1.25 pop @{$self->{open_elements}};
2478 wakaba 1.49
2479     ## Reprocess in the "after head" insertion mode...
2480 wakaba 1.79 } else {
2481     !!!cp ('t131');
2482 wakaba 1.49 }
2483    
2484     ## "after head" insertion mode
2485     ## As if <body>
2486 wakaba 1.116 !!!insert-element ('body',, $token);
2487 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2488 wakaba 1.49 ## reprocess
2489 wakaba 1.125 !!!ack-later;
2490 wakaba 1.126 next B;
2491 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2492 wakaba 1.49 if ($token->{tag_name} eq 'head') {
2493 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2494 wakaba 1.79 !!!cp ('t132');
2495 wakaba 1.50 ## As if <head>
2496 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2497 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2498 wakaba 1.123 push @{$self->{open_elements}},
2499     [$self->{head_element}, $el_category->{head}];
2500 wakaba 1.50
2501     ## Reprocess in the "in head" insertion mode...
2502     pop @{$self->{open_elements}};
2503 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2504 wakaba 1.50 !!!next-token;
2505 wakaba 1.126 next B;
2506 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2507 wakaba 1.79 !!!cp ('t133');
2508 wakaba 1.49 ## As if </noscript>
2509     pop @{$self->{open_elements}};
2510 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2511     text => 'head', token => $token);
2512 wakaba 1.49
2513     ## Reprocess in the "in head" insertion mode...
2514 wakaba 1.50 pop @{$self->{open_elements}};
2515 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2516 wakaba 1.50 !!!next-token;
2517 wakaba 1.126 next B;
2518 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2519 wakaba 1.79 !!!cp ('t134');
2520 wakaba 1.49 pop @{$self->{open_elements}};
2521 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2522 wakaba 1.49 !!!next-token;
2523 wakaba 1.126 next B;
2524 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2525     !!!cp ('t134.1');
2526 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
2527     token => $token);
2528 wakaba 1.139 ## Ignore the token
2529     !!!next-token;
2530     next B;
2531 wakaba 1.49 } else {
2532 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2533 wakaba 1.49 }
2534     } elsif ($token->{tag_name} eq 'noscript') {
2535 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2536 wakaba 1.79 !!!cp ('t136');
2537 wakaba 1.49 pop @{$self->{open_elements}};
2538 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
2539 wakaba 1.49 !!!next-token;
2540 wakaba 1.126 next B;
2541 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
2542     $self->{insertion_mode} == AFTER_HEAD_IM) {
2543 wakaba 1.79 !!!cp ('t137');
2544 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2545     text => 'noscript', token => $token);
2546 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2547     !!!next-token;
2548 wakaba 1.126 next B;
2549 wakaba 1.49 } else {
2550 wakaba 1.79 !!!cp ('t138');
2551 wakaba 1.49 #
2552     }
2553     } elsif ({
2554 wakaba 1.31 body => 1, html => 1,
2555     }->{$token->{tag_name}}) {
2556 wakaba 1.203 ## TODO: This branch is entirely redundant.
2557     if ($self->{insertion_mode} == BEFORE_HEAD_IM or
2558 wakaba 1.139 $self->{insertion_mode} == IN_HEAD_IM or
2559     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2560 wakaba 1.79 !!!cp ('t140');
2561 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2562     text => $token->{tag_name}, token => $token);
2563 wakaba 1.49 ## Ignore the token
2564     !!!next-token;
2565 wakaba 1.126 next B;
2566 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2567     !!!cp ('t140.1');
2568 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2569     text => $token->{tag_name}, token => $token);
2570 wakaba 1.139 ## Ignore the token
2571     !!!next-token;
2572     next B;
2573 wakaba 1.79 } else {
2574 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2575 wakaba 1.49 }
2576 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
2577     !!!cp ('t142');
2578 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2579     text => $token->{tag_name}, token => $token);
2580 wakaba 1.139 ## Ignore the token
2581     !!!next-token;
2582     next B;
2583 wakaba 1.224 } elsif ($token->{tag_name} eq 'br') {
2584     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2585     !!!cp ('t142.2');
2586     ## (before head) as if <head>, (in head) as if </head>
2587     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2588     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2589     $self->{insertion_mode} = AFTER_HEAD_IM;
2590 wakaba 1.139
2591 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2592     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2593     !!!cp ('t143.2');
2594     ## As if </head>
2595     pop @{$self->{open_elements}};
2596     $self->{insertion_mode} = AFTER_HEAD_IM;
2597 wakaba 1.139
2598 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2599     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2600     !!!cp ('t143.3');
2601     ## NOTE: Two parse errors for <head><noscript></br>
2602     !!!parse-error (type => 'unmatched end tag',
2603     text => 'br', token => $token);
2604     ## As if </noscript>
2605     pop @{$self->{open_elements}};
2606     $self->{insertion_mode} = IN_HEAD_IM;
2607 wakaba 1.50
2608 wakaba 1.224 ## Reprocess in the "in head" insertion mode...
2609     ## As if </head>
2610     pop @{$self->{open_elements}};
2611     $self->{insertion_mode} = AFTER_HEAD_IM;
2612 wakaba 1.139
2613 wakaba 1.224 ## Reprocess in the "after head" insertion mode...
2614     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2615     !!!cp ('t143.4');
2616     #
2617     } else {
2618     die "$0: $self->{insertion_mode}: Unknown insertion mode";
2619     }
2620 wakaba 1.50
2621 wakaba 1.224 #
2622     } else { ## Other end tags
2623 wakaba 1.139 !!!cp ('t145');
2624 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2625     text => $token->{tag_name}, token => $token);
2626 wakaba 1.139 ## Ignore the token
2627     !!!next-token;
2628     next B;
2629 wakaba 1.49 }
2630    
2631 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2632 wakaba 1.79 !!!cp ('t146');
2633 wakaba 1.49 ## As if </noscript>
2634     pop @{$self->{open_elements}};
2635 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2636     text => $token->{tag_name}, token => $token);
2637 wakaba 1.49
2638     ## Reprocess in the "in head" insertion mode...
2639     ## As if </head>
2640     pop @{$self->{open_elements}};
2641    
2642     ## Reprocess in the "after head" insertion mode...
2643 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2644 wakaba 1.79 !!!cp ('t147');
2645 wakaba 1.49 ## As if </head>
2646     pop @{$self->{open_elements}};
2647    
2648     ## Reprocess in the "after head" insertion mode...
2649 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2650 wakaba 1.82 ## ISSUE: This case cannot be reached?
2651 wakaba 1.79 !!!cp ('t148');
2652 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2653     text => $token->{tag_name}, token => $token);
2654 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2655     !!!next-token;
2656 wakaba 1.126 next B;
2657 wakaba 1.79 } else {
2658     !!!cp ('t149');
2659 wakaba 1.1 }
2660    
2661 wakaba 1.49 ## "after head" insertion mode
2662     ## As if <body>
2663 wakaba 1.116 !!!insert-element ('body',, $token);
2664 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2665 wakaba 1.52 ## reprocess
2666 wakaba 1.224 next B;
2667 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2668     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2669     !!!cp ('t149.1');
2670    
2671     ## NOTE: As if <head>
2672 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2673 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
2674     ($self->{head_element});
2675 wakaba 1.123 #push @{$self->{open_elements}},
2676     # [$self->{head_element}, $el_category->{head}];
2677 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
2678     ## NOTE: Reprocess.
2679    
2680     ## NOTE: As if </head>
2681     #pop @{$self->{open_elements}};
2682     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2683     ## NOTE: Reprocess.
2684    
2685     #
2686     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2687     !!!cp ('t149.2');
2688    
2689     ## NOTE: As if </head>
2690     pop @{$self->{open_elements}};
2691     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2692     ## NOTE: Reprocess.
2693    
2694     #
2695     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2696     !!!cp ('t149.3');
2697    
2698 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
2699 wakaba 1.104
2700     ## As if </noscript>
2701     pop @{$self->{open_elements}};
2702     #$self->{insertion_mode} = IN_HEAD_IM;
2703     ## NOTE: Reprocess.
2704    
2705     ## NOTE: As if </head>
2706     pop @{$self->{open_elements}};
2707     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2708     ## NOTE: Reprocess.
2709    
2710     #
2711     } else {
2712     !!!cp ('t149.4');
2713     #
2714     }
2715    
2716     ## NOTE: As if <body>
2717 wakaba 1.116 !!!insert-element ('body',, $token);
2718 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
2719     ## NOTE: Reprocess.
2720 wakaba 1.126 next B;
2721 wakaba 1.104 } else {
2722     die "$0: $token->{type}: Unknown token type";
2723     }
2724 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
2725 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2726 wakaba 1.79 !!!cp ('t150');
2727 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
2728     $reconstruct_active_formatting_elements->($insert_to_current);
2729    
2730     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2731    
2732     !!!next-token;
2733 wakaba 1.126 next B;
2734 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
2735 wakaba 1.52 if ({
2736     caption => 1, col => 1, colgroup => 1, tbody => 1,
2737     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2738     }->{$token->{tag_name}}) {
2739 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2740 wakaba 1.52 ## have an element in table scope
2741 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
2742 wakaba 1.52 my $node = $self->{open_elements}->[$_];
2743 wakaba 1.206 if ($node->[1] == TABLE_CELL_EL) {
2744 wakaba 1.79 !!!cp ('t151');
2745 wakaba 1.108
2746     ## Close the cell
2747 wakaba 1.125 !!!back-token; # <x>
2748 wakaba 1.122 $token = {type => END_TAG_TOKEN,
2749     tag_name => $node->[0]->manakai_local_name,
2750 wakaba 1.114 line => $token->{line},
2751     column => $token->{column}};
2752 wakaba 1.126 next B;
2753 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2754 wakaba 1.79 !!!cp ('t152');
2755 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
2756     last;
2757 wakaba 1.52 }
2758 wakaba 1.108 }
2759    
2760     !!!cp ('t153');
2761     !!!parse-error (type => 'start tag not allowed',
2762 wakaba 1.153 text => $token->{tag_name}, token => $token);
2763 wakaba 1.108 ## Ignore the token
2764 wakaba 1.125 !!!nack ('t153.1');
2765 wakaba 1.108 !!!next-token;
2766 wakaba 1.126 next B;
2767 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2768 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2769     token => $token);
2770 wakaba 1.52
2771 wakaba 1.108 ## NOTE: As if </caption>.
2772 wakaba 1.52 ## have a table element in table scope
2773     my $i;
2774 wakaba 1.108 INSCOPE: {
2775     for (reverse 0..$#{$self->{open_elements}}) {
2776     my $node = $self->{open_elements}->[$_];
2777 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2778 wakaba 1.108 !!!cp ('t155');
2779     $i = $_;
2780     last INSCOPE;
2781 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2782 wakaba 1.108 !!!cp ('t156');
2783     last;
2784     }
2785 wakaba 1.52 }
2786 wakaba 1.108
2787     !!!cp ('t157');
2788     !!!parse-error (type => 'start tag not allowed',
2789 wakaba 1.153 text => $token->{tag_name}, token => $token);
2790 wakaba 1.108 ## Ignore the token
2791 wakaba 1.125 !!!nack ('t157.1');
2792 wakaba 1.108 !!!next-token;
2793 wakaba 1.126 next B;
2794 wakaba 1.52 } # INSCOPE
2795    
2796     ## generate implied end tags
2797 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2798     & END_TAG_OPTIONAL_EL) {
2799 wakaba 1.79 !!!cp ('t158');
2800 wakaba 1.86 pop @{$self->{open_elements}};
2801 wakaba 1.52 }
2802    
2803 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2804 wakaba 1.79 !!!cp ('t159');
2805 wakaba 1.122 !!!parse-error (type => 'not closed',
2806 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2807 wakaba 1.122 ->manakai_local_name,
2808     token => $token);
2809 wakaba 1.79 } else {
2810     !!!cp ('t160');
2811 wakaba 1.52 }
2812    
2813     splice @{$self->{open_elements}}, $i;
2814    
2815     $clear_up_to_marker->();
2816    
2817 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2818 wakaba 1.52
2819     ## reprocess
2820 wakaba 1.125 !!!ack-later;
2821 wakaba 1.126 next B;
2822 wakaba 1.52 } else {
2823 wakaba 1.79 !!!cp ('t161');
2824 wakaba 1.52 #
2825     }
2826     } else {
2827 wakaba 1.79 !!!cp ('t162');
2828 wakaba 1.52 #
2829     }
2830 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2831 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2832 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2833 wakaba 1.43 ## have an element in table scope
2834 wakaba 1.52 my $i;
2835 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2836     my $node = $self->{open_elements}->[$_];
2837 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2838 wakaba 1.79 !!!cp ('t163');
2839 wakaba 1.52 $i = $_;
2840 wakaba 1.43 last INSCOPE;
2841 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2842 wakaba 1.79 !!!cp ('t164');
2843 wakaba 1.43 last INSCOPE;
2844     }
2845     } # INSCOPE
2846 wakaba 1.52 unless (defined $i) {
2847 wakaba 1.79 !!!cp ('t165');
2848 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2849     text => $token->{tag_name},
2850     token => $token);
2851 wakaba 1.43 ## Ignore the token
2852     !!!next-token;
2853 wakaba 1.126 next B;
2854 wakaba 1.43 }
2855    
2856 wakaba 1.52 ## generate implied end tags
2857 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2858     & END_TAG_OPTIONAL_EL) {
2859 wakaba 1.79 !!!cp ('t166');
2860 wakaba 1.86 pop @{$self->{open_elements}};
2861 wakaba 1.52 }
2862 wakaba 1.86
2863 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2864     ne $token->{tag_name}) {
2865 wakaba 1.79 !!!cp ('t167');
2866 wakaba 1.122 !!!parse-error (type => 'not closed',
2867 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2868 wakaba 1.122 ->manakai_local_name,
2869     token => $token);
2870 wakaba 1.79 } else {
2871     !!!cp ('t168');
2872 wakaba 1.52 }
2873    
2874     splice @{$self->{open_elements}}, $i;
2875    
2876     $clear_up_to_marker->();
2877    
2878 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
2879 wakaba 1.52
2880     !!!next-token;
2881 wakaba 1.126 next B;
2882 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2883 wakaba 1.79 !!!cp ('t169');
2884 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2885     text => $token->{tag_name}, token => $token);
2886 wakaba 1.52 ## Ignore the token
2887     !!!next-token;
2888 wakaba 1.126 next B;
2889 wakaba 1.52 } else {
2890 wakaba 1.79 !!!cp ('t170');
2891 wakaba 1.52 #
2892     }
2893     } elsif ($token->{tag_name} eq 'caption') {
2894 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2895 wakaba 1.43 ## have a table element in table scope
2896     my $i;
2897 wakaba 1.108 INSCOPE: {
2898     for (reverse 0..$#{$self->{open_elements}}) {
2899     my $node = $self->{open_elements}->[$_];
2900 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2901 wakaba 1.108 !!!cp ('t171');
2902     $i = $_;
2903     last INSCOPE;
2904 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2905 wakaba 1.108 !!!cp ('t172');
2906     last;
2907     }
2908 wakaba 1.43 }
2909 wakaba 1.108
2910     !!!cp ('t173');
2911     !!!parse-error (type => 'unmatched end tag',
2912 wakaba 1.153 text => $token->{tag_name}, token => $token);
2913 wakaba 1.108 ## Ignore the token
2914     !!!next-token;
2915 wakaba 1.126 next B;
2916 wakaba 1.43 } # INSCOPE
2917    
2918     ## generate implied end tags
2919 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2920     & END_TAG_OPTIONAL_EL) {
2921 wakaba 1.79 !!!cp ('t174');
2922 wakaba 1.86 pop @{$self->{open_elements}};
2923 wakaba 1.43 }
2924 wakaba 1.52
2925 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2926 wakaba 1.79 !!!cp ('t175');
2927 wakaba 1.122 !!!parse-error (type => 'not closed',
2928 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2929 wakaba 1.122 ->manakai_local_name,
2930     token => $token);
2931 wakaba 1.79 } else {
2932     !!!cp ('t176');
2933 wakaba 1.52 }
2934    
2935     splice @{$self->{open_elements}}, $i;
2936    
2937     $clear_up_to_marker->();
2938    
2939 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2940 wakaba 1.52
2941     !!!next-token;
2942 wakaba 1.126 next B;
2943 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2944 wakaba 1.79 !!!cp ('t177');
2945 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2946     text => $token->{tag_name}, token => $token);
2947 wakaba 1.52 ## Ignore the token
2948     !!!next-token;
2949 wakaba 1.126 next B;
2950 wakaba 1.52 } else {
2951 wakaba 1.79 !!!cp ('t178');
2952 wakaba 1.52 #
2953     }
2954     } elsif ({
2955     table => 1, tbody => 1, tfoot => 1,
2956     thead => 1, tr => 1,
2957     }->{$token->{tag_name}} and
2958 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2959 wakaba 1.52 ## have an element in table scope
2960     my $i;
2961     my $tn;
2962 wakaba 1.108 INSCOPE: {
2963     for (reverse 0..$#{$self->{open_elements}}) {
2964     my $node = $self->{open_elements}->[$_];
2965 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2966 wakaba 1.108 !!!cp ('t179');
2967     $i = $_;
2968    
2969     ## Close the cell
2970 wakaba 1.125 !!!back-token; # </x>
2971 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2972     line => $token->{line},
2973     column => $token->{column}};
2974 wakaba 1.126 next B;
2975 wakaba 1.206 } elsif ($node->[1] == TABLE_CELL_EL) {
2976 wakaba 1.108 !!!cp ('t180');
2977 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
2978 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
2979     ## in scope in the stack of open elements by definition.
2980 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2981 wakaba 1.108 ## ISSUE: Can this be reached?
2982     !!!cp ('t181');
2983     last;
2984     }
2985 wakaba 1.52 }
2986 wakaba 1.108
2987 wakaba 1.79 !!!cp ('t182');
2988 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
2989 wakaba 1.153 text => $token->{tag_name}, token => $token);
2990 wakaba 1.52 ## Ignore the token
2991     !!!next-token;
2992 wakaba 1.126 next B;
2993 wakaba 1.108 } # INSCOPE
2994 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
2995 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2996 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2997     token => $token);
2998 wakaba 1.52
2999     ## As if </caption>
3000     ## have a table element in table scope
3001     my $i;
3002     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3003     my $node = $self->{open_elements}->[$_];
3004 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
3005 wakaba 1.79 !!!cp ('t184');
3006 wakaba 1.52 $i = $_;
3007     last INSCOPE;
3008 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3009 wakaba 1.79 !!!cp ('t185');
3010 wakaba 1.52 last INSCOPE;
3011     }
3012     } # INSCOPE
3013     unless (defined $i) {
3014 wakaba 1.79 !!!cp ('t186');
3015 wakaba 1.209 ## TODO: Wrong error type?
3016 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3017     text => 'caption', token => $token);
3018 wakaba 1.52 ## Ignore the token
3019     !!!next-token;
3020 wakaba 1.126 next B;
3021 wakaba 1.52 }
3022    
3023     ## generate implied end tags
3024 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3025 wakaba 1.79 !!!cp ('t187');
3026 wakaba 1.86 pop @{$self->{open_elements}};
3027 wakaba 1.52 }
3028    
3029 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
3030 wakaba 1.79 !!!cp ('t188');
3031 wakaba 1.122 !!!parse-error (type => 'not closed',
3032 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3033 wakaba 1.122 ->manakai_local_name,
3034     token => $token);
3035 wakaba 1.79 } else {
3036     !!!cp ('t189');
3037 wakaba 1.52 }
3038    
3039     splice @{$self->{open_elements}}, $i;
3040    
3041     $clear_up_to_marker->();
3042    
3043 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3044 wakaba 1.52
3045     ## reprocess
3046 wakaba 1.126 next B;
3047 wakaba 1.52 } elsif ({
3048     body => 1, col => 1, colgroup => 1, html => 1,
3049     }->{$token->{tag_name}}) {
3050 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
3051 wakaba 1.79 !!!cp ('t190');
3052 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3053     text => $token->{tag_name}, token => $token);
3054 wakaba 1.52 ## Ignore the token
3055     !!!next-token;
3056 wakaba 1.126 next B;
3057 wakaba 1.52 } else {
3058 wakaba 1.79 !!!cp ('t191');
3059 wakaba 1.52 #
3060     }
3061 wakaba 1.210 } elsif ({
3062     tbody => 1, tfoot => 1,
3063     thead => 1, tr => 1,
3064     }->{$token->{tag_name}} and
3065     ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
3066     !!!cp ('t192');
3067     !!!parse-error (type => 'unmatched end tag',
3068     text => $token->{tag_name}, token => $token);
3069     ## Ignore the token
3070     !!!next-token;
3071     next B;
3072     } else {
3073     !!!cp ('t193');
3074     #
3075     }
3076 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3077     for my $entry (@{$self->{open_elements}}) {
3078 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
3079 wakaba 1.104 !!!cp ('t75');
3080 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3081 wakaba 1.104 last;
3082     }
3083     }
3084    
3085     ## Stop parsing.
3086     last B;
3087 wakaba 1.52 } else {
3088     die "$0: $token->{type}: Unknown token type";
3089     }
3090    
3091     $insert = $insert_to_current;
3092     #
3093 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3094 wakaba 1.229 if ($token->{type} == START_TAG_TOKEN) {
3095 wakaba 1.153 if ({
3096 wakaba 1.210 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3097 wakaba 1.153 th => 1, td => 1,
3098     }->{$token->{tag_name}}) {
3099 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3100 wakaba 1.153 ## Clear back to table context
3101     while (not ($self->{open_elements}->[-1]->[1]
3102     & TABLE_SCOPING_EL)) {
3103     !!!cp ('t201');
3104     pop @{$self->{open_elements}};
3105     }
3106    
3107     !!!insert-element ('tbody',, $token);
3108     $self->{insertion_mode} = IN_TABLE_BODY_IM;
3109     ## reprocess in the "in table body" insertion mode...
3110     }
3111    
3112 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3113 wakaba 1.153 unless ($token->{tag_name} eq 'tr') {
3114     !!!cp ('t202');
3115     !!!parse-error (type => 'missing start tag:tr', token => $token);
3116     }
3117 wakaba 1.43
3118 wakaba 1.153 ## Clear back to table body context
3119     while (not ($self->{open_elements}->[-1]->[1]
3120     & TABLE_ROWS_SCOPING_EL)) {
3121     !!!cp ('t203');
3122     ## ISSUE: Can this case be reached?
3123     pop @{$self->{open_elements}};
3124     }
3125 wakaba 1.43
3126 wakaba 1.202 $self->{insertion_mode} = IN_ROW_IM;
3127     if ($token->{tag_name} eq 'tr') {
3128     !!!cp ('t204');
3129     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3130     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3131     !!!nack ('t204');
3132     !!!next-token;
3133     next B;
3134     } else {
3135     !!!cp ('t205');
3136     !!!insert-element ('tr',, $token);
3137     ## reprocess in the "in row" insertion mode
3138     }
3139     } else {
3140     !!!cp ('t206');
3141     }
3142 wakaba 1.52
3143     ## Clear back to table row context
3144 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3145     & TABLE_ROW_SCOPING_EL)) {
3146 wakaba 1.79 !!!cp ('t207');
3147 wakaba 1.52 pop @{$self->{open_elements}};
3148 wakaba 1.43 }
3149 wakaba 1.52
3150 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3151     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3152     $self->{insertion_mode} = IN_CELL_IM;
3153 wakaba 1.52
3154 wakaba 1.202 push @$active_formatting_elements, ['#marker', ''];
3155 wakaba 1.52
3156 wakaba 1.202 !!!nack ('t207.1');
3157     !!!next-token;
3158     next B;
3159     } elsif ({
3160     caption => 1, col => 1, colgroup => 1,
3161     tbody => 1, tfoot => 1, thead => 1,
3162     tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3163     }->{$token->{tag_name}}) {
3164 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3165 wakaba 1.202 ## As if </tr>
3166     ## have an element in table scope
3167     my $i;
3168     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3169     my $node = $self->{open_elements}->[$_];
3170 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3171 wakaba 1.202 !!!cp ('t208');
3172     $i = $_;
3173     last INSCOPE;
3174     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3175     !!!cp ('t209');
3176     last INSCOPE;
3177     }
3178     } # INSCOPE
3179     unless (defined $i) {
3180     !!!cp ('t210');
3181     ## TODO: This type is wrong.
3182     !!!parse-error (type => 'unmacthed end tag',
3183     text => $token->{tag_name}, token => $token);
3184     ## Ignore the token
3185     !!!nack ('t210.1');
3186 wakaba 1.52 !!!next-token;
3187 wakaba 1.126 next B;
3188 wakaba 1.202 }
3189 wakaba 1.43
3190 wakaba 1.52 ## Clear back to table row context
3191 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3192     & TABLE_ROW_SCOPING_EL)) {
3193 wakaba 1.79 !!!cp ('t211');
3194 wakaba 1.83 ## ISSUE: Can this case be reached?
3195 wakaba 1.52 pop @{$self->{open_elements}};
3196 wakaba 1.1 }
3197 wakaba 1.43
3198 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3199 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3200 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
3201 wakaba 1.79 !!!cp ('t212');
3202 wakaba 1.52 ## reprocess
3203 wakaba 1.125 !!!ack-later;
3204 wakaba 1.126 next B;
3205 wakaba 1.52 } else {
3206 wakaba 1.79 !!!cp ('t213');
3207 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3208     }
3209 wakaba 1.1 }
3210 wakaba 1.52
3211 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3212 wakaba 1.52 ## have an element in table scope
3213 wakaba 1.43 my $i;
3214     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3215     my $node = $self->{open_elements}->[$_];
3216 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3217 wakaba 1.79 !!!cp ('t214');
3218 wakaba 1.43 $i = $_;
3219     last INSCOPE;
3220 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3221 wakaba 1.79 !!!cp ('t215');
3222 wakaba 1.43 last INSCOPE;
3223     }
3224     } # INSCOPE
3225 wakaba 1.52 unless (defined $i) {
3226 wakaba 1.79 !!!cp ('t216');
3227 wakaba 1.153 ## TODO: This error type is wrong.
3228     !!!parse-error (type => 'unmatched end tag',
3229     text => $token->{tag_name}, token => $token);
3230 wakaba 1.52 ## Ignore the token
3231 wakaba 1.125 !!!nack ('t216.1');
3232 wakaba 1.52 !!!next-token;
3233 wakaba 1.126 next B;
3234 wakaba 1.43 }
3235 wakaba 1.52
3236     ## Clear back to table body context
3237 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3238     & TABLE_ROWS_SCOPING_EL)) {
3239 wakaba 1.79 !!!cp ('t217');
3240 wakaba 1.83 ## ISSUE: Can this state be reached?
3241 wakaba 1.52 pop @{$self->{open_elements}};
3242 wakaba 1.43 }
3243    
3244 wakaba 1.52 ## As if <{current node}>
3245     ## have an element in table scope
3246     ## true by definition
3247 wakaba 1.43
3248 wakaba 1.52 ## Clear back to table body context
3249     ## nop by definition
3250 wakaba 1.43
3251 wakaba 1.52 pop @{$self->{open_elements}};
3252 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3253 wakaba 1.52 ## reprocess in "in table" insertion mode...
3254 wakaba 1.79 } else {
3255     !!!cp ('t218');
3256 wakaba 1.52 }
3257    
3258 wakaba 1.202 if ($token->{tag_name} eq 'col') {
3259     ## Clear back to table context
3260     while (not ($self->{open_elements}->[-1]->[1]
3261     & TABLE_SCOPING_EL)) {
3262     !!!cp ('t219');
3263     ## ISSUE: Can this state be reached?
3264     pop @{$self->{open_elements}};
3265     }
3266    
3267     !!!insert-element ('colgroup',, $token);
3268     $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3269     ## reprocess
3270     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3271     !!!ack-later;
3272     next B;
3273     } elsif ({
3274     caption => 1,
3275     colgroup => 1,
3276     tbody => 1, tfoot => 1, thead => 1,
3277     }->{$token->{tag_name}}) {
3278     ## Clear back to table context
3279 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3280     & TABLE_SCOPING_EL)) {
3281 wakaba 1.79 !!!cp ('t220');
3282 wakaba 1.83 ## ISSUE: Can this state be reached?
3283 wakaba 1.52 pop @{$self->{open_elements}};
3284 wakaba 1.1 }
3285 wakaba 1.52
3286 wakaba 1.202 push @$active_formatting_elements, ['#marker', '']
3287     if $token->{tag_name} eq 'caption';
3288 wakaba 1.52
3289 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3290     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3291     $self->{insertion_mode} = {
3292     caption => IN_CAPTION_IM,
3293     colgroup => IN_COLUMN_GROUP_IM,
3294     tbody => IN_TABLE_BODY_IM,
3295     tfoot => IN_TABLE_BODY_IM,
3296     thead => IN_TABLE_BODY_IM,
3297     }->{$token->{tag_name}};
3298     !!!next-token;
3299     !!!nack ('t220.1');
3300     next B;
3301     } else {
3302     die "$0: in table: <>: $token->{tag_name}";
3303     }
3304 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3305 wakaba 1.122 !!!parse-error (type => 'not closed',
3306 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3307 wakaba 1.122 ->manakai_local_name,
3308     token => $token);
3309 wakaba 1.1
3310 wakaba 1.52 ## As if </table>
3311 wakaba 1.1 ## have a table element in table scope
3312     my $i;
3313 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3314     my $node = $self->{open_elements}->[$_];
3315 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3316 wakaba 1.79 !!!cp ('t221');
3317 wakaba 1.1 $i = $_;
3318     last INSCOPE;
3319 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3320 wakaba 1.79 !!!cp ('t222');
3321 wakaba 1.1 last INSCOPE;
3322     }
3323     } # INSCOPE
3324     unless (defined $i) {
3325 wakaba 1.79 !!!cp ('t223');
3326 wakaba 1.83 ## TODO: The following is wrong, maybe.
3327 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
3328     token => $token);
3329 wakaba 1.52 ## Ignore tokens </table><table>
3330 wakaba 1.125 !!!nack ('t223.1');
3331 wakaba 1.1 !!!next-token;
3332 wakaba 1.126 next B;
3333 wakaba 1.1 }
3334    
3335 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
3336 wakaba 1.1 ## generate implied end tags
3337 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3338 wakaba 1.79 !!!cp ('t224');
3339 wakaba 1.86 pop @{$self->{open_elements}};
3340 wakaba 1.1 }
3341    
3342 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3343 wakaba 1.79 !!!cp ('t225');
3344 wakaba 1.122 ## NOTE: |<table><tr><table>|
3345     !!!parse-error (type => 'not closed',
3346 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3347 wakaba 1.122 ->manakai_local_name,
3348     token => $token);
3349 wakaba 1.79 } else {
3350     !!!cp ('t226');
3351 wakaba 1.1 }
3352    
3353 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3354 wakaba 1.95 pop @{$open_tables};
3355 wakaba 1.1
3356 wakaba 1.52 $self->_reset_insertion_mode;
3357 wakaba 1.1
3358 wakaba 1.125 ## reprocess
3359     !!!ack-later;
3360 wakaba 1.126 next B;
3361 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
3362 wakaba 1.233 !!!cp ('t227.8');
3363     ## NOTE: This is an "as if in head" code clone.
3364     $parse_rcdata->(CDATA_CONTENT_MODEL);
3365     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3366     next B;
3367 wakaba 1.100 } elsif ($token->{tag_name} eq 'script') {
3368 wakaba 1.233 !!!cp ('t227.6');
3369     ## NOTE: This is an "as if in head" code clone.
3370     $script_start_tag->();
3371     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3372     next B;
3373 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
3374 wakaba 1.233 if ($token->{attributes}->{type}) {
3375     my $type = $token->{attributes}->{type}->{value};
3376     $type =~ tr/A-Z/a-z/; ## ASCII case-insensitive.
3377     if ($type eq 'hidden') {
3378     !!!cp ('t227.3');
3379     !!!parse-error (type => 'in table',
3380     text => $token->{tag_name}, token => $token);
3381 wakaba 1.98
3382 wakaba 1.233 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3383     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3384 wakaba 1.98
3385 wakaba 1.233 ## TODO: form element pointer
3386 wakaba 1.98
3387 wakaba 1.233 pop @{$self->{open_elements}};
3388 wakaba 1.98
3389 wakaba 1.233 !!!next-token;
3390     !!!ack ('t227.2.1');
3391     next B;
3392 wakaba 1.98 } else {
3393     !!!cp ('t227.1');
3394     #
3395     }
3396     } else {
3397     !!!cp ('t227.4');
3398     #
3399     }
3400 wakaba 1.58 } else {
3401 wakaba 1.79 !!!cp ('t227');
3402 wakaba 1.58 #
3403     }
3404 wakaba 1.98
3405 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
3406     token => $token);
3407 wakaba 1.98
3408     $insert = $insert_to_foster;
3409     #
3410 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
3411 wakaba 1.210 if ($token->{tag_name} eq 'tr' and
3412     ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3413     ## have an element in table scope
3414 wakaba 1.52 my $i;
3415     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3416     my $node = $self->{open_elements}->[$_];
3417 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3418 wakaba 1.79 !!!cp ('t228');
3419 wakaba 1.52 $i = $_;
3420     last INSCOPE;
3421 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3422 wakaba 1.79 !!!cp ('t229');
3423 wakaba 1.52 last INSCOPE;
3424     }
3425     } # INSCOPE
3426     unless (defined $i) {
3427 wakaba 1.79 !!!cp ('t230');
3428 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3429     text => $token->{tag_name}, token => $token);
3430 wakaba 1.52 ## Ignore the token
3431 wakaba 1.125 !!!nack ('t230.1');
3432 wakaba 1.42 !!!next-token;
3433 wakaba 1.126 next B;
3434 wakaba 1.79 } else {
3435     !!!cp ('t232');
3436 wakaba 1.42 }
3437    
3438 wakaba 1.52 ## Clear back to table row context
3439 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3440     & TABLE_ROW_SCOPING_EL)) {
3441 wakaba 1.79 !!!cp ('t231');
3442 wakaba 1.83 ## ISSUE: Can this state be reached?
3443 wakaba 1.52 pop @{$self->{open_elements}};
3444     }
3445 wakaba 1.42
3446 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3447 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3448 wakaba 1.52 !!!next-token;
3449 wakaba 1.125 !!!nack ('t231.1');
3450 wakaba 1.126 next B;
3451 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3452 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3453 wakaba 1.52 ## As if </tr>
3454     ## have an element in table scope
3455     my $i;
3456     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3457     my $node = $self->{open_elements}->[$_];
3458 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3459 wakaba 1.79 !!!cp ('t233');
3460 wakaba 1.52 $i = $_;
3461     last INSCOPE;
3462 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3463 wakaba 1.79 !!!cp ('t234');
3464 wakaba 1.52 last INSCOPE;
3465 wakaba 1.42 }
3466 wakaba 1.52 } # INSCOPE
3467     unless (defined $i) {
3468 wakaba 1.79 !!!cp ('t235');
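3469 wakaba 1.83 ## TODO: The following is wrong: |text| is given the token type, not a tag name (probably should be 'tr', as this is the "as if </tr>" case).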
3470 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3471     text => $token->{type}, token => $token);
3472 wakaba 1.52 ## Ignore the token
3473 wakaba 1.125 !!!nack ('t236.1');
3474 wakaba 1.52 !!!next-token;
3475 wakaba 1.126 next B;
3476 wakaba 1.42 }
3477 wakaba 1.52
3478     ## Clear back to table row context
3479 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3480     & TABLE_ROW_SCOPING_EL)) {
3481 wakaba 1.79 !!!cp ('t236');
3482 wakaba 1.83 ## ISSUE: Can this state be reached?
3483 wakaba 1.46 pop @{$self->{open_elements}};
3484 wakaba 1.1 }
3485 wakaba 1.46
3486 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3487 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3488 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
3489 wakaba 1.1 }
3490    
3491 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3492 wakaba 1.52 ## have an element in table scope
3493     my $i;
3494     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3495     my $node = $self->{open_elements}->[$_];
3496 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3497 wakaba 1.79 !!!cp ('t237');
3498 wakaba 1.52 $i = $_;
3499     last INSCOPE;
3500 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3501 wakaba 1.79 !!!cp ('t238');
3502 wakaba 1.52 last INSCOPE;
3503     }
3504     } # INSCOPE
3505     unless (defined $i) {
3506 wakaba 1.79 !!!cp ('t239');
3507 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3508     text => $token->{tag_name}, token => $token);
3509 wakaba 1.52 ## Ignore the token
3510 wakaba 1.125 !!!nack ('t239.1');
3511 wakaba 1.52 !!!next-token;
3512 wakaba 1.126 next B;
3513 wakaba 1.47 }
3514    
3515     ## Clear back to table body context
3516 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3517     & TABLE_ROWS_SCOPING_EL)) {
3518 wakaba 1.79 !!!cp ('t240');
3519 wakaba 1.47 pop @{$self->{open_elements}};
3520     }
3521    
3522 wakaba 1.52 ## As if </{current node}>
3523     ## have an element in table scope
3524     ## true by definition
3525    
3526     ## Clear back to table body context
3527     ## nop by definition
3528    
3529     pop @{$self->{open_elements}};
3530 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3531 wakaba 1.52 ## reprocess in the "in table" insertion mode...
3532     }
3533    
3534 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
3535     ## When you edit the code fragment below, please ensure that
3536     ## the code for <table> in the "in table" insertion mode
3537     ## is synced with it.
3538    
3539 wakaba 1.52 ## have a table element in table scope
3540     my $i;
3541     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3542     my $node = $self->{open_elements}->[$_];
3543 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3544 wakaba 1.79 !!!cp ('t241');
3545 wakaba 1.52 $i = $_;
3546     last INSCOPE;
3547 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3548 wakaba 1.79 !!!cp ('t242');
3549 wakaba 1.52 last INSCOPE;
3550 wakaba 1.47 }
3551 wakaba 1.52 } # INSCOPE
3552     unless (defined $i) {
3553 wakaba 1.79 !!!cp ('t243');
3554 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3555     text => $token->{tag_name}, token => $token);
3556 wakaba 1.52 ## Ignore the token
3557 wakaba 1.125 !!!nack ('t243.1');
3558 wakaba 1.52 !!!next-token;
3559 wakaba 1.126 next B;
3560 wakaba 1.3 }
3561 wakaba 1.52
3562     splice @{$self->{open_elements}}, $i;
3563 wakaba 1.95 pop @{$open_tables};
3564 wakaba 1.1
3565 wakaba 1.52 $self->_reset_insertion_mode;
3566 wakaba 1.47
3567     !!!next-token;
3568 wakaba 1.126 next B;
3569 wakaba 1.47 } elsif ({
3570 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
3571 wakaba 1.52 }->{$token->{tag_name}} and
3572 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
3573 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3574 wakaba 1.52 ## have an element in table scope
3575     my $i;
3576     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3577     my $node = $self->{open_elements}->[$_];
3578 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3579 wakaba 1.79 !!!cp ('t247');
3580 wakaba 1.52 $i = $_;
3581     last INSCOPE;
3582 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3583 wakaba 1.79 !!!cp ('t248');
3584 wakaba 1.52 last INSCOPE;
3585     }
3586     } # INSCOPE
3587     unless (defined $i) {
3588 wakaba 1.79 !!!cp ('t249');
3589 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3590     text => $token->{tag_name}, token => $token);
3591 wakaba 1.52 ## Ignore the token
3592 wakaba 1.125 !!!nack ('t249.1');
3593 wakaba 1.52 !!!next-token;
3594 wakaba 1.126 next B;
3595 wakaba 1.52 }
3596    
3597 wakaba 1.48 ## As if </tr>
3598     ## have an element in table scope
3599     my $i;
3600     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3601     my $node = $self->{open_elements}->[$_];
3602 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3603 wakaba 1.79 !!!cp ('t250');
3604 wakaba 1.48 $i = $_;
3605     last INSCOPE;
3606 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3607 wakaba 1.79 !!!cp ('t251');
3608 wakaba 1.48 last INSCOPE;
3609     }
3610     } # INSCOPE
3611 wakaba 1.52 unless (defined $i) {
3612 wakaba 1.79 !!!cp ('t252');
3613 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3614     text => 'tr', token => $token);
3615 wakaba 1.52 ## Ignore the token
3616 wakaba 1.125 !!!nack ('t252.1');
3617 wakaba 1.52 !!!next-token;
3618 wakaba 1.126 next B;
3619 wakaba 1.52 }
3620 wakaba 1.48
3621     ## Clear back to table row context
3622 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3623     & TABLE_ROW_SCOPING_EL)) {
3624 wakaba 1.79 !!!cp ('t253');
3625 wakaba 1.83 ## ISSUE: Can this case be reached?
3626 wakaba 1.48 pop @{$self->{open_elements}};
3627     }
3628    
3629     pop @{$self->{open_elements}}; # tr
3630 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3631 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3632     }
3633    
3634     ## have an element in table scope
3635     my $i;
3636     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3637     my $node = $self->{open_elements}->[$_];
3638 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3639 wakaba 1.79 !!!cp ('t254');
3640 wakaba 1.52 $i = $_;
3641     last INSCOPE;
3642 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3643 wakaba 1.79 !!!cp ('t255');
3644 wakaba 1.52 last INSCOPE;
3645     }
3646     } # INSCOPE
3647     unless (defined $i) {
3648 wakaba 1.79 !!!cp ('t256');
3649 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3650     text => $token->{tag_name}, token => $token);
3651 wakaba 1.52 ## Ignore the token
3652 wakaba 1.125 !!!nack ('t256.1');
3653 wakaba 1.52 !!!next-token;
3654 wakaba 1.126 next B;
3655 wakaba 1.52 }
3656    
3657     ## Clear back to table body context
3658 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3659     & TABLE_ROWS_SCOPING_EL)) {
3660 wakaba 1.79 !!!cp ('t257');
3661 wakaba 1.83 ## ISSUE: Can this case be reached?
3662 wakaba 1.52 pop @{$self->{open_elements}};
3663     }
3664    
3665     pop @{$self->{open_elements}};
3666 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3667 wakaba 1.125 !!!nack ('t257.1');
3668 wakaba 1.52 !!!next-token;
3669 wakaba 1.126 next B;
3670 wakaba 1.52 } elsif ({
3671     body => 1, caption => 1, col => 1, colgroup => 1,
3672     html => 1, td => 1, th => 1,
3673 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3674     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3675 wakaba 1.52 }->{$token->{tag_name}}) {
3676 wakaba 1.125 !!!cp ('t258');
3677 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3678     text => $token->{tag_name}, token => $token);
3679 wakaba 1.125 ## Ignore the token
3680     !!!nack ('t258.1');
3681     !!!next-token;
3682 wakaba 1.126 next B;
3683 wakaba 1.58 } else {
3684 wakaba 1.79 !!!cp ('t259');
3685 wakaba 1.153 !!!parse-error (type => 'in table:/',
3686     text => $token->{tag_name}, token => $token);
3687 wakaba 1.52
3688 wakaba 1.58 $insert = $insert_to_foster;
3689     #
3690     }
3691 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3692 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3693 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3694 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3695 wakaba 1.104 !!!cp ('t259.1');
3696 wakaba 1.105 #
3697 wakaba 1.104 } else {
3698     !!!cp ('t259.2');
3699 wakaba 1.105 #
3700 wakaba 1.104 }
3701    
3702     ## Stop parsing
3703     last B;
3704 wakaba 1.58 } else {
3705     die "$0: $token->{type}: Unknown token type";
3706     }
3707 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3708 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3709 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3710 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3711     unless (length $token->{data}) {
3712 wakaba 1.79 !!!cp ('t260');
3713 wakaba 1.52 !!!next-token;
3714 wakaba 1.126 next B;
3715 wakaba 1.52 }
3716     }
3717    
3718 wakaba 1.79 !!!cp ('t261');
3719 wakaba 1.52 #
3720 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3721 wakaba 1.52 if ($token->{tag_name} eq 'col') {
3722 wakaba 1.79 !!!cp ('t262');
3723 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3724 wakaba 1.52 pop @{$self->{open_elements}};
3725 wakaba 1.125 !!!ack ('t262.1');
3726 wakaba 1.52 !!!next-token;
3727 wakaba 1.126 next B;
3728 wakaba 1.52 } else {
3729 wakaba 1.79 !!!cp ('t263');
3730 wakaba 1.52 #
3731     }
3732 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3733 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
3734 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3735 wakaba 1.79 !!!cp ('t264');
3736 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3737     text => 'colgroup', token => $token);
3738 wakaba 1.52 ## Ignore the token
3739     !!!next-token;
3740 wakaba 1.126 next B;
3741 wakaba 1.52 } else {
3742 wakaba 1.79 !!!cp ('t265');
3743 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3744 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3745 wakaba 1.52 !!!next-token;
3746 wakaba 1.126 next B;
3747 wakaba 1.52 }
3748     } elsif ($token->{tag_name} eq 'col') {
3749 wakaba 1.79 !!!cp ('t266');
3750 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3751     text => 'col', token => $token);
3752 wakaba 1.52 ## Ignore the token
3753     !!!next-token;
3754 wakaba 1.126 next B;
3755 wakaba 1.52 } else {
3756 wakaba 1.79 !!!cp ('t267');
3757 wakaba 1.52 #
3758     }
3759 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3760 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3761 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3762     !!!cp ('t270.2');
3763     ## Stop parsing.
3764     last B;
3765     } else {
3766     ## NOTE: As if </colgroup>.
3767     !!!cp ('t270.1');
3768     pop @{$self->{open_elements}}; # colgroup
3769     $self->{insertion_mode} = IN_TABLE_IM;
3770     ## Reprocess.
3771 wakaba 1.126 next B;
3772 wakaba 1.104 }
3773     } else {
3774     die "$0: $token->{type}: Unknown token type";
3775     }
3776 wakaba 1.52
3777     ## As if </colgroup>
3778 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3779 wakaba 1.79 !!!cp ('t269');
3780 wakaba 1.104 ## TODO: Wrong error type?
3781 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3782     text => 'colgroup', token => $token);
3783 wakaba 1.52 ## Ignore the token
3784 wakaba 1.125 !!!nack ('t269.1');
3785 wakaba 1.52 !!!next-token;
3786 wakaba 1.126 next B;
3787 wakaba 1.52 } else {
3788 wakaba 1.79 !!!cp ('t270');
3789 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3790 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3791 wakaba 1.125 !!!ack-later;
3792 wakaba 1.52 ## reprocess
3793 wakaba 1.126 next B;
3794 wakaba 1.52 }
3795 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3796 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3797 wakaba 1.79 !!!cp ('t271');
3798 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3799     !!!next-token;
3800 wakaba 1.126 next B;
3801 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3802 wakaba 1.123 if ($token->{tag_name} eq 'option') {
3803 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3804 wakaba 1.123 !!!cp ('t272');
3805     ## As if </option>
3806     pop @{$self->{open_elements}};
3807     } else {
3808     !!!cp ('t273');
3809     }
3810 wakaba 1.52
3811 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3812 wakaba 1.125 !!!nack ('t273.1');
3813 wakaba 1.123 !!!next-token;
3814 wakaba 1.126 next B;
3815 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
3816 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3817 wakaba 1.123 !!!cp ('t274');
3818     ## As if </option>
3819     pop @{$self->{open_elements}};
3820     } else {
3821     !!!cp ('t275');
3822     }
3823 wakaba 1.52
3824 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3825 wakaba 1.123 !!!cp ('t276');
3826     ## As if </optgroup>
3827     pop @{$self->{open_elements}};
3828     } else {
3829     !!!cp ('t277');
3830     }
3831 wakaba 1.52
3832 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3833 wakaba 1.125 !!!nack ('t277.1');
3834 wakaba 1.123 !!!next-token;
3835 wakaba 1.126 next B;
3836 wakaba 1.146 } elsif ({
3837 wakaba 1.216 select => 1, input => 1, textarea => 1, keygen => 1,
3838 wakaba 1.146 }->{$token->{tag_name}} or
3839 wakaba 1.210 (($self->{insertion_mode} & IM_MASK)
3840     == IN_SELECT_IN_TABLE_IM and
3841 wakaba 1.101 {
3842     caption => 1, table => 1,
3843     tbody => 1, tfoot => 1, thead => 1,
3844     tr => 1, td => 1, th => 1,
3845     }->{$token->{tag_name}})) {
3846 wakaba 1.222
3847     ## 1. Parse error.
3848     if ($token->{tag_name} eq 'select') {
3849     !!!parse-error (type => 'select in select', ## XXX: documentation
3850     token => $token);
3851     } else {
3852     !!!parse-error (type => 'not closed', text => 'select',
3853     token => $token);
3854     }
3855    
3856     ## 2./<select>-1. Unless "have an element in table scope" (select):
3857 wakaba 1.123 my $i;
3858     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3859     my $node = $self->{open_elements}->[$_];
3860 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3861 wakaba 1.123 !!!cp ('t278');
3862     $i = $_;
3863     last INSCOPE;
3864     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3865     !!!cp ('t279');
3866     last INSCOPE;
3867     }
3868     } # INSCOPE
3869     unless (defined $i) {
3870     !!!cp ('t280');
3871 wakaba 1.222 if ($token->{tag_name} eq 'select') {
3872     ## NOTE: This error would be raised when
3873     ## |select.innerHTML = '<select>'| is executed; in this
3874     ## case two errors, "select in select" and "unmatched
3875     ## end tags" are reported to the user, the latter might
3876     ## be confusing but this is what the spec requires.
3877     !!!parse-error (type => 'unmatched end tag',
3878     text => 'select',
3879     token => $token);
3880     }
3881     ## Ignore the token.
3882 wakaba 1.125 !!!nack ('t280.1');
3883 wakaba 1.123 !!!next-token;
3884 wakaba 1.126 next B;
3885 wakaba 1.123 }
3886 wakaba 1.222
3887     ## 3. Otherwise, as if there were <select>:
3888 wakaba 1.52
3889 wakaba 1.123 !!!cp ('t281');
3890     splice @{$self->{open_elements}}, $i;
3891 wakaba 1.52
3892 wakaba 1.123 $self->_reset_insertion_mode;
3893 wakaba 1.47
3894 wakaba 1.101 if ($token->{tag_name} eq 'select') {
3895 wakaba 1.125 !!!nack ('t281.2');
3896 wakaba 1.101 !!!next-token;
3897 wakaba 1.126 next B;
3898 wakaba 1.101 } else {
3899     !!!cp ('t281.1');
3900 wakaba 1.125 !!!ack-later;
3901 wakaba 1.101 ## Reprocess the token.
3902 wakaba 1.126 next B;
3903 wakaba 1.101 }
3904 wakaba 1.226 } elsif ($token->{tag_name} eq 'script') {
3905     !!!cp ('t281.3');
3906     ## NOTE: This is an "as if in head" code clone
3907     $script_start_tag->();
3908     next B;
3909 wakaba 1.58 } else {
3910 wakaba 1.79 !!!cp ('t282');
3911 wakaba 1.153 !!!parse-error (type => 'in select',
3912     text => $token->{tag_name}, token => $token);
3913 wakaba 1.58 ## Ignore the token
3914 wakaba 1.125 !!!nack ('t282.1');
3915 wakaba 1.58 !!!next-token;
3916 wakaba 1.126 next B;
3917 wakaba 1.58 }
3918     } elsif ($token->{type} == END_TAG_TOKEN) {
3919 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
3920 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3921     $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3922 wakaba 1.123 !!!cp ('t283');
3923     ## As if </option>
3924     splice @{$self->{open_elements}}, -2;
3925 wakaba 1.206 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3926 wakaba 1.123 !!!cp ('t284');
3927     pop @{$self->{open_elements}};
3928     } else {
3929     !!!cp ('t285');
3930 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3931     text => $token->{tag_name}, token => $token);
3932 wakaba 1.123 ## Ignore the token
3933     }
3934 wakaba 1.125 !!!nack ('t285.1');
3935 wakaba 1.123 !!!next-token;
3936 wakaba 1.126 next B;
3937 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
3938 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3939 wakaba 1.123 !!!cp ('t286');
3940     pop @{$self->{open_elements}};
3941     } else {
3942     !!!cp ('t287');
3943 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3944     text => $token->{tag_name}, token => $token);
3945 wakaba 1.123 ## Ignore the token
3946     }
3947 wakaba 1.125 !!!nack ('t287.1');
3948 wakaba 1.123 !!!next-token;
3949 wakaba 1.126 next B;
3950 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
3951     ## have an element in table scope
3952     my $i;
3953     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3954     my $node = $self->{open_elements}->[$_];
3955 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3956 wakaba 1.123 !!!cp ('t288');
3957     $i = $_;
3958     last INSCOPE;
3959     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3960     !!!cp ('t289');
3961     last INSCOPE;
3962     }
3963     } # INSCOPE
3964     unless (defined $i) {
3965     !!!cp ('t290');
3966 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3967     text => $token->{tag_name}, token => $token);
3968 wakaba 1.123 ## Ignore the token
3969 wakaba 1.125 !!!nack ('t290.1');
3970 wakaba 1.123 !!!next-token;
3971 wakaba 1.126 next B;
3972 wakaba 1.123 }
3973 wakaba 1.52
3974 wakaba 1.123 !!!cp ('t291');
3975     splice @{$self->{open_elements}}, $i;
3976 wakaba 1.52
3977 wakaba 1.123 $self->_reset_insertion_mode;
3978 wakaba 1.52
3979 wakaba 1.125 !!!nack ('t291.1');
3980 wakaba 1.123 !!!next-token;
3981 wakaba 1.126 next B;
3982 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK)
3983     == IN_SELECT_IN_TABLE_IM and
3984 wakaba 1.101 {
3985     caption => 1, table => 1, tbody => 1,
3986     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3987     }->{$token->{tag_name}}) {
3988 wakaba 1.83 ## TODO: The following is wrong?
3989 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3990     text => $token->{tag_name}, token => $token);
3991 wakaba 1.52
3992 wakaba 1.123 ## have an element in table scope
3993     my $i;
3994     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3995     my $node = $self->{open_elements}->[$_];
3996     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3997     !!!cp ('t292');
3998     $i = $_;
3999     last INSCOPE;
4000     } elsif ($node->[1] & TABLE_SCOPING_EL) {
4001     !!!cp ('t293');
4002     last INSCOPE;
4003     }
4004     } # INSCOPE
4005     unless (defined $i) {
4006     !!!cp ('t294');
4007     ## Ignore the token
4008 wakaba 1.125 !!!nack ('t294.1');
4009 wakaba 1.123 !!!next-token;
4010 wakaba 1.126 next B;
4011 wakaba 1.123 }
4012 wakaba 1.52
4013 wakaba 1.123 ## As if </select>
4014     ## have an element in table scope
4015     undef $i;
4016     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4017     my $node = $self->{open_elements}->[$_];
4018 wakaba 1.206 if ($node->[1] == SELECT_EL) {
4019 wakaba 1.123 !!!cp ('t295');
4020     $i = $_;
4021     last INSCOPE;
4022     } elsif ($node->[1] & TABLE_SCOPING_EL) {
4023 wakaba 1.83 ## ISSUE: Can this state be reached?
4024 wakaba 1.123 !!!cp ('t296');
4025     last INSCOPE;
4026     }
4027     } # INSCOPE
4028     unless (defined $i) {
4029     !!!cp ('t297');
4030 wakaba 1.83 ## TODO: The following error type is correct?
4031 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4032     text => 'select', token => $token);
4033 wakaba 1.123 ## Ignore the </select> token
4034 wakaba 1.125 !!!nack ('t297.1');
4035 wakaba 1.123 !!!next-token; ## TODO: ok?
4036 wakaba 1.126 next B;
4037 wakaba 1.123 }
4038 wakaba 1.52
4039 wakaba 1.123 !!!cp ('t298');
4040     splice @{$self->{open_elements}}, $i;
4041 wakaba 1.52
4042 wakaba 1.123 $self->_reset_insertion_mode;
4043 wakaba 1.52
4044 wakaba 1.125 !!!ack-later;
4045 wakaba 1.123 ## reprocess
4046 wakaba 1.126 next B;
4047 wakaba 1.58 } else {
4048 wakaba 1.79 !!!cp ('t299');
4049 wakaba 1.153 !!!parse-error (type => 'in select:/',
4050     text => $token->{tag_name}, token => $token);
4051 wakaba 1.52 ## Ignore the token
4052 wakaba 1.125 !!!nack ('t299.3');
4053 wakaba 1.52 !!!next-token;
4054 wakaba 1.126 next B;
4055 wakaba 1.58 }
4056 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4057 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4058 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4059     !!!cp ('t299.1');
4060 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4061 wakaba 1.104 } else {
4062     !!!cp ('t299.2');
4063     }
4064    
4065     ## Stop parsing.
4066     last B;
4067 wakaba 1.58 } else {
4068     die "$0: $token->{type}: Unknown token type";
4069     }
4070 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4071 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4072 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4073 wakaba 1.52 my $data = $1;
4074     ## As if in body
4075     $reconstruct_active_formatting_elements->($insert_to_current);
4076    
4077     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4078    
4079     unless (length $token->{data}) {
4080 wakaba 1.79 !!!cp ('t300');
4081 wakaba 1.52 !!!next-token;
4082 wakaba 1.126 next B;
4083 wakaba 1.52 }
4084     }
4085    
4086 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4087 wakaba 1.79 !!!cp ('t301');
4088 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4089 wakaba 1.188 #
4090 wakaba 1.79 } else {
4091     !!!cp ('t302');
4092 wakaba 1.188 ## "after body" insertion mode
4093     !!!parse-error (type => 'after body:#text', token => $token);
4094     #
4095 wakaba 1.52 }
4096    
4097 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4098 wakaba 1.52 ## reprocess
4099 wakaba 1.126 next B;
4100 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4101 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4102 wakaba 1.79 !!!cp ('t303');
4103 wakaba 1.153 !!!parse-error (type => 'after html',
4104     text => $token->{tag_name}, token => $token);
4105 wakaba 1.188 #
4106 wakaba 1.79 } else {
4107     !!!cp ('t304');
4108 wakaba 1.188 ## "after body" insertion mode
4109     !!!parse-error (type => 'after body',
4110     text => $token->{tag_name}, token => $token);
4111     #
4112 wakaba 1.52 }
4113    
4114 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4115 wakaba 1.125 !!!ack-later;
4116 wakaba 1.52 ## reprocess
4117 wakaba 1.126 next B;
4118 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4119 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4120 wakaba 1.79 !!!cp ('t305');
4121 wakaba 1.153 !!!parse-error (type => 'after html:/',
4122     text => $token->{tag_name}, token => $token);
4123 wakaba 1.52
4124 wakaba 1.188 $self->{insertion_mode} = IN_BODY_IM;
4125     ## Reprocess.
4126     next B;
4127 wakaba 1.79 } else {
4128     !!!cp ('t306');
4129 wakaba 1.52 }
4130    
4131     ## "after body" insertion mode
4132     if ($token->{tag_name} eq 'html') {
4133     if (defined $self->{inner_html_node}) {
4134 wakaba 1.79 !!!cp ('t307');
4135 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4136     text => 'html', token => $token);
4137 wakaba 1.52 ## Ignore the token
4138     !!!next-token;
4139 wakaba 1.126 next B;
4140 wakaba 1.52 } else {
4141 wakaba 1.79 !!!cp ('t308');
4142 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4143 wakaba 1.52 !!!next-token;
4144 wakaba 1.126 next B;
4145 wakaba 1.52 }
4146     } else {
4147 wakaba 1.79 !!!cp ('t309');
4148 wakaba 1.153 !!!parse-error (type => 'after body:/',
4149     text => $token->{tag_name}, token => $token);
4150 wakaba 1.52
4151 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4152 wakaba 1.52 ## reprocess
4153 wakaba 1.126 next B;
4154 wakaba 1.52 }
4155 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4156     !!!cp ('t309.2');
4157     ## Stop parsing
4158     last B;
4159 wakaba 1.52 } else {
4160     die "$0: $token->{type}: Unknown token type";
4161     }
4162 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4163 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4164 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4165 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4166    
4167     unless (length $token->{data}) {
4168 wakaba 1.79 !!!cp ('t310');
4169 wakaba 1.52 !!!next-token;
4170 wakaba 1.126 next B;
4171 wakaba 1.52 }
4172     }
4173    
4174 wakaba 1.188 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4175 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4176 wakaba 1.79 !!!cp ('t311');
4177 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
4178 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4179 wakaba 1.79 !!!cp ('t312');
4180 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
4181 wakaba 1.158 } else { # "after after frameset"
4182 wakaba 1.79 !!!cp ('t313');
4183 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4184 wakaba 1.52 }
4185    
4186     ## Ignore the token.
4187     if (length $token->{data}) {
4188 wakaba 1.79 !!!cp ('t314');
4189 wakaba 1.52 ## reprocess the rest of characters
4190     } else {
4191 wakaba 1.79 !!!cp ('t315');
4192 wakaba 1.52 !!!next-token;
4193     }
4194 wakaba 1.126 next B;
4195 wakaba 1.52 }
4196    
4197     die qq[$0: Character "$token->{data}"];
4198 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4199 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4200 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4201 wakaba 1.79 !!!cp ('t318');
4202 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4203 wakaba 1.125 !!!nack ('t318.1');
4204 wakaba 1.52 !!!next-token;
4205 wakaba 1.126 next B;
4206 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
4207 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4208 wakaba 1.79 !!!cp ('t319');
4209 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4210 wakaba 1.52 pop @{$self->{open_elements}};
4211 wakaba 1.125 !!!ack ('t319.1');
4212 wakaba 1.52 !!!next-token;
4213 wakaba 1.126 next B;
4214 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
4215 wakaba 1.79 !!!cp ('t320');
4216 wakaba 1.148 ## NOTE: As if in head.
4217 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4218 wakaba 1.126 next B;
4219 wakaba 1.158
4220     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4221     ## has no parse error.
4222 wakaba 1.52 } else {
4223 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4224 wakaba 1.79 !!!cp ('t321');
4225 wakaba 1.153 !!!parse-error (type => 'in frameset',
4226     text => $token->{tag_name}, token => $token);
4227 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4228 wakaba 1.79 !!!cp ('t322');
4229 wakaba 1.153 !!!parse-error (type => 'after frameset',
4230     text => $token->{tag_name}, token => $token);
4231 wakaba 1.158 } else { # "after after frameset"
4232     !!!cp ('t322.2');
4233     !!!parse-error (type => 'after after frameset',
4234     text => $token->{tag_name}, token => $token);
4235 wakaba 1.52 }
4236     ## Ignore the token
4237 wakaba 1.125 !!!nack ('t322.1');
4238 wakaba 1.52 !!!next-token;
4239 wakaba 1.126 next B;
4240 wakaba 1.52 }
4241 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4242 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4243 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4244 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4245 wakaba 1.52 @{$self->{open_elements}} == 1) {
4246 wakaba 1.79 !!!cp ('t325');
4247 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4248     text => $token->{tag_name}, token => $token);
4249 wakaba 1.52 ## Ignore the token
4250     !!!next-token;
4251     } else {
4252 wakaba 1.79 !!!cp ('t326');
4253 wakaba 1.52 pop @{$self->{open_elements}};
4254     !!!next-token;
4255     }
4256 wakaba 1.47
4257 wakaba 1.52 if (not defined $self->{inner_html_node} and
4258 wakaba 1.206 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4259 wakaba 1.79 !!!cp ('t327');
4260 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4261 wakaba 1.79 } else {
4262     !!!cp ('t328');
4263 wakaba 1.52 }
4264 wakaba 1.126 next B;
4265 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
4266 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4267 wakaba 1.79 !!!cp ('t329');
4268 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4269 wakaba 1.52 !!!next-token;
4270 wakaba 1.126 next B;
4271 wakaba 1.52 } else {
4272 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4273 wakaba 1.79 !!!cp ('t330');
4274 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
4275     text => $token->{tag_name}, token => $token);
4276 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4277     !!!cp ('t330.1');
4278     !!!parse-error (type => 'after frameset:/',
4279     text => $token->{tag_name}, token => $token);
4280     } else { # "after after frameset"
4281 wakaba 1.79 !!!cp ('t331');
4282 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
4283 wakaba 1.153 text => $token->{tag_name}, token => $token);
4284 wakaba 1.52 }
4285     ## Ignore the token
4286     !!!next-token;
4287 wakaba 1.126 next B;
4288 wakaba 1.52 }
4289 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4290 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4291 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4292     !!!cp ('t331.1');
4293 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4294 wakaba 1.104 } else {
4295     !!!cp ('t331.2');
4296     }
4297    
4298     ## Stop parsing
4299     last B;
4300 wakaba 1.52 } else {
4301     die "$0: $token->{type}: Unknown token type";
4302     }
4303     } else {
4304     die "$0: $self->{insertion_mode}: Unknown insertion mode";
4305     }
4306 wakaba 1.47
4307 wakaba 1.52 ## "in body" insertion mode
4308 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
4309 wakaba 1.52 if ($token->{tag_name} eq 'script') {
4310 wakaba 1.79 !!!cp ('t332');
4311 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4312 wakaba 1.100 $script_start_tag->();
4313 wakaba 1.126 next B;
4314 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
4315 wakaba 1.79 !!!cp ('t333');
4316 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4317 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4318 wakaba 1.126 next B;
4319 wakaba 1.52 } elsif ({
4320 wakaba 1.232 base => 1, command => 1, link => 1,
4321 wakaba 1.52 }->{$token->{tag_name}}) {
4322 wakaba 1.79 !!!cp ('t334');
4323 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4324 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4325 wakaba 1.194 pop @{$self->{open_elements}};
4326 wakaba 1.125 !!!ack ('t334.1');
4327 wakaba 1.52 !!!next-token;
4328 wakaba 1.126 next B;
4329 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
4330     ## NOTE: This is an "as if in head" code clone, only "-t" differs
4331 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4332 wakaba 1.194 my $meta_el = pop @{$self->{open_elements}};
4333 wakaba 1.46
4334 wakaba 1.52 unless ($self->{confident}) {
4335 wakaba 1.134 if ($token->{attributes}->{charset}) {
4336 wakaba 1.79 !!!cp ('t335');
4337 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4338     ## in the {change_encoding} callback.
4339 wakaba 1.63 $self->{change_encoding}
4340 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
4341 wakaba 1.66
4342     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4343     ->set_user_data (manakai_has_reference =>
4344     $token->{attributes}->{charset}
4345     ->{has_reference});
4346 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4347     if ($token->{attributes}->{content}->{value}
4348 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4349 wakaba 1.189 [\x09\x0A\x0C\x0D\x20]*=
4350     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4351     ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4352     /x) {
4353 wakaba 1.79 !!!cp ('t336');
4354 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4355     ## in the {change_encoding} callback.
4356 wakaba 1.63 $self->{change_encoding}
4357 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4358 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4359     ->set_user_data (manakai_has_reference =>
4360     $token->{attributes}->{content}
4361     ->{has_reference});
4362 wakaba 1.63 }
4363 wakaba 1.52 }
4364 wakaba 1.66 } else {
4365     if ($token->{attributes}->{charset}) {
4366 wakaba 1.79 !!!cp ('t337');
4367 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4368     ->set_user_data (manakai_has_reference =>
4369     $token->{attributes}->{charset}
4370     ->{has_reference});
4371     }
4372 wakaba 1.68 if ($token->{attributes}->{content}) {
4373 wakaba 1.79 !!!cp ('t338');
4374 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4375     ->set_user_data (manakai_has_reference =>
4376     $token->{attributes}->{content}
4377     ->{has_reference});
4378     }
4379 wakaba 1.52 }
4380 wakaba 1.1
4381 wakaba 1.125 !!!ack ('t338.1');
4382 wakaba 1.52 !!!next-token;
4383 wakaba 1.126 next B;
4384 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
4385 wakaba 1.79 !!!cp ('t341');
4386 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4387 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4388 wakaba 1.126 next B;
4389 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
4390 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
4391 wakaba 1.46
4392 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
4393 wakaba 1.206 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4394 wakaba 1.79 !!!cp ('t342');
4395 wakaba 1.52 ## Ignore the token
4396     } else {
4397     my $body_el = $self->{open_elements}->[1]->[0];
4398     for my $attr_name (keys %{$token->{attributes}}) {
4399     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4400 wakaba 1.79 !!!cp ('t343');
4401 wakaba 1.52 $body_el->set_attribute_ns
4402     (undef, [undef, $attr_name],
4403     $token->{attributes}->{$attr_name}->{value});
4404     }
4405     }
4406     }
4407 wakaba 1.125 !!!nack ('t343.1');
4408 wakaba 1.52 !!!next-token;
4409 wakaba 1.126 next B;
4410 wakaba 1.52 } elsif ({
4411 wakaba 1.195 ## NOTE: Start tags for non-phrasing flow content elements
4412    
4413     ## NOTE: The normal one
4414     address => 1, article => 1, aside => 1, blockquote => 1,
4415     center => 1, datagrid => 1, details => 1, dialog => 1,
4416     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4417     footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4418     h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
4419     section => 1, ul => 1,
4420     ## NOTE: As normal, but drops leading newline
4421 wakaba 1.97 pre => 1, listing => 1,
4422 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
4423 wakaba 1.109 form => 1,
4424 wakaba 1.195
4425 wakaba 1.109 table => 1,
4426     hr => 1,
4427 wakaba 1.52 }->{$token->{tag_name}}) {
4428 wakaba 1.225
4429     ## 1. When there is an opening |form| element:
4430 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4431     !!!cp ('t350');
4432 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
4433 wakaba 1.109 ## Ignore the token
4434 wakaba 1.125 !!!nack ('t350.1');
4435 wakaba 1.109 !!!next-token;
4436 wakaba 1.126 next B;
4437 wakaba 1.109 }
4438    
4439 wakaba 1.225 ## 2. Close the |p| element, if any.
4440 wakaba 1.217 if ($token->{tag_name} ne 'table' or # The Hixie Quirk
4441     $self->{document}->manakai_compat_mode ne 'quirks') {
4442     ## has a p element in scope
4443     INSCOPE: for (reverse @{$self->{open_elements}}) {
4444     if ($_->[1] == P_EL) {
4445     !!!cp ('t344');
4446     !!!back-token; # <form>
4447     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4448     line => $token->{line}, column => $token->{column}};
4449     next B;
4450     } elsif ($_->[1] & SCOPING_EL) {
4451     !!!cp ('t345');
4452     last INSCOPE;
4453     }
4454     } # INSCOPE
4455     }
4456 wakaba 1.225
4457     ## 3. Close the opening <hn> element, if any.
4458     if ({h1 => 1, h2 => 1, h3 => 1,
4459     h4 => 1, h5 => 1, h6 => 1}->{$token->{tag_name}}) {
4460     if ($self->{open_elements}->[-1]->[1] == HEADING_EL) {
4461     !!!parse-error (type => 'not closed',
4462     text => $self->{open_elements}->[-1]->[0]->manakai_local_name,
4463     token => $token);
4464     pop @{$self->{open_elements}};
4465     }
4466     }
4467    
4468     ## 4. Insertion.
4469 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4470 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4471 wakaba 1.125 !!!nack ('t346.1');
4472 wakaba 1.52 !!!next-token;
4473 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4474 wakaba 1.52 $token->{data} =~ s/^\x0A//;
4475     unless (length $token->{data}) {
4476 wakaba 1.79 !!!cp ('t346');
4477 wakaba 1.1 !!!next-token;
4478 wakaba 1.79 } else {
4479     !!!cp ('t349');
4480 wakaba 1.52 }
4481 wakaba 1.79 } else {
4482     !!!cp ('t348');
4483 wakaba 1.52 }
4484 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
4485     !!!cp ('t347.1');
4486     $self->{form_element} = $self->{open_elements}->[-1]->[0];
4487    
4488 wakaba 1.125 !!!nack ('t347.2');
4489 wakaba 1.109 !!!next-token;
4490     } elsif ($token->{tag_name} eq 'table') {
4491     !!!cp ('t382');
4492     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4493    
4494     $self->{insertion_mode} = IN_TABLE_IM;
4495    
4496 wakaba 1.125 !!!nack ('t382.1');
4497 wakaba 1.109 !!!next-token;
4498     } elsif ($token->{tag_name} eq 'hr') {
4499     !!!cp ('t386');
4500     pop @{$self->{open_elements}};
4501    
4502 wakaba 1.125 !!!nack ('t386.1');
4503 wakaba 1.109 !!!next-token;
4504 wakaba 1.52 } else {
4505 wakaba 1.125 !!!nack ('t347.1');
4506 wakaba 1.52 !!!next-token;
4507     }
4508 wakaba 1.126 next B;
4509 wakaba 1.196 } elsif ($token->{tag_name} eq 'li') {
4510     ## NOTE: As normal, but imply </li> when there's another <li> ...
4511 wakaba 1.193
4512 wakaba 1.225 ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)::
4513     ## Interpreted as <li><foo/></li><li/> (non-conforming):
4514 wakaba 1.193 ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4515     ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4516     ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4517     ## object (Fx)
4518 wakaba 1.225 ## Generate non-tree (non-conforming):
4519 wakaba 1.193 ## basefont (IE7 (where basefont is non-void)), center (IE),
4520     ## form (IE), hn (IE)
4521 wakaba 1.225 ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)::
4522     ## Interpreted as <li><foo><li/></foo></li> (non-conforming):
4523 wakaba 1.193 ## div (Fx, S)
4524 wakaba 1.196
4525     my $non_optional;
4526 wakaba 1.52 my $i = -1;
4527 wakaba 1.196
4528     ## 1.
4529     for my $node (reverse @{$self->{open_elements}}) {
4530 wakaba 1.206 if ($node->[1] == LI_EL) {
4531 wakaba 1.196 ## 2. (a) As if </li>
4532     {
4533     ## If no </li> - not applied
4534     #
4535    
4536     ## Otherwise
4537    
4538     ## 1. generate implied end tags, except for </li>
4539     #
4540    
4541     ## 2. If current node != "li", parse error
4542     if ($non_optional) {
4543     !!!parse-error (type => 'not closed',
4544     text => $non_optional->[0]->manakai_local_name,
4545     token => $token);
4546     !!!cp ('t355');
4547     } else {
4548     !!!cp ('t356');
4549     }
4550    
4551     ## 3. Pop
4552     splice @{$self->{open_elements}}, $i;
4553 wakaba 1.52 }
4554 wakaba 1.196
4555     last; ## 2. (b) goto 5.
4556     } elsif (
4557     ## NOTE: not "formatting" and not "phrasing"
4558     ($node->[1] & SPECIAL_EL or
4559     $node->[1] & SCOPING_EL) and
4560     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4561 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4562     ) {
4563 wakaba 1.196 ## 3.
4564 wakaba 1.79 !!!cp ('t357');
4565 wakaba 1.196 last; ## goto 5.
4566     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4567 wakaba 1.79 !!!cp ('t358');
4568 wakaba 1.196 #
4569     } else {
4570     !!!cp ('t359');
4571     $non_optional ||= $node;
4572     #
4573 wakaba 1.52 }
4574 wakaba 1.196 ## 4.
4575     ## goto 2.
4576 wakaba 1.52 $i--;
4577 wakaba 1.196 }
4578    
4579     ## 5. (a) has a |p| element in scope
4580     INSCOPE: for (reverse @{$self->{open_elements}}) {
4581 wakaba 1.206 if ($_->[1] == P_EL) {
4582 wakaba 1.196 !!!cp ('t353');
4583 wakaba 1.198
4584     ## NOTE: |<p><li>|, for example.
4585    
4586 wakaba 1.196 !!!back-token; # <x>
4587     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4588     line => $token->{line}, column => $token->{column}};
4589     next B;
4590     } elsif ($_->[1] & SCOPING_EL) {
4591     !!!cp ('t354');
4592     last INSCOPE;
4593     }
4594     } # INSCOPE
4595    
4596     ## 5. (b) insert
4597 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4598 wakaba 1.125 !!!nack ('t359.1');
4599 wakaba 1.52 !!!next-token;
4600 wakaba 1.126 next B;
4601 wakaba 1.196 } elsif ($token->{tag_name} eq 'dt' or
4602     $token->{tag_name} eq 'dd') {
4603     ## NOTE: As normal, but imply </dt> or </dd> when ...
4604    
4605     my $non_optional;
4606     my $i = -1;
4607    
4608     ## 1.
4609     for my $node (reverse @{$self->{open_elements}}) {
4610 wakaba 1.207 if ($node->[1] == DTDD_EL) {
4611 wakaba 1.196 ## 2. (a) As if </dt> or </dd>
4612     {
4613     ## If no </dt> or </dd> - not applied
4614     #
4615    
4616     ## Otherwise
4617    
4618     ## 1. generate implied end tags, except for </dt> or </dd>
4619     #
4620    
4621     ## 2. If current node != "dt"|"dd", parse error
4622     if ($non_optional) {
4623     !!!parse-error (type => 'not closed',
4624     text => $non_optional->[0]->manakai_local_name,
4625     token => $token);
4626     !!!cp ('t355.1');
4627     } else {
4628     !!!cp ('t356.1');
4629     }
4630    
4631     ## 3. Pop
4632     splice @{$self->{open_elements}}, $i;
4633     }
4634    
4635     last; ## 2. (b) goto 5.
4636     } elsif (
4637     ## NOTE: not "formatting" and not "phrasing"
4638     ($node->[1] & SPECIAL_EL or
4639     $node->[1] & SCOPING_EL) and
4640     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4641    
4642 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4643     ) {
4644 wakaba 1.196 ## 3.
4645     !!!cp ('t357.1');
4646     last; ## goto 5.
4647     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4648     !!!cp ('t358.1');
4649     #
4650     } else {
4651     !!!cp ('t359.1');
4652     $non_optional ||= $node;
4653     #
4654     }
4655     ## 4.
4656     ## goto 2.
4657     $i--;
4658     }
4659    
4660     ## 5. (a) has a |p| element in scope
4661     INSCOPE: for (reverse @{$self->{open_elements}}) {
4662 wakaba 1.206 if ($_->[1] == P_EL) {
4663 wakaba 1.196 !!!cp ('t353.1');
4664     !!!back-token; # <x>
4665     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4666     line => $token->{line}, column => $token->{column}};
4667     next B;
4668     } elsif ($_->[1] & SCOPING_EL) {
4669     !!!cp ('t354.1');
4670     last INSCOPE;
4671     }
4672     } # INSCOPE
4673    
4674     ## 5. (b) insert
4675     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4676     !!!nack ('t359.2');
4677     !!!next-token;
4678     next B;
4679 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
4680 wakaba 1.195 ## NOTE: As normal, but effectively ends parsing
4681    
4682 wakaba 1.52 ## has a p element in scope
4683     INSCOPE: for (reverse @{$self->{open_elements}}) {
4684 wakaba 1.206 if ($_->[1] == P_EL) {
4685 wakaba 1.79 !!!cp ('t367');
4686 wakaba 1.125 !!!back-token; # <plaintext>
4687 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4688     line => $token->{line}, column => $token->{column}};
4689 wakaba 1.126 next B;
4690 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
4691 wakaba 1.79 !!!cp ('t368');
4692 wakaba 1.52 last INSCOPE;
4693 wakaba 1.46 }
4694 wakaba 1.52 } # INSCOPE
4695    
4696 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4697 wakaba 1.52
4698     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4699    
4700 wakaba 1.125 !!!nack ('t368.1');
4701 wakaba 1.52 !!!next-token;
4702 wakaba 1.126 next B;
4703 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
4704     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4705     my $node = $active_formatting_elements->[$i];
4706 wakaba 1.206 if ($node->[1] == A_EL) {
4707 wakaba 1.79 !!!cp ('t371');
4708 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
4709 wakaba 1.52
4710 wakaba 1.125 !!!back-token; # <a>
4711 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4712     line => $token->{line}, column => $token->{column}};
4713 wakaba 1.113 $formatting_end_tag->($token);
4714 wakaba 1.52
4715     AFE2: for (reverse 0..$#$active_formatting_elements) {
4716     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4717 wakaba 1.79 !!!cp ('t372');
4718 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
4719     last AFE2;
4720 wakaba 1.1 }
4721 wakaba 1.52 } # AFE2
4722     OE: for (reverse 0..$#{$self->{open_elements}}) {
4723     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4724 wakaba 1.79 !!!cp ('t373');
4725 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
4726     last OE;
4727 wakaba 1.1 }
4728 wakaba 1.52 } # OE
4729     last AFE;
4730     } elsif ($node->[0] eq '#marker') {
4731 wakaba 1.79 !!!cp ('t374');
4732 wakaba 1.52 last AFE;
4733     }
4734     } # AFE
4735    
4736     $reconstruct_active_formatting_elements->($insert_to_current);
4737 wakaba 1.1
4738 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4739 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4740 wakaba 1.1
4741 wakaba 1.125 !!!nack ('t374.1');
4742 wakaba 1.52 !!!next-token;
4743 wakaba 1.126 next B;
4744 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
4745     $reconstruct_active_formatting_elements->($insert_to_current);
4746 wakaba 1.1
4747 wakaba 1.52 ## has a |nobr| element in scope
4748     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4749     my $node = $self->{open_elements}->[$_];
4750 wakaba 1.206 if ($node->[1] == NOBR_EL) {
4751 wakaba 1.79 !!!cp ('t376');
4752 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
4753 wakaba 1.125 !!!back-token; # <nobr>
4754 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4755     line => $token->{line}, column => $token->{column}};
4756 wakaba 1.126 next B;
4757 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4758 wakaba 1.79 !!!cp ('t377');
4759 wakaba 1.52 last INSCOPE;
4760     }
4761     } # INSCOPE
4762    
4763 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4764 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4765    
4766 wakaba 1.125 !!!nack ('t377.1');
4767 wakaba 1.52 !!!next-token;
4768 wakaba 1.126 next B;
4769 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
4770     ## has a button element in scope
4771     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4772     my $node = $self->{open_elements}->[$_];
4773 wakaba 1.206 if ($node->[1] == BUTTON_EL) {
4774 wakaba 1.79 !!!cp ('t378');
4775 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
4776 wakaba 1.125 !!!back-token; # <button>
4777 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4778     line => $token->{line}, column => $token->{column}};
4779 wakaba 1.126 next B;
4780 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4781 wakaba 1.79 !!!cp ('t379');
4782 wakaba 1.52 last INSCOPE;
4783     }
4784     } # INSCOPE
4785    
4786     $reconstruct_active_formatting_elements->($insert_to_current);
4787    
4788 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4789 wakaba 1.85
4790     ## TODO: associate with $self->{form_element} if defined
4791    
4792 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
4793 wakaba 1.1
4794 wakaba 1.125 !!!nack ('t379.1');
4795 wakaba 1.52 !!!next-token;
4796 wakaba 1.126 next B;
4797 wakaba 1.103 } elsif ({
4798 wakaba 1.109 xmp => 1,
4799     iframe => 1,
4800     noembed => 1,
4801 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4802 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
4803 wakaba 1.103 }->{$token->{tag_name}}) {
4804 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
4805     !!!cp ('t381');
4806     $reconstruct_active_formatting_elements->($insert_to_current);
4807     } else {
4808     !!!cp ('t399');
4809     }
4810     ## NOTE: There is an "as if in body" code clone.
4811 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4812 wakaba 1.126 next B;
4813 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
4814 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
4815 wakaba 1.52
4816     if (defined $self->{form_element}) {
4817 wakaba 1.79 !!!cp ('t389');
4818 wakaba 1.52 ## Ignore the token
4819 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
4820 wakaba 1.52 !!!next-token;
4821 wakaba 1.126 next B;
4822 wakaba 1.52 } else {
4823 wakaba 1.147 !!!ack ('t391.1');
4824    
4825 wakaba 1.52 my $at = $token->{attributes};
4826     my $form_attrs;
4827     $form_attrs->{action} = $at->{action} if $at->{action};
4828     my $prompt_attr = $at->{prompt};
4829     $at->{name} = {name => 'name', value => 'isindex'};
4830     delete $at->{action};
4831     delete $at->{prompt};
4832     my @tokens = (
4833 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
4834 wakaba 1.114 attributes => $form_attrs,
4835     line => $token->{line}, column => $token->{column}},
4836     {type => START_TAG_TOKEN, tag_name => 'hr',
4837     line => $token->{line}, column => $token->{column}},
4838     {type => START_TAG_TOKEN, tag_name => 'label',
4839     line => $token->{line}, column => $token->{column}},
4840 wakaba 1.52 );
4841     if ($prompt_attr) {
4842 wakaba 1.79 !!!cp ('t390');
4843 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4844 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4845     };
4846 wakaba 1.1 } else {
4847 wakaba 1.79 !!!cp ('t391');
4848 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
4849 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
4850 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4851     }; # SHOULD
4852 wakaba 1.52 ## TODO: make this configurable
4853 wakaba 1.1 }
4854 wakaba 1.52 push @tokens,
4855 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4856     line => $token->{line}, column => $token->{column}},
4857 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4858 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
4859     line => $token->{line}, column => $token->{column}},
4860     {type => START_TAG_TOKEN, tag_name => 'hr',
4861     line => $token->{line}, column => $token->{column}},
4862     {type => END_TAG_TOKEN, tag_name => 'form',
4863     line => $token->{line}, column => $token->{column}};
4864 wakaba 1.52 !!!back-token (@tokens);
4865 wakaba 1.125 !!!next-token;
4866 wakaba 1.126 next B;
4867 wakaba 1.52 }
4868     } elsif ($token->{tag_name} eq 'textarea') {
4869 wakaba 1.224 ## 1. Insert
4870 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4871 wakaba 1.52
4872 wakaba 1.224 ## Step 2 # XXX
4873 wakaba 1.52 ## TODO: $self->{form_element} if defined
4874 wakaba 1.205
4875 wakaba 1.224 ## 2. Drop U+000A LINE FEED
4876 wakaba 1.205 $self->{ignore_newline} = 1;
4877    
4878 wakaba 1.224 ## 3. RCDATA
4879 wakaba 1.52 $self->{content_model} = RCDATA_CONTENT_MODEL;
4880     delete $self->{escape}; # MUST
4881 wakaba 1.205
4882 wakaba 1.224 ## 4., 6. Insertion mode
4883 wakaba 1.205 $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4884    
4885 wakaba 1.224 ## XXX: 5. frameset-ok flag
4886    
4887 wakaba 1.125 !!!nack ('t392.1');
4888 wakaba 1.52 !!!next-token;
4889 wakaba 1.126 next B;
4890 wakaba 1.201 } elsif ($token->{tag_name} eq 'optgroup' or
4891     $token->{tag_name} eq 'option') {
4892     ## has an |option| element in scope
4893     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4894     my $node = $self->{open_elements}->[$_];
4895 wakaba 1.206 if ($node->[1] == OPTION_EL) {
4896 wakaba 1.201 !!!cp ('t397.1');
4897     ## NOTE: As if </option>
4898     !!!back-token; # <option> or <optgroup>
4899     $token = {type => END_TAG_TOKEN, tag_name => 'option',
4900     line => $token->{line}, column => $token->{column}};
4901     next B;
4902     } elsif ($node->[1] & SCOPING_EL) {
4903     !!!cp ('t397.2');
4904     last INSCOPE;
4905     }
4906     } # INSCOPE
4907    
4908     $reconstruct_active_formatting_elements->($insert_to_current);
4909    
4910     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4911    
4912     !!!nack ('t397.3');
4913     !!!next-token;
4914     redo B;
4915 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
4916     $token->{tag_name} eq 'rp') {
4917     ## has a |ruby| element in scope
4918     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4919     my $node = $self->{open_elements}->[$_];
4920 wakaba 1.206 if ($node->[1] == RUBY_EL) {
4921 wakaba 1.151 !!!cp ('t398.1');
4922     ## generate implied end tags
4923     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4924     !!!cp ('t398.2');
4925     pop @{$self->{open_elements}};
4926     }
4927 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4928 wakaba 1.151 !!!cp ('t398.3');
4929     !!!parse-error (type => 'not closed',
4930 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4931 wakaba 1.151 ->manakai_local_name,
4932     token => $token);
4933     pop @{$self->{open_elements}}
4934 wakaba 1.206 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4935 wakaba 1.151 }
4936     last INSCOPE;
4937     } elsif ($node->[1] & SCOPING_EL) {
4938     !!!cp ('t398.4');
4939     last INSCOPE;
4940     }
4941     } # INSCOPE
4942 wakaba 1.212
4943     ## TODO: <non-ruby><rt> is not allowed.
4944 wakaba 1.151
4945     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4946    
4947     !!!nack ('t398.5');
4948     !!!next-token;
4949     redo B;
4950 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
4951     $token->{tag_name} eq 'svg') {
4952     $reconstruct_active_formatting_elements->($insert_to_current);
4953 wakaba 1.131
4954 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4955    
4956 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4957    
4958     ## "adjust foreign attributes" - done in insert-element-f
4959 wakaba 1.126
4960 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4961 wakaba 1.126
4962     if ($self->{self_closing}) {
4963     pop @{$self->{open_elements}};
4964 wakaba 1.201 !!!ack ('t398.6');
4965 wakaba 1.126 } else {
4966 wakaba 1.201 !!!cp ('t398.7');
4967 wakaba 1.126 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4968     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4969     ## mode, "in body" (not "in foreign content") secondary insertion
4970     ## mode, maybe.
4971     }
4972    
4973     !!!next-token;
4974     next B;
4975 wakaba 1.52 } elsif ({
4976     caption => 1, col => 1, colgroup => 1, frame => 1,
4977 wakaba 1.201 frameset => 1, head => 1,
4978 wakaba 1.52 tbody => 1, td => 1, tfoot => 1, th => 1,
4979     thead => 1, tr => 1,
4980     }->{$token->{tag_name}}) {
4981 wakaba 1.79 !!!cp ('t401');
4982 wakaba 1.153 !!!parse-error (type => 'in body',
4983     text => $token->{tag_name}, token => $token);
4984 wakaba 1.52 ## Ignore the token
4985 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
4986 wakaba 1.52 !!!next-token;
4987 wakaba 1.126 next B;
4988 wakaba 1.198 } elsif ($token->{tag_name} eq 'param' or
4989     $token->{tag_name} eq 'source') {
4990     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4991     pop @{$self->{open_elements}};
4992    
4993     !!!ack ('t398.5');
4994     !!!next-token;
4995     redo B;
4996 wakaba 1.52 } else {
4997 wakaba 1.110 if ($token->{tag_name} eq 'image') {
4998     !!!cp ('t384');
4999 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
5000 wakaba 1.110 $token->{tag_name} = 'img';
5001     } else {
5002     !!!cp ('t385');
5003     }
5004    
5005     ## NOTE: There is an "as if <br>" code clone.
5006 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
5007    
5008 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
5009 wakaba 1.109
5010 wakaba 1.110 if ({
5011     applet => 1, marquee => 1, object => 1,
5012     }->{$token->{tag_name}}) {
5013     !!!cp ('t380');
5014     push @$active_formatting_elements, ['#marker', ''];
5015 wakaba 1.125 !!!nack ('t380.1');
5016 wakaba 1.110 } elsif ({
5017     b => 1, big => 1, em => 1, font => 1, i => 1,
5018 wakaba 1.193 s => 1, small => 1, strike => 1,
5019 wakaba 1.110 strong => 1, tt => 1, u => 1,
5020     }->{$token->{tag_name}}) {
5021     !!!cp ('t375');
5022     push @$active_formatting_elements, $self->{open_elements}->[-1];
5023 wakaba 1.125 !!!nack ('t375.1');
5024 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
5025     !!!cp ('t388');
5026     ## TODO: associate with $self->{form_element} if defined
5027     pop @{$self->{open_elements}};
5028 wakaba 1.125 !!!ack ('t388.2');
5029 wakaba 1.110 } elsif ({
5030     area => 1, basefont => 1, bgsound => 1, br => 1,
5031 wakaba 1.198 embed => 1, img => 1, spacer => 1, wbr => 1,
5032 wakaba 1.231 keygen => 1,
5033 wakaba 1.110 }->{$token->{tag_name}}) {
5034     !!!cp ('t388.1');
5035     pop @{$self->{open_elements}};
5036 wakaba 1.125 !!!ack ('t388.3');
5037 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
5038 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
5039    
5040     if ($self->{insertion_mode} & TABLE_IMS or
5041     $self->{insertion_mode} & BODY_TABLE_IMS or
5042 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5043 wakaba 1.109 !!!cp ('t400.1');
5044     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5045     } else {
5046     !!!cp ('t400.2');
5047     $self->{insertion_mode} = IN_SELECT_IM;
5048     }
5049 wakaba 1.125 !!!nack ('t400.3');
5050 wakaba 1.110 } else {
5051 wakaba 1.125 !!!nack ('t402');
5052 wakaba 1.109 }
5053 wakaba 1.51
5054 wakaba 1.52 !!!next-token;
5055 wakaba 1.126 next B;
5056 wakaba 1.52 }
5057 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5058 wakaba 1.52 if ($token->{tag_name} eq 'body') {
5059 wakaba 1.225
5060     ## 1. If not "have an element in scope":
5061     ## "has a |body| element in scope"
5062 wakaba 1.107 my $i;
5063 wakaba 1.111 INSCOPE: {
5064     for (reverse @{$self->{open_elements}}) {
5065 wakaba 1.206 if ($_->[1] == BODY_EL) {
5066 wakaba 1.111 !!!cp ('t405');
5067     $i = $_;
5068     last INSCOPE;
5069 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5070 wakaba 1.111 !!!cp ('t405.1');
5071     last;
5072     }
5073 wakaba 1.52 }
5074 wakaba 1.111
5075 wakaba 1.200 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5076    
5077     !!!parse-error (type => 'unmatched end tag',
5078 wakaba 1.153 text => $token->{tag_name}, token => $token);
5079 wakaba 1.107 ## NOTE: Ignore the token.
5080 wakaba 1.52 !!!next-token;
5081 wakaba 1.126 next B;
5082 wakaba 1.111 } # INSCOPE
5083 wakaba 1.107
5084 wakaba 1.225 ## 2. If unclosed elements:
5085 wakaba 1.107 for (@{$self->{open_elements}}) {
5086 wakaba 1.220 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL ||
5087     $_->[1] == OPTGROUP_EL ||
5088     $_->[1] == OPTION_EL ||
5089     $_->[1] == RUBY_COMPONENT_EL) {
5090 wakaba 1.107 !!!cp ('t403');
5091 wakaba 1.122 !!!parse-error (type => 'not closed',
5092 wakaba 1.153 text => $_->[0]->manakai_local_name,
5093 wakaba 1.122 token => $token);
5094 wakaba 1.107 last;
5095     } else {
5096     !!!cp ('t404');
5097     }
5098     }
5099    
5100 wakaba 1.225 ## 3. Switch the insertion mode.
5101 wakaba 1.107 $self->{insertion_mode} = AFTER_BODY_IM;
5102     !!!next-token;
5103 wakaba 1.126 next B;
5104 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
5105 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
5106     ## up-to-date, though it has same effect as speced.
5107 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
5108 wakaba 1.206 $self->{open_elements}->[1]->[1] == BODY_EL) {
5109     unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5110 wakaba 1.79 !!!cp ('t406');
5111 wakaba 1.122 !!!parse-error (type => 'not closed',
5112 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
5113 wakaba 1.122 ->manakai_local_name,
5114     token => $token);
5115 wakaba 1.79 } else {
5116     !!!cp ('t407');
5117 wakaba 1.1 }
5118 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5119 wakaba 1.52 ## reprocess
5120 wakaba 1.126 next B;
5121 wakaba 1.51 } else {
5122 wakaba 1.79 !!!cp ('t408');
5123 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5124     text => $token->{tag_name}, token => $token);
5125 wakaba 1.52 ## Ignore the token
5126     !!!next-token;
5127 wakaba 1.126 next B;
5128 wakaba 1.51 }
5129 wakaba 1.52 } elsif ({
5130 wakaba 1.195 ## NOTE: End tags for non-phrasing flow content elements
5131    
5132     ## NOTE: The normal ones
5133     address => 1, article => 1, aside => 1, blockquote => 1,
5134     center => 1, datagrid => 1, details => 1, dialog => 1,
5135     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5136     footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
5137     ol => 1, pre => 1, section => 1, ul => 1,
5138    
5139     ## NOTE: As normal, but ... optional tags
5140 wakaba 1.52 dd => 1, dt => 1, li => 1,
5141 wakaba 1.195
5142 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5143 wakaba 1.52 }->{$token->{tag_name}}) {
5144 wakaba 1.197 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5145     ## Code for <dt> or <dd> start tags includes "as if </dt> or
5146     ## </dd>" code.
5147    
5148 wakaba 1.52 ## has an element in scope
5149     my $i;
5150     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5151     my $node = $self->{open_elements}->[$_];
5152 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5153 wakaba 1.79 !!!cp ('t410');
5154 wakaba 1.52 $i = $_;
5155 wakaba 1.87 last INSCOPE;
5156 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5157 wakaba 1.79 !!!cp ('t411');
5158 wakaba 1.52 last INSCOPE;
5159 wakaba 1.51 }
5160 wakaba 1.52 } # INSCOPE
5161 wakaba 1.89
5162     unless (defined $i) { # has an element in scope
5163     !!!cp ('t413');
5164 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5165     text => $token->{tag_name}, token => $token);
5166 wakaba 1.157 ## NOTE: Ignore the token.
5167 wakaba 1.89 } else {
5168     ## Step 1. generate implied end tags
5169     while ({
5170 wakaba 1.151 ## END_TAG_OPTIONAL_EL
5171 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
5172     dt => ($token->{tag_name} ne 'dt'),
5173     li => ($token->{tag_name} ne 'li'),
5174 wakaba 1.194 option => 1,
5175     optgroup => 1,
5176 wakaba 1.89 p => 1,
5177 wakaba 1.151 rt => 1,
5178     rp => 1,
5179 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5180 wakaba 1.89 !!!cp ('t409');
5181     pop @{$self->{open_elements}};
5182     }
5183    
5184     ## Step 2.
5185 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5186     ne $token->{tag_name}) {
5187 wakaba 1.79 !!!cp ('t412');
5188 wakaba 1.122 !!!parse-error (type => 'not closed',
5189 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5190 wakaba 1.122 ->manakai_local_name,
5191     token => $token);
5192 wakaba 1.51 } else {
5193 wakaba 1.89 !!!cp ('t414');
5194 wakaba 1.51 }
5195 wakaba 1.89
5196     ## Step 3.
5197 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5198 wakaba 1.89
5199     ## Step 4.
5200     $clear_up_to_marker->()
5201     if {
5202 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5203 wakaba 1.89 }->{$token->{tag_name}};
5204 wakaba 1.51 }
5205 wakaba 1.52 !!!next-token;
5206 wakaba 1.126 next B;
5207 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
5208 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
5209    
5210 wakaba 1.92 undef $self->{form_element};
5211    
5212 wakaba 1.52 ## has an element in scope
5213 wakaba 1.92 my $i;
5214 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5215     my $node = $self->{open_elements}->[$_];
5216 wakaba 1.206 if ($node->[1] == FORM_EL) {
5217 wakaba 1.79 !!!cp ('t418');
5218 wakaba 1.92 $i = $_;
5219 wakaba 1.52 last INSCOPE;
5220 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5221 wakaba 1.79 !!!cp ('t419');
5222 wakaba 1.52 last INSCOPE;
5223     }
5224     } # INSCOPE
5225 wakaba 1.92
5226     unless (defined $i) { # has an element in scope
5227 wakaba 1.79 !!!cp ('t421');
5228 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5229     text => $token->{tag_name}, token => $token);
5230 wakaba 1.157 ## NOTE: Ignore the token.
5231 wakaba 1.92 } else {
5232     ## Step 1. generate implied end tags
5233 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5234 wakaba 1.92 !!!cp ('t417');
5235     pop @{$self->{open_elements}};
5236     }
5237    
5238     ## Step 2.
5239 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5240     ne $token->{tag_name}) {
5241 wakaba 1.92 !!!cp ('t417.1');
5242 wakaba 1.122 !!!parse-error (type => 'not closed',
5243 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5244 wakaba 1.122 ->manakai_local_name,
5245     token => $token);
5246 wakaba 1.92 } else {
5247     !!!cp ('t420');
5248     }
5249    
5250     ## Step 3.
5251     splice @{$self->{open_elements}}, $i;
5252 wakaba 1.52 }
5253    
5254     !!!next-token;
5255 wakaba 1.126 next B;
5256 wakaba 1.52 } elsif ({
5257 wakaba 1.195 ## NOTE: As normal, except acts as a closer for any ...
5258 wakaba 1.52 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5259     }->{$token->{tag_name}}) {
5260     ## has an element in scope
5261     my $i;
5262     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5263     my $node = $self->{open_elements}->[$_];
5264 wakaba 1.206 if ($node->[1] == HEADING_EL) {
5265 wakaba 1.79 !!!cp ('t423');
5266 wakaba 1.52 $i = $_;
5267     last INSCOPE;
5268 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5269 wakaba 1.79 !!!cp ('t424');
5270 wakaba 1.52 last INSCOPE;
5271 wakaba 1.51 }
5272 wakaba 1.52 } # INSCOPE
5273 wakaba 1.93
5274     unless (defined $i) { # has an element in scope
5275     !!!cp ('t425.1');
5276 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5277     text => $token->{tag_name}, token => $token);
5278 wakaba 1.157 ## NOTE: Ignore the token.
5279 wakaba 1.79 } else {
5280 wakaba 1.93 ## Step 1. generate implied end tags
5281 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5282 wakaba 1.93 !!!cp ('t422');
5283     pop @{$self->{open_elements}};
5284     }
5285    
5286     ## Step 2.
5287 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5288     ne $token->{tag_name}) {
5289 wakaba 1.93 !!!cp ('t425');
5290 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5291     text => $token->{tag_name}, token => $token);
5292 wakaba 1.93 } else {
5293     !!!cp ('t426');
5294     }
5295    
5296     ## Step 3.
5297     splice @{$self->{open_elements}}, $i;
5298 wakaba 1.36 }
5299 wakaba 1.52
5300     !!!next-token;
5301 wakaba 1.126 next B;
5302 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
5303 wakaba 1.195 ## NOTE: As normal, except </p> implies <p> and ...
5304    
5305 wakaba 1.87 ## has an element in scope
5306 wakaba 1.197 my $non_optional;
5307 wakaba 1.87 my $i;
5308     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5309     my $node = $self->{open_elements}->[$_];
5310 wakaba 1.206 if ($node->[1] == P_EL) {
5311 wakaba 1.87 !!!cp ('t410.1');
5312     $i = $_;
5313 wakaba 1.88 last INSCOPE;
5314 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5315 wakaba 1.87 !!!cp ('t411.1');
5316     last INSCOPE;
5317 wakaba 1.197 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5318     ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5319     !!!cp ('t411.2');
5320     #
5321     } else {
5322     !!!cp ('t411.3');
5323     $non_optional ||= $node;
5324     #
5325 wakaba 1.87 }
5326     } # INSCOPE
5327 wakaba 1.91
5328     if (defined $i) {
5329 wakaba 1.197 ## 1. Generate implied end tags
5330     #
5331    
5332     ## 2. If current node != "p", parse error
5333     if ($non_optional) {
5334 wakaba 1.87 !!!cp ('t412.1');
5335 wakaba 1.122 !!!parse-error (type => 'not closed',
5336 wakaba 1.197 text => $non_optional->[0]->manakai_local_name,
5337 wakaba 1.122 token => $token);
5338 wakaba 1.87 } else {
5339 wakaba 1.91 !!!cp ('t414.1');
5340 wakaba 1.87 }
5341 wakaba 1.91
5342 wakaba 1.197 ## 3. Pop
5343 wakaba 1.87 splice @{$self->{open_elements}}, $i;
5344     } else {
5345 wakaba 1.91 !!!cp ('t413.1');
5346 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5347     text => $token->{tag_name}, token => $token);
5348 wakaba 1.91
5349 wakaba 1.87 !!!cp ('t415.1');
5350     ## As if <p>, then reprocess the current token
5351     my $el;
5352 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
5353 wakaba 1.87 $insert->($el);
5354 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
5355 wakaba 1.87 }
5356 wakaba 1.91
5357 wakaba 1.87 !!!next-token;
5358 wakaba 1.126 next B;
5359 wakaba 1.52 } elsif ({
5360     a => 1,
5361     b => 1, big => 1, em => 1, font => 1, i => 1,
5362 wakaba 1.193 nobr => 1, s => 1, small => 1, strike => 1,
5363 wakaba 1.52 strong => 1, tt => 1, u => 1,
5364     }->{$token->{tag_name}}) {
5365 wakaba 1.79 !!!cp ('t427');
5366 wakaba 1.113 $formatting_end_tag->($token);
5367 wakaba 1.126 next B;
5368 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
5369 wakaba 1.79 !!!cp ('t428');
5370 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5371     text => 'br', token => $token);
5372 wakaba 1.52
5373     ## As if <br>
5374     $reconstruct_active_formatting_elements->($insert_to_current);
5375    
5376     my $el;
5377 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
5378 wakaba 1.52 $insert->($el);
5379    
5380     ## Ignore the token.
5381     !!!next-token;
5382 wakaba 1.126 next B;
5383 wakaba 1.52 } else {
5384 wakaba 1.195 if ($token->{tag_name} eq 'sarcasm') {
5385     sleep 0.001; # take a deep breath
5386     }
5387    
5388 wakaba 1.52 ## Step 1
5389     my $node_i = -1;
5390     my $node = $self->{open_elements}->[$node_i];
5391 wakaba 1.51
5392 wakaba 1.52 ## Step 2
5393     S2: {
5394 wakaba 1.200 my $node_tag_name = $node->[0]->manakai_local_name;
5395     $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5396     if ($node_tag_name eq $token->{tag_name}) {
5397 wakaba 1.52 ## Step 1
5398     ## generate implied end tags
5399 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5400 wakaba 1.79 !!!cp ('t430');
5401 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
5402     ## ISSUE: <ruby><rt></rt> will also take this code path,
5403     ## which seems wrong.
5404 wakaba 1.86 pop @{$self->{open_elements}};
5405 wakaba 1.151 $node_i++;
5406 wakaba 1.52 }
5407    
5408     ## Step 2
5409 wakaba 1.200 my $current_tag_name
5410     = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5411     $current_tag_name =~ tr/A-Z/a-z/;
5412     if ($current_tag_name ne $token->{tag_name}) {
5413 wakaba 1.79 !!!cp ('t431');
5414 wakaba 1.58 ## NOTE: <x><y></x>
5415 wakaba 1.122 !!!parse-error (type => 'not closed',
5416 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5417 wakaba 1.122 ->manakai_local_name,
5418     token => $token);
5419 wakaba 1.79 } else {
5420     !!!cp ('t432');
5421 wakaba 1.52 }
5422    
5423     ## Step 3
5424 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5425 wakaba 1.51
5426 wakaba 1.1 !!!next-token;
5427 wakaba 1.52 last S2;
5428 wakaba 1.1 } else {
5429 wakaba 1.52 ## Step 3
5430 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5431 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5432 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5433     $node->[1] & SCOPING_EL)) {
5434 wakaba 1.79 !!!cp ('t433');
5435 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5436     text => $token->{tag_name}, token => $token);
5437 wakaba 1.52 ## Ignore the token
5438     !!!next-token;
5439     last S2;
5440 wakaba 1.193
5441     ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5442     ## 9.27, "a" is a child of <dd> (conforming). In
5443     ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5444     ## "a" is a child of both <body> and <dd>.
5445 wakaba 1.52 }
5446 wakaba 1.193
5447 wakaba 1.79 !!!cp ('t434');
5448 wakaba 1.1 }
5449 wakaba 1.52
5450     ## Step 4
5451     $node_i--;
5452     $node = $self->{open_elements}->[$node_i];
5453    
5454     ## Step 5;
5455     redo S2;
5456     } # S2
5457 wakaba 1.126 next B;
5458 wakaba 1.1 }
5459     }
5460 wakaba 1.126 next B;
5461     } continue { # B
5462     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5463     ## NOTE: The code below is executed in cases where it does not have
5464     ## to be, but it is harmless even in those cases.
5465     ## has an element in scope
5466     INSCOPE: {
5467     for (reverse 0..$#{$self->{open_elements}}) {
5468     my $node = $self->{open_elements}->[$_];
5469     if ($node->[1] & FOREIGN_EL) {
5470     last INSCOPE;
5471     } elsif ($node->[1] & SCOPING_EL) {
5472     last;
5473     }
5474     }
5475    
5476     ## NOTE: No foreign element in scope.
5477     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5478     } # INSCOPE
5479     }
5480 wakaba 1.1 } # B
5481    
5482     ## Stop parsing # MUST
5483    
5484     ## TODO: script stuffs
5485 wakaba 1.3 } # _tree_construction_main
5486    
5487 wakaba 1.218 ## XXX: How this method is organized is somewhat out of date, although
5488     ## it still does what the current spec documents.
## Replace the children of a node with the result of parsing an HTML
## string (the |innerHTML| setter).
##
## Invocation: |Whatpm::HTML->set_inner_html ($node, $s, $onerror,
## $get_wrapper)|
##
##   $node        - The /context/.  A Document node (node type 9) or an
##                  Element node (node type 1).
##   $s           - The character string to parse.  It is accessed
##                  through |$_[0]| and is never copied into a lexical.
##   $onerror     - Optional code reference invoked with named
##                  arguments (|type|, |line|, |column|, ...) for each
##                  parse error.  In the Element case, if omitted, a
##                  |warn|ing is emitted for each error instead.
##   $get_wrapper - Optional code reference that receives the input
##                  handle and returns the handle to be actually read
##                  from.  Defaults to the identity function.
##
## Dies if |$node| is neither a Document nor an Element.  The return
## value is unspecified.
sub set_inner_html ($$$$;$) {
  my $class = shift;
  my $node = shift; # /context/
  #my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  my $nt = $node->node_type;
  if ($nt == 9) { # Document (invoke the algorithm with no /context/ element)
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: Iterate over a copy of the child list, since the list is
    ## modified by |remove_child|.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) { # Element (invoke the algorithm with /context/ element)
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper

    ## F1. Create an HTML document.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);

    ## F2. Propagate quirkness flag
    $doc->manakai_compat_mode ($this_doc->manakai_compat_mode);

    ## F3. Create an HTML parser
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    require Whatpm::Charset::DecodeHandle;
    my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
    $input = $get_wrapper->($input);

    ## |set_nc|: Set |$self->{nc}| to the code point of the next input
    ## character, or to -1 if the input is exhausted, maintaining the
    ## line/column counters.
    ## NOTE: This closure captures |$p| and |$input|; it is deleted at
    ## the end of this method to break the reference cycle.
    $p->{set_nc} = sub {
      my $self = shift;

      my $char = '';
      if (defined $self->{next_nc}) {
        ## A character read ahead while processing a CR is pending.
        $char = $self->{next_nc};
        delete $self->{next_nc};
        $self->{nc} = ord $char;
      } else {
        $self->{char_buffer} = '';
        $self->{char_buffer_pos} = 0;

        ## Fast path: Buffer a run of characters other than NULL, LF,
        ## and CR, and return the first one of them.
        my $count = $input->manakai_read_until
            ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
             $self->{char_buffer_pos});
        if ($count) {
          $self->{line_prev} = $self->{line};
          $self->{column_prev} = $self->{column};
          $self->{column}++;
          $self->{nc}
              = ord substr ($self->{char_buffer},
                            $self->{char_buffer_pos}++, 1);
          return;
        }

        ## Slow path: Read a single character.
        if ($input->read ($char, 1)) {
          $self->{nc} = ord $char;
        } else {
          $self->{nc} = -1;
          return;
        }
      }

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{nc} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{nc} == 0x000D) { # CR
        ## TODO: support for abort/streaming
        ## NOTE: CR LF is reported as a single LF; a character other
        ## than LF following the CR is kept for the next invocation.
        my $next = '';
        if ($input->read ($next, 1) and $next ne "\x0A") {
          $self->{next_nc} = $next;
        }
        $self->{nc} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{nc} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## |read_until|: Append, to |$_[0]| at offset |$_[2]|, a run of
    ## characters that are neither in the character class body |$_[1]|
    ## nor NULL, LF, or CR, and return the number of characters read.
    ## NOTE: This closure captures |$p| and |$input|; it is deleted at
    ## the end of this method to break the reference cycle.
    $p->{read_until} = sub {
      #my ($scalar, $specials_range, $offset) = @_;
      return 0 if defined $p->{next_nc};

      my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
      my $offset = $_[2] || 0;

      if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
        ## Characters buffered by |set_nc| have not been consumed yet.
        pos ($p->{char_buffer}) = $p->{char_buffer_pos};
        if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
          substr ($_[0], $offset)
              = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
          my $count = $+[0] - $-[0];
          if ($count) {
            $p->{column} += $count;
            $p->{char_buffer_pos} += $count;
            $p->{line_prev} = $p->{line};
            $p->{column_prev} = $p->{column} - 1;
            $p->{nc} = -1;
          }
          return $count;
        } else {
          return 0;
        }
      } else {
        my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
        if ($count) {
          $p->{column} += $count;
          $p->{column_prev} += $count;
          $p->{nc} = -1;
        }
        return $count;
      }
    }; # $p->{read_until}

    ## Error reporting: Use the caller's handler if any; otherwise
    ## |warn|, preferring the position of the token over the current
    ## position of the parser.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    my $char_onerror = sub {
      my (undef, $type, %opt) = @_;
      $ponerror->(layer => 'encode',
                  line => $p->{line}, column => $p->{column} + 1,
                  %opt, type => $type);
    }; # $char_onerror
    $input->onerror ($char_onerror);

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## F4. If /context/ is not undef...

    ## F4.1. content model flag
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## F4.2. Root |html| element
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## F4.3.
    $doc->append_child ($root);

    ## F4.4.
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};
    undef $p->{head_element_inserted};

    ## F4.5.
    $p->_reset_insertion_mode;

    ## F4.6.
    ## NOTE: Set the form element pointer to the nearest HTML |form|
    ## element among /context/ and its ancestors, if any.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## F.5. Set the input stream.
    $p->{confident} = 1; ## Confident: irrelevant.

    ## F.6. Start the parser.
    {
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## F.7.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events?  read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break the reference cycles between |$p| and the closures stored
    ## in it, all of which capture |$p|; otherwise the parser object is
    ## never freed.
    delete $p->{parse_error}; # delete loop
    delete $p->{set_nc}; # delete loop
    delete $p->{read_until}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
5742    
5743     } # tree construction stage
5744 wakaba 1.1
## Exception class derived from |Error|.
## NOTE(review): Presumably thrown to request that parsing be restarted;
## confirm against the throw sites, which are not visible here.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

1;
5749 wakaba 1.236 # $Date: 2009/09/06 08:02:54 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24