/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.212 - (hide annotations) (download) (as text)
Sun Jun 28 10:48:30 2009 UTC (15 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.211: +4 -2 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	28 Jun 2009 10:48:13 -0000
2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.t: Added new test data file.

++ whatpm/t/dom-conformance/ChangeLog	28 Jun 2009 10:47:52 -0000
2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* html-scripting-1.dat: New file.

2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* html-flows-1.dat, html-tables-1.dat: Test data for flow content
	in |legend| and |caption| are added (c.f. HTML5 revision 3252).

++ whatpm/Whatpm/ContentChecker/ChangeLog	28 Jun 2009 10:46:58 -0000
2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: |ondataunavailable| has been removed (HTML5 revision
	3252).

2009-06-28  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Block-level contents in |figure|'s |legend| and
	|caption| are now allowed (HTML5 revision 3329).

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.212 our $VERSION=do{my @r=(q$Revision: 1.211 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.208 use Whatpm::HTML::Tokenizer;
7    
8 wakaba 1.182 ## NOTE: This module doesn't check all HTML5 parse errors; character
9     ## encoding related parse errors are expected to be handled by relevant
10     ## modules.
11     ## Parse errors for control characters that are not allowed in HTML5
12     ## documents, for surrogate code points, and for noncharacter code
13     ## points, as well as U+FFFD substitutions for characters whose code point
14     ## is higher than U+10FFFF may be detected by combining the parser with
15     ## the checker implemented by Whatpm::Charset::UnicodeChecker (for its
16     ## usage example, see |t/HTML-tree.t| in the Whatpm package or the
17     ## WebHACC::Language::HTML module in the WebHACC package).
18    
19 wakaba 1.18 ## ISSUE:
20     ## var doc = implementation.createDocument (null, null, null);
21     ## doc.write ('');
22     ## alert (doc.compatMode);
23 wakaba 1.1
24 wakaba 1.139 require IO::Handle;
25    
26 wakaba 1.208 ## Namespace URLs
27    
28 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
29     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
30     my $SVG_NS = q<http://www.w3.org/2000/svg>;
31     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
32     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
33     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
34    
35 wakaba 1.208 ## Element categories
36    
## Element category constants.
##
## Every constant below is a constant sub returning an integer bit mask.
## Bits 12-15 mark the broad class of an element (special, scoping,
## formatting, phrasing), bits 3-11 are individual flags, and the low
## three bits (0-2) carry a small number that tells apart categories
## sharing the same flag bits.  Composite categories are built by
## bit-or'ing these pieces together.
37 wakaba 1.206 ## Bits 12-15
38     sub SPECIAL_EL () { 0b1_000000000000000 }
39     sub SCOPING_EL () { 0b1_00000000000000 }
40     sub FORMATTING_EL () { 0b1_0000000000000 }
41     sub PHRASING_EL () { 0b1_000000000000 }
42    
43     ## Bits 10-11
44 wakaba 1.208 #sub FOREIGN_EL () { 0b1_00000000000 } # see Whatpm::HTML::Tokenizer
45 wakaba 1.206 sub FOREIGN_FLOW_CONTENT_EL () { 0b1_0000000000 }
46    
47     ## Bits 6-9
48     sub TABLE_SCOPING_EL () { 0b1_000000000 }
49     sub TABLE_ROWS_SCOPING_EL () { 0b1_00000000 }
50     sub TABLE_ROW_SCOPING_EL () { 0b1_0000000 }
51     sub TABLE_ROWS_EL () { 0b1_000000 }
52    
53     ## Bit 5
54     sub ADDRESS_DIV_P_EL () { 0b1_00000 }
55    
56     ## NOTE: Used in </body> and EOF algorithms.
57     ## Bit 4
58     sub ALL_END_TAG_OPTIONAL_EL () { 0b1_0000 }
59 wakaba 1.123
60 wakaba 1.151 ## NOTE: Used in "generate implied end tags" algorithm.
61 wakaba 1.194 ## NOTE: There is a code where a modified version of
62     ## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
63     ## implementation (search for the algorithm name).
64 wakaba 1.206 ## Bit 3
65     sub END_TAG_OPTIONAL_EL () { 0b1_000 }
66    
67     ## Bits 0-2
68    
## Categories in the "special" class, distinguished by the low three bits.
69     sub MISC_SPECIAL_EL () { SPECIAL_EL | 0b000 }
70     sub FORM_EL () { SPECIAL_EL | 0b001 }
71     sub FRAMESET_EL () { SPECIAL_EL | 0b010 }
72     sub HEADING_EL () { SPECIAL_EL | 0b011 }
73     sub SELECT_EL () { SPECIAL_EL | 0b100 }
74     sub SCRIPT_EL () { SPECIAL_EL | 0b101 }
75    
76     sub ADDRESS_DIV_EL () { SPECIAL_EL | ADDRESS_DIV_P_EL | 0b001 }
77     sub BODY_EL () { SPECIAL_EL | ALL_END_TAG_OPTIONAL_EL | 0b001 }
78    
79 wakaba 1.207 sub DTDD_EL () {
80 wakaba 1.206 SPECIAL_EL |
81     END_TAG_OPTIONAL_EL |
82     ALL_END_TAG_OPTIONAL_EL |
83     0b010
84     }
85     sub LI_EL () {
86     SPECIAL_EL |
87     END_TAG_OPTIONAL_EL |
88     ALL_END_TAG_OPTIONAL_EL |
89     0b100
90     }
91     sub P_EL () {
92     SPECIAL_EL |
93     ADDRESS_DIV_P_EL |
94     END_TAG_OPTIONAL_EL |
95     ALL_END_TAG_OPTIONAL_EL |
96     0b001
97 wakaba 1.123 }
98    
99 wakaba 1.206 sub TABLE_ROW_EL () {
100     SPECIAL_EL |
101     TABLE_ROWS_EL |
102     TABLE_ROW_SCOPING_EL |
103     ALL_END_TAG_OPTIONAL_EL |
104     0b001
105     }
106     sub TABLE_ROW_GROUP_EL () {
107     SPECIAL_EL |
108     TABLE_ROWS_EL |
109     TABLE_ROWS_SCOPING_EL |
110     ALL_END_TAG_OPTIONAL_EL |
111     0b001
112 wakaba 1.123 }
113    
## Categories in the "scoping" class.
114 wakaba 1.206 sub MISC_SCOPING_EL () { SCOPING_EL | 0b000 }
115     sub BUTTON_EL () { SCOPING_EL | 0b001 }
116     sub CAPTION_EL () { SCOPING_EL | 0b010 }
117     sub HTML_EL () {
118     SCOPING_EL |
119     TABLE_SCOPING_EL |
120     TABLE_ROWS_SCOPING_EL |
121     TABLE_ROW_SCOPING_EL |
122     ALL_END_TAG_OPTIONAL_EL |
123     0b001
124 wakaba 1.123 }
125 wakaba 1.206 sub TABLE_EL () {
126     SCOPING_EL |
127     TABLE_ROWS_EL |
128     TABLE_SCOPING_EL |
129     0b001
130 wakaba 1.123 }
131 wakaba 1.206 sub TABLE_CELL_EL () {
132     SCOPING_EL |
133     TABLE_ROW_SCOPING_EL |
134     ALL_END_TAG_OPTIONAL_EL |
135     0b001
136 wakaba 1.123 }
137    
## Categories in the "formatting" class.
138 wakaba 1.206 sub MISC_FORMATTING_EL () { FORMATTING_EL | 0b000 }
139     sub A_EL () { FORMATTING_EL | 0b001 }
140     sub NOBR_EL () { FORMATTING_EL | 0b010 }
141    
## Categories in the "phrasing" class.
142     sub RUBY_EL () { PHRASING_EL | 0b001 }
143    
144     ## ISSUE: ALL_END_TAG_OPTIONAL_EL?
145     sub OPTGROUP_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b001 }
146     sub OPTION_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b010 }
147     sub RUBY_COMPONENT_EL () { PHRASING_EL | END_TAG_OPTIONAL_EL | 0b100 }
148 wakaba 1.123
## NOTE: |FOREIGN_EL| is not defined in this file (its definition above is
## commented out with a pointer to Whatpm::HTML::Tokenizer).
149 wakaba 1.206 sub MML_AXML_EL () { PHRASING_EL | FOREIGN_EL | 0b001 }
150 wakaba 1.123
## Table mapping an HTML element's local name to its category bit mask
## (one of the |*_EL| constants).  Element names that do not appear as
## keys simply have no entry in this table.
151     my $el_category = {
152 wakaba 1.206 a => A_EL,
153     address => ADDRESS_DIV_EL,
154 wakaba 1.123 applet => MISC_SCOPING_EL,
155     area => MISC_SPECIAL_EL,
156 wakaba 1.193 article => MISC_SPECIAL_EL,
157     aside => MISC_SPECIAL_EL,
158 wakaba 1.123 b => FORMATTING_EL,
159     base => MISC_SPECIAL_EL,
160     basefont => MISC_SPECIAL_EL,
161     bgsound => MISC_SPECIAL_EL,
162     big => FORMATTING_EL,
163     blockquote => MISC_SPECIAL_EL,
164     body => BODY_EL,
165     br => MISC_SPECIAL_EL,
166     button => BUTTON_EL,
167     caption => CAPTION_EL,
168     center => MISC_SPECIAL_EL,
169     col => MISC_SPECIAL_EL,
170     colgroup => MISC_SPECIAL_EL,
171 wakaba 1.193 command => MISC_SPECIAL_EL,
172     datagrid => MISC_SPECIAL_EL,
173 wakaba 1.207 dd => DTDD_EL,
174 wakaba 1.193 details => MISC_SPECIAL_EL,
175     dialog => MISC_SPECIAL_EL,
176 wakaba 1.123 dir => MISC_SPECIAL_EL,
177 wakaba 1.206 div => ADDRESS_DIV_EL,
178 wakaba 1.123 dl => MISC_SPECIAL_EL,
179 wakaba 1.207 dt => DTDD_EL,
180 wakaba 1.123 em => FORMATTING_EL,
181     embed => MISC_SPECIAL_EL,
182 wakaba 1.193 eventsource => MISC_SPECIAL_EL,
183 wakaba 1.123 fieldset => MISC_SPECIAL_EL,
184 wakaba 1.193 figure => MISC_SPECIAL_EL,
185 wakaba 1.123 font => FORMATTING_EL,
186 wakaba 1.193 footer => MISC_SPECIAL_EL,
187 wakaba 1.123 form => FORM_EL,
188     frame => MISC_SPECIAL_EL,
189     frameset => FRAMESET_EL,
190     h1 => HEADING_EL,
191     h2 => HEADING_EL,
192     h3 => HEADING_EL,
193     h4 => HEADING_EL,
194     h5 => HEADING_EL,
195     h6 => HEADING_EL,
196     head => MISC_SPECIAL_EL,
197 wakaba 1.193 header => MISC_SPECIAL_EL,
198 wakaba 1.123 hr => MISC_SPECIAL_EL,
199     html => HTML_EL,
200     i => FORMATTING_EL,
201     iframe => MISC_SPECIAL_EL,
202     img => MISC_SPECIAL_EL,
203 wakaba 1.193 #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
204 wakaba 1.123 input => MISC_SPECIAL_EL,
205     isindex => MISC_SPECIAL_EL,
206     li => LI_EL,
207     link => MISC_SPECIAL_EL,
208     listing => MISC_SPECIAL_EL,
209     marquee => MISC_SCOPING_EL,
210     menu => MISC_SPECIAL_EL,
211     meta => MISC_SPECIAL_EL,
212 wakaba 1.193 nav => MISC_SPECIAL_EL,
213 wakaba 1.206 nobr => NOBR_EL,
214 wakaba 1.123 noembed => MISC_SPECIAL_EL,
215     noframes => MISC_SPECIAL_EL,
216     noscript => MISC_SPECIAL_EL,
217     object => MISC_SCOPING_EL,
218     ol => MISC_SPECIAL_EL,
219     optgroup => OPTGROUP_EL,
220     option => OPTION_EL,
221     p => P_EL,
222     param => MISC_SPECIAL_EL,
223     plaintext => MISC_SPECIAL_EL,
224     pre => MISC_SPECIAL_EL,
225 wakaba 1.151 rp => RUBY_COMPONENT_EL,
226     rt => RUBY_COMPONENT_EL,
227     ruby => RUBY_EL,
228 wakaba 1.123 s => FORMATTING_EL,
## NOTE(review): |script| maps to MISC_SPECIAL_EL although a SCRIPT_EL
## constant exists; presumably intentional -- confirm.
229     script => MISC_SPECIAL_EL,
230     select => SELECT_EL,
231 wakaba 1.193 section => MISC_SPECIAL_EL,
232 wakaba 1.123 small => FORMATTING_EL,
233     spacer => MISC_SPECIAL_EL,
234     strike => FORMATTING_EL,
235     strong => FORMATTING_EL,
236     style => MISC_SPECIAL_EL,
237     table => TABLE_EL,
238     tbody => TABLE_ROW_GROUP_EL,
239     td => TABLE_CELL_EL,
240     textarea => MISC_SPECIAL_EL,
241     tfoot => TABLE_ROW_GROUP_EL,
242     th => TABLE_CELL_EL,
243     thead => TABLE_ROW_GROUP_EL,
244     title => MISC_SPECIAL_EL,
245     tr => TABLE_ROW_EL,
246     tt => FORMATTING_EL,
247     u => FORMATTING_EL,
248     ul => MISC_SPECIAL_EL,
249     wbr => MISC_SPECIAL_EL,
250     };
251    
## Category table for foreign (non-HTML) elements: the outer key is a
## namespace URL (MathML or SVG), the inner key is the element's local
## name, and the value is a category bit mask.
252 wakaba 1.126 my $el_category_f = {
253     $MML_NS => {
254     'annotation-xml' => MML_AXML_EL,
255 wakaba 1.206 mi => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
256     mo => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
257     mn => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
258     ms => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
259     mtext => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
260 wakaba 1.126 },
261     $SVG_NS => {
262 wakaba 1.206 foreignObject => SCOPING_EL | FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
263     desc => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
264     title => FOREIGN_EL | FOREIGN_FLOW_CONTENT_EL,
265 wakaba 1.126 },
266     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
267     };
268    
## Maps an all-lowercase attribute name to its mixed-case spelling
## (every key is the lowercased form of its value).
269 wakaba 1.131 my $svg_attr_name = {
270 wakaba 1.146 attributename => 'attributeName',
271 wakaba 1.131 attributetype => 'attributeType',
272     basefrequency => 'baseFrequency',
273     baseprofile => 'baseProfile',
274     calcmode => 'calcMode',
275     clippathunits => 'clipPathUnits',
276     contentscripttype => 'contentScriptType',
277     contentstyletype => 'contentStyleType',
278     diffuseconstant => 'diffuseConstant',
279     edgemode => 'edgeMode',
280     externalresourcesrequired => 'externalResourcesRequired',
281     filterres => 'filterRes',
282     filterunits => 'filterUnits',
283     glyphref => 'glyphRef',
284     gradienttransform => 'gradientTransform',
285     gradientunits => 'gradientUnits',
286     kernelmatrix => 'kernelMatrix',
287     kernelunitlength => 'kernelUnitLength',
288     keypoints => 'keyPoints',
289     keysplines => 'keySplines',
290     keytimes => 'keyTimes',
291     lengthadjust => 'lengthAdjust',
292     limitingconeangle => 'limitingConeAngle',
293     markerheight => 'markerHeight',
294     markerunits => 'markerUnits',
295     markerwidth => 'markerWidth',
296     maskcontentunits => 'maskContentUnits',
297     maskunits => 'maskUnits',
298     numoctaves => 'numOctaves',
299     pathlength => 'pathLength',
300     patterncontentunits => 'patternContentUnits',
301     patterntransform => 'patternTransform',
302     patternunits => 'patternUnits',
303     pointsatx => 'pointsAtX',
304     pointsaty => 'pointsAtY',
305     pointsatz => 'pointsAtZ',
306     preservealpha => 'preserveAlpha',
307     preserveaspectratio => 'preserveAspectRatio',
308     primitiveunits => 'primitiveUnits',
309     refx => 'refX',
310     refy => 'refY',
311     repeatcount => 'repeatCount',
312     repeatdur => 'repeatDur',
313     requiredextensions => 'requiredExtensions',
314 wakaba 1.146 requiredfeatures => 'requiredFeatures',
315 wakaba 1.131 specularconstant => 'specularConstant',
316     specularexponent => 'specularExponent',
317     spreadmethod => 'spreadMethod',
318     startoffset => 'startOffset',
319     stddeviation => 'stdDeviation',
320     stitchtiles => 'stitchTiles',
321     surfacescale => 'surfaceScale',
322     systemlanguage => 'systemLanguage',
323     tablevalues => 'tableValues',
324     targetx => 'targetX',
325     targety => 'targetY',
326     textlength => 'textLength',
327     viewbox => 'viewBox',
328     viewtarget => 'viewTarget',
329     xchannelselector => 'xChannelSelector',
330     ychannelselector => 'yChannelSelector',
331     zoomandpan => 'zoomAndPan',
332     };
333    
## Maps a qualified attribute name as written in the source to
## [namespace URL, [prefix, local name]].  The prefix is |undef| for the
## plain |xmlns| attribute.
334     my $foreign_attr_xname = {
335     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
336     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
337     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
338     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
339     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
340     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
341     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
342     'xml:base' => [$XML_NS, ['xml', 'base']],
343     'xml:lang' => [$XML_NS, ['xml', 'lang']],
344     'xml:space' => [$XML_NS, ['xml', 'space']],
345     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
346     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
347     };
348    
349     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
350    
351 wakaba 1.192 ## TODO: Invoke the reset algorithm when a resettable element is
352     ## created (cf. HTML5 revision 2259).
353    
## Parse a string of bytes held in memory.
##
## Arguments, after the invocant:
##   $charset_name - passed through to |parse_byte_stream|.
##   $byte_string  - the bytes to parse, either as a scalar or as a
##                   reference to a scalar.
##   ...           - any further arguments are forwarded unchanged to
##                   |parse_byte_stream| after the stream argument.
##
## Returns whatever |parse_byte_stream| returns.
##
## Dies if the string cannot be opened as an in-memory filehandle (Perl
## refuses this, for example, when the scalar contains code points above
## 0xFF).  Previously the failure was ignored and an unopened handle was
## handed to |parse_byte_stream|.
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;

  ## Refer to the caller's own scalar instead of copying the byte string.
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);

  open my $input, '<', $bytes_ref
      or die "parse_byte_string: can't open the byte string as a filehandle: $!";

  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
360    
## Parse a stream of bytes: choose a character encoding, wrap the byte
## stream in a decoding character stream and hand it to
## |parse_char_stream|.
##
## Arguments: ($self, $charset_name, $byte_stream, $doc, $onerror,
## $get_wrapper).  If the invocant is not a reference, a new parser is
## created with |new|.  |$onerror| defaults to a sub that |warn|s the
## error type; |$get_wrapper| defaults to a sub returning its argument.
##
## Side effects visible here: sets |$self->{parse_error}|,
## |$self->{confident}|, |$self->{input_encoding}| and
## |$self->{change_encoding}|.
##
## Returns the value returned by |parse_char_stream|.  If a
## |Whatpm::HTML::RestartParser| exception is thrown during parsing, the
## parse is run a second time with the decoder selected by the
## |change_encoding| callback.
361 wakaba 1.162 sub parse_byte_stream ($$$$;$$) {
362     # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
363 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
364 wakaba 1.133 my $charset_name = shift;
365 wakaba 1.138 my $byte_stream = $_[0];
366 wakaba 1.133
367 wakaba 1.134 my $onerror = $_[2] || sub {
368     my (%opt) = @_;
369     warn "Parse error ($opt{type})\n";
370     };
371     $self->{parse_error} = $onerror; # updated later by parse_char_string
372    
373 wakaba 1.162 my $get_wrapper = $_[3] || sub ($) {
374     return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
375     };
376    
377 wakaba 1.133 ## HTML5 encoding sniffing algorithm
378     require Message::Charset::Info;
379     my $charset;
380 wakaba 1.136 my $buffer;
381     my ($char_stream, $e_status);
382 wakaba 1.133
## Each successful step leaves the block with |last SNIFFING| after
## setting |$charset|, |$char_stream|, |$e_status| and |{confident}|.
383     SNIFFING: {
384 wakaba 1.160 ## NOTE: By setting |allow_fallback| option true when the
385     ## |get_decode_handle| method is invoked, we ignore what the HTML5
386     ## spec requires, i.e. unsupported encoding should be ignored.
387     ## TODO: We should not do this unless the parser is invoked
388     ## in the conformance checking mode, in which this behavior
389     ## would be useful.
390 wakaba 1.133
391     ## Step 1
## An explicitly supplied charset name is tried first.
392     if (defined $charset_name) {
393 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
394     ## TODO: Is this ok? Transfer protocol's parameter should be
395     ## interpreted in its semantics?
396 wakaba 1.133
397 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
398     ($byte_stream, allow_error_reporting => 1,
399 wakaba 1.133 allow_fallback => 1);
400 wakaba 1.136 if ($char_stream) {
401 wakaba 1.133 $self->{confident} = 1;
402     last SNIFFING;
403 wakaba 1.136 } else {
404 wakaba 1.190 !!!parse-error (type => 'charset:not supported',
405     layer => 'encode',
406     line => 1, column => 1,
407     value => $charset_name,
408     level => $self->{level}->{uncertain});
409 wakaba 1.133 }
410     }
411    
412     ## Step 2
## Read at most 1024 bytes from the stream, one |getc| at a time.
413 wakaba 1.136 my $byte_buffer = '';
414     for (1..1024) {
415     my $char = $byte_stream->getc;
416     last unless defined $char;
417     $byte_buffer .= $char;
418     } ## TODO: timeout
419 wakaba 1.133
420     ## Step 3
## Byte order marks: FE FF (utf-16be), FF FE (utf-16le), EF BB BF (utf-8).
421 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
422 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
423 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
424     ($byte_stream, allow_error_reporting => 1,
425     allow_fallback => 1, byte_buffer => \$byte_buffer);
426 wakaba 1.133 $self->{confident} = 1;
427     last SNIFFING;
428 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
429 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
430 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
431     ($byte_stream, allow_error_reporting => 1,
432     allow_fallback => 1, byte_buffer => \$byte_buffer);
433 wakaba 1.133 $self->{confident} = 1;
434     last SNIFFING;
435 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
436 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
437 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
438     ($byte_stream, allow_error_reporting => 1,
439     allow_fallback => 1, byte_buffer => \$byte_buffer);
440 wakaba 1.133 $self->{confident} = 1;
441     last SNIFFING;
442     }
443    
444     ## Step 4
445     ## TODO: <meta charset>
446    
447     ## Step 5
448     ## TODO: from history
449    
450     ## Step 6
## Guess the encoding from the buffered bytes; a guess is only tentative
## (|{confident}| is set to 0).
451 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
452 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
453 wakaba 1.136 ($byte_buffer);
454 wakaba 1.133 if (defined $charset_name) {
455 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
456 wakaba 1.133
457 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
458     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
459     ($byte_stream);
460     ($char_stream, $e_status) = $charset->get_decode_handle
461     ($buffer, allow_error_reporting => 1,
462     allow_fallback => 1, byte_buffer => \$byte_buffer);
463     if ($char_stream) {
464     $buffer->{buffer} = $byte_buffer;
465 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
466     text => $charset_name,
467     level => $self->{level}->{info},
468     layer => 'encode',
469 wakaba 1.134 line => 1, column => 1);
470 wakaba 1.133 $self->{confident} = 0;
471     last SNIFFING;
472     }
473     }
474    
475     ## Step 7: default
476     ## TODO: Make this configurable.
477 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
478 wakaba 1.133 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
479     ## detectable in the step 6.
480 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
481     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
482     ($byte_stream);
483     ($char_stream, $e_status)
484     = $charset->get_decode_handle ($buffer,
485     allow_error_reporting => 1,
486     allow_fallback => 1,
487     byte_buffer => \$byte_buffer);
488     $buffer->{buffer} = $byte_buffer;
489 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
490     text => 'windows-1252',
491     level => $self->{level}->{info},
492     line => 1, column => 1,
493     layer => 'encode');
494 wakaba 1.63 $self->{confident} = 0;
495 wakaba 1.133 } # SNIFFING
496    
## Record the chosen encoding and report how the decoder was obtained
## (fallback implementation / implementation without error reporting).
497     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
498 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
499 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
500 wakaba 1.160 #text => $self->{input_encoding},
501 wakaba 1.153 level => $self->{level}->{uncertain},
502     line => 1, column => 1,
503     layer => 'encode');
504 wakaba 1.133 } elsif (not ($e_status &
505 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
506 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
507 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
508     text => $self->{input_encoding},
509     level => $self->{level}->{uncertain},
510     line => 1, column => 1,
511     layer => 'encode');
512 wakaba 1.160 } else {
513     $self->{input_encoding} = $charset->get_iana_name;
514 wakaba 1.63 }
515    
## Callback taking ($self, $charset_name, $token).  It overwrites the
## enclosing |$charset_name|, |$charset|, |$char_stream| and |$e_status|,
## which the restart handler below reads, and throws
## |Whatpm::HTML::RestartParser| when the encoding has to change.
516     $self->{change_encoding} = sub {
517     my $self = shift;
518 wakaba 1.134 $charset_name = shift;
519 wakaba 1.114 my $token = shift;
520 wakaba 1.63
521 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
522 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
523     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
524     byte_buffer => \ $buffer->{buffer});
525 wakaba 1.134
526 wakaba 1.136 if ($char_stream) { # if supported
527 wakaba 1.134 ## "Change the encoding" algorithm:
528 wakaba 1.63
529 wakaba 1.134 ## Step 1
## A UTF-16 category charset is replaced by utf-8 here.
530 wakaba 1.149 if ($charset->{category} &
531     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
532 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
533 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
534     ($byte_stream,
535     byte_buffer => \ $buffer->{buffer});
536 wakaba 1.134 }
537     $charset_name = $charset->get_iana_name;
538    
539     ## Step 2
## Same encoding as the one already in use: just become confident.
540     if (defined $self->{input_encoding} and
541     $self->{input_encoding} eq $charset_name) {
542 wakaba 1.153 !!!parse-error (type => 'charset label:matching',
543     text => $charset_name,
544     level => $self->{level}->{info});
545 wakaba 1.134 $self->{confident} = 1;
546     return;
547     }
548 wakaba 1.63
549 wakaba 1.153 !!!parse-error (type => 'charset label detected',
550     text => $self->{input_encoding},
551     value => $charset_name,
552     level => $self->{level}->{warn},
553     token => $token);
554 wakaba 1.134
555     ## Step 3
556     # if (can) {
557     ## change the encoding on the fly.
558     #$self->{confident} = 1;
559     #return;
560     # }
561    
562     ## Step 4
563     throw Whatpm::HTML::RestartParser ();
564 wakaba 1.63 }
565     }; # $self->{change_encoding}
566    
## Error callback installed on the character stream: reports the error
## and, when |$opt{octets}| is given, overwrites the referenced scalar
## with U+FFFD.
567 wakaba 1.136 my $char_onerror = sub {
568     my (undef, $type, %opt) = @_;
569 wakaba 1.153 !!!parse-error (layer => 'encode',
570 wakaba 1.174 line => $self->{line}, column => $self->{column} + 1,
571     %opt, type => $type);
572 wakaba 1.136 if ($opt{octets}) {
573     ${$opt{octets}} = "\x{FFFD}"; # replacement character
574     }
575     };
576 wakaba 1.162
577     my $wrapped_char_stream = $get_wrapper->($char_stream);
578     $wrapped_char_stream->onerror ($char_onerror);
579 wakaba 1.136
580 wakaba 1.182 my @args = ($_[1], $_[2]); # $doc, $onerror - $get_wrapper = undef;
581 wakaba 1.63 my $return;
582     try {
583 wakaba 1.162 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
584 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
585 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
586    
587     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
588 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
589 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
590     level => $self->{level}->{uncertain},
591 wakaba 1.160 #text => $self->{input_encoding},
592 wakaba 1.153 line => 1, column => 1,
593     layer => 'encode');
594 wakaba 1.134 } elsif (not ($e_status &
595 wakaba 1.178 Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL ())) {
596 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
597 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
598     text => $self->{input_encoding},
599     level => $self->{level}->{uncertain},
600     line => 1, column => 1,
601     layer => 'encode');
602 wakaba 1.160 } else {
603     $self->{input_encoding} = $charset->get_iana_name;
604 wakaba 1.134 }
605 wakaba 1.63 $self->{confident} = 1;
606 wakaba 1.162
## Re-wrap the decoder chosen by |change_encoding| and parse again.
607     $wrapped_char_stream = $get_wrapper->($char_stream);
608     $wrapped_char_stream->onerror ($char_onerror);
609    
610     $return = $self->parse_char_stream ($wrapped_char_stream, @args);
611 wakaba 1.63 };
612     return $return;
613 wakaba 1.138 } # parse_byte_stream
614 wakaba 1.63
615 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
616     ## and the HTML layer MUST ignore it. However, we do strip BOM in
617     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
618     ## because the core part of our HTML parser expects a string of characters,
619     ## not a string of bytes or code units or anything which might contain a BOM.
620     ## Therefore, any parser interface that accepts a string of bytes,
621     ## such as |parse_byte_string| in this module, must ensure that it does
622     ## strip the BOM and never strip any ZWNBSP.
623    
## Parse a string of characters held in memory.
##
## Arguments: ($self, $s, $doc, $onerror, $get_wrapper), where |$s| is
## either the string itself or a reference to it.  The string is wrapped
## in a |Whatpm::Charset::DecodeHandle::CharString| object and everything
## after |$s| is forwarded unchanged to |parse_char_stream|, whose return
## value is returned.
sub parse_char_string ($$$;$$) {
  my ($self, undef, @rest) = @_;

  ## Point at the caller's own scalar ($_[1]) rather than at a copy of it,
  ## unless a reference was supplied in the first place.
  my $text_ref = ref $_[1] ? $_[1] : \($_[1]);

  require Whatpm::Charset::DecodeHandle;
  my $char_stream
      = Whatpm::Charset::DecodeHandle::CharString->new ($text_ref);

  return $self->parse_char_stream ($char_stream, @rest);
} # parse_char_string
*parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
633 wakaba 1.63
## Parse a stream of characters into a document.
##
## Arguments: the invocant (a parser object, or a class name, in which
## case |new| is called), the character stream |$_[0]|, the document
## |$_[1]| (its existing child nodes are removed), an optional error
## callback |$_[2]| and an optional stream wrapper callback |$_[3]|.
##
## Installs the |set_nc|, |read_until| and |parse_error| callbacks on the
## parser, resets the line/column counters, runs the tokenizer and tree
## constructor, and returns the document.
634 wakaba 1.182 sub parse_char_stream ($$$;$$) {
635 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
636 wakaba 1.135 my $input = $_[0];
637 wakaba 1.1 $self->{document} = $_[1];
638 wakaba 1.63 @{$self->{document}->child_nodes} = ();
639 wakaba 1.1
640 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
641    
642 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
643 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
644     if defined $self->{input_encoding};
645 wakaba 1.178 ## TODO: |{input_encoding}| is needless?
646 wakaba 1.63
647 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
648 wakaba 1.179 $self->{column_prev} = -1;
649     $self->{column} = 0;
## Callback that stores the code point of the next input character in
## |{nc}| (-1 when no more input can be read) and keeps the
## line/column counters up to date.  It takes the parser as its argument.
650 wakaba 1.183 $self->{set_nc} = sub {
651 wakaba 1.1 my $self = shift;
652 wakaba 1.13
653 wakaba 1.178 my $char = '';
## A character saved while handling a preceding CR is consumed first.
654 wakaba 1.183 if (defined $self->{next_nc}) {
655     $char = $self->{next_nc};
656     delete $self->{next_nc};
657     $self->{nc} = ord $char;
658 wakaba 1.139 } else {
659 wakaba 1.179 $self->{char_buffer} = '';
660     $self->{char_buffer_pos} = 0;
661    
## Fast path: fetch a run of characters other than NUL, LF and CR into
## |{char_buffer}| and return its first character.
662     my $count = $input->manakai_read_until
663 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/, $self->{char_buffer_pos});
664 wakaba 1.179 if ($count) {
665     $self->{line_prev} = $self->{line};
666     $self->{column_prev} = $self->{column};
667     $self->{column}++;
668 wakaba 1.183 $self->{nc}
669 wakaba 1.179 = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1);
670     return;
671     }
672    
673 wakaba 1.178 if ($input->read ($char, 1)) {
674 wakaba 1.183 $self->{nc} = ord $char;
675 wakaba 1.178 } else {
676 wakaba 1.183 $self->{nc} = -1;
677 wakaba 1.178 return;
678     }
679 wakaba 1.139 }
680 wakaba 1.112
681     ($self->{line_prev}, $self->{column_prev})
682     = ($self->{line}, $self->{column});
683     $self->{column}++;
684 wakaba 1.1
685 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
686 wakaba 1.132 !!!cp ('j1');
687 wakaba 1.112 $self->{line}++;
688     $self->{column} = 0;
689 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
690 wakaba 1.132 !!!cp ('j2');
691 wakaba 1.170 ## TODO: support for abort/streaming
## CR and CR LF are both reported as a single LF: the character after the
## CR is read ahead and kept in |{next_nc}| unless it is an LF.
692 wakaba 1.178 my $next = '';
693     if ($input->read ($next, 1) and $next ne "\x0A") {
694 wakaba 1.183 $self->{next_nc} = $next;
695 wakaba 1.135 }
696 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
697 wakaba 1.112 $self->{line}++;
698     $self->{column} = 0;
699 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
700 wakaba 1.132 !!!cp ('j4');
701 wakaba 1.8 !!!parse-error (type => 'NULL');
702 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
703 wakaba 1.1 }
704     };
705    
## Callback that appends to |$_[0]| (at |$offset|) a run of characters
## that are neither in the caller-supplied character range |$_[1]| nor
## NUL, LF or CR.  Returns the number of characters taken, or 0.  When
## characters were taken, |{nc}| is set to -1.
706 wakaba 1.172 $self->{read_until} = sub {
707     #my ($scalar, $specials_range, $offset) = @_;
708 wakaba 1.183 return 0 if defined $self->{next_nc};
709 wakaba 1.180
710 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
711 wakaba 1.180 my $offset = $_[2] || 0;
712    
## Serve from the characters left in |{char_buffer}| first; otherwise
## read directly from the input stream.
713     if ($self->{char_buffer_pos} < length $self->{char_buffer}) {
714     pos ($self->{char_buffer}) = $self->{char_buffer_pos};
715     if ($self->{char_buffer} =~ /\G(?>$pattern)+/) {
716     substr ($_[0], $offset)
717     = substr ($self->{char_buffer}, $-[0], $+[0] - $-[0]);
718     my $count = $+[0] - $-[0];
719     if ($count) {
720     $self->{column} += $count;
721     $self->{char_buffer_pos} += $count;
722     $self->{line_prev} = $self->{line};
723     $self->{column_prev} = $self->{column} - 1;
724 wakaba 1.183 $self->{nc} = -1;
725 wakaba 1.180 }
726     return $count;
727     } else {
728     return 0;
729     }
730     } else {
731     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
732     if ($count) {
733     $self->{column} += $count;
734     $self->{line_prev} = $self->{line};
735     $self->{column_prev} = $self->{column} - 1;
736 wakaba 1.183 $self->{nc} = -1;
737 wakaba 1.180 }
738     return $count;
739 wakaba 1.172 }
740     }; # $self->{read_until}
741 wakaba 1.171
## Default error handler: |warn| with the error type and the position
## taken from the token if one is given, else from |line|/|column|.
742 wakaba 1.3 my $onerror = $_[2] || sub {
743     my (%opt) = @_;
744 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
745     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
746     warn "Parse error ($opt{type}) at line $line column $column\n";
747 wakaba 1.3 };
## The current line and column are passed first, so explicit |line| /
## |column| arguments from the caller override them.
748     $self->{parse_error} = sub {
749 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
750 wakaba 1.1 };
751    
752 wakaba 1.182 my $char_onerror = sub {
753     my (undef, $type, %opt) = @_;
754     !!!parse-error (layer => 'encode',
755     line => $self->{line}, column => $self->{column} + 1,
756     %opt, type => $type);
757     }; # $char_onerror
758    
## With a wrapper callback the wrapped stream always gets this error
## handler; otherwise it is only installed if the stream has none yet.
759     if ($_[3]) {
760     $input = $_[3]->($input);
761     $input->onerror ($char_onerror);
762     } else {
763     $input->onerror ($char_onerror) unless defined $input->onerror;
764     }
765    
766 wakaba 1.1 $self->_initialize_tokenizer;
767     $self->_initialize_tree_constructor;
768     $self->_construct_tree;
769     $self->_terminate_tree_constructor;
770    
771 wakaba 1.112 delete $self->{parse_error}; # remove loop
772    
773 wakaba 1.1 return $self->{document};
774 wakaba 1.135 } # parse_char_stream
775 wakaba 1.1
## Create a new parser object.
##
## The object is a hash blessed into |$class| with these entries:
##   level           - maps error level names (must, should, warn, info,
##                     uncertain) to their one-letter codes.
##   set_nc          - default callback; sets |{nc}| to -1.
##   parse_error, change_encoding, application_cache_selection
##                   - default callbacks that do nothing.
##
## The default |set_nc| callback holds only a weak reference to the
## parser.  A strong reference here would form a reference cycle
## (object -> callback -> object) and keep every parser object alive
## for the lifetime of the process.
sub new ($) {
  my $class = shift;
  my $self = bless {
    level => {must => 'm',
              should => 's',
              warn => 'w',
              info => 'i',
              uncertain => 'u'},
  }, $class;

  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_nc} = sub {
    ## Nothing to do if the parser has already been destroyed.
    $weak_self->{nc} = -1 if defined $weak_self;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
802    
803 wakaba 1.208 ## Insertion modes
##
## Insertion modes are represented as integer bit patterns.  The |*_IMS|
## constants below are single-bit flags; most |*_IM| constants are built
## by or'ing one or more of those flags, optionally with a value in the
## two lowest bits.  |IN_COLUMN_GROUP_IM| is the exception: it has no
## |*_IMS| flag and uses only the low bits.
804 wakaba 1.55
805 wakaba 1.54 sub AFTER_HTML_IMS () { 0b100 }
806     sub HEAD_IMS () { 0b1000 }
807     sub BODY_IMS () { 0b10000 }
808 wakaba 1.56 sub BODY_TABLE_IMS () { 0b100000 }
809 wakaba 1.54 sub TABLE_IMS () { 0b1000000 }
810 wakaba 1.56 sub ROW_IMS () { 0b10000000 }
811 wakaba 1.54 sub BODY_AFTER_IMS () { 0b100000000 }
812     sub FRAME_IMS () { 0b1000000000 }
813 wakaba 1.101 sub SELECT_IMS () { 0b10000000000 }
814 wakaba 1.208 #sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } # see Whatpm::HTML::Tokenizer
815 wakaba 1.126 ## NOTE: "in foreign content" insertion mode is special; it is combined
816     ## with the secondary insertion mode. In this parser, they are stored
817     ## together in the bit-or'ed form.
818 wakaba 1.205 sub IN_CDATA_RCDATA_IM () { 0b1000000000000 }
819     ## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is
820     ## combined with the original insertion mode. In this parser,
821     ## they are stored together in the bit-or'ed form.
822 wakaba 1.54
## NOTE: |IM_MASK| has the eleven lowest bits set, i.e. every bit up to
## and including |SELECT_IMS|; the |IN_FOREIGN_CONTENT_IM| and
## |IN_CDATA_RCDATA_IM| bits lie outside the mask.
823 wakaba 1.210 sub IM_MASK () { 0b11111111111 }
824    
825 wakaba 1.84 ## NOTE: "initial" and "before html" insertion modes have no constants.
826    
827     ## NOTE: "after after body" insertion mode.
828 wakaba 1.54 sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
829 wakaba 1.84
830     ## NOTE: "after after frameset" insertion mode.
831 wakaba 1.54 sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
832 wakaba 1.84
833 wakaba 1.54 sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
834     sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
835     sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
836     sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 }
837     sub IN_BODY_IM () { BODY_IMS }
838 wakaba 1.56 sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 0b01 }
839     sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }
840     sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 0b01 }
841     sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
842 wakaba 1.54 sub IN_TABLE_IM () { TABLE_IMS }
843     sub AFTER_BODY_IM () { BODY_AFTER_IMS }
844     sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 }
845     sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }
846 wakaba 1.101 sub IN_SELECT_IM () { SELECT_IMS | 0b01 }
847     sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }
848 wakaba 1.54 sub IN_COLUMN_GROUP_IM () { 0b10 }
849    
## _initialize_tree_constructor ($self)
##
## Prepares $self->{document} for tree construction: turns the
## document's strict error checking off, marks it as an HTML document,
## and stores 1 as both the |manakai_source_line| and
## |manakai_source_column| user data of the document.
850 wakaba 1.1 sub _initialize_tree_constructor ($) {
851     my $self = shift;
852     ## NOTE: $self->{document} MUST be specified before this method is called
853     $self->{document}->strict_error_checking (0);
854     ## TODO: Turn mutation events off # MUST
855     ## TODO: Turn loose Document option (manakai extension) on
856 wakaba 1.18 $self->{document}->manakai_is_html (1); # MUST
857 wakaba 1.154 $self->{document}->set_user_data (manakai_source_line => 1);
858     $self->{document}->set_user_data (manakai_source_column => 1);
859 wakaba 1.1 } # _initialize_tree_constructor
860    
## _terminate_tree_constructor ($self)
##
## Counterpart of |_initialize_tree_constructor|: turns the strict error
## checking of $self->{document} back on once the tree has been built.
861     sub _terminate_tree_constructor ($) {
862     my $self = shift;
863     $self->{document}->strict_error_checking (1);
864     ## TODO: Turn mutation events on
865     } # _terminate_tree_constructor
866    
867     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
868    
869 wakaba 1.3 { # tree construction stage
870     my $token;
871    
## _construct_tree ($self)
##
## Entry point of the tree construction stage.  Resets the per-parse
## state kept in $self (form/head element pointers, the stack of open
## elements, the innerHTML context node, the ignore-newline flag), then
## runs the "initial" and "before html" insertion mode handlers, sets
## the insertion mode to |BEFORE_HEAD_IM|, and hands over to
## |_tree_construction_main| for all remaining modes.
## NOTE(review): Lines beginning with |!!!| are not Perl; presumably
## they are macros expanded when this .src file is preprocessed.
872 wakaba 1.1 sub _construct_tree ($) {
873     my ($self) = @_;
874    
875     ## When an interactive UA render the $self->{document} available
876     ## to the user, or when it begin accepting user input, are
877     ## not defined.
878    
879     !!!next-token;
880    
881 wakaba 1.3 undef $self->{form_element};
882     undef $self->{head_element};
883 wakaba 1.202 undef $self->{head_element_inserted};
884 wakaba 1.3 $self->{open_elements} = [];
885     undef $self->{inner_html_node};
886 wakaba 1.206 undef $self->{ignore_newline};
887 wakaba 1.3
888 wakaba 1.84 ## NOTE: The "initial" insertion mode.
889 wakaba 1.3 $self->_tree_construction_initial; # MUST
890 wakaba 1.84
891     ## NOTE: The "before html" insertion mode.
892 wakaba 1.3 $self->_tree_construction_root_element;
893 wakaba 1.84 $self->{insertion_mode} = BEFORE_HEAD_IM;
894    
895     ## NOTE: The "before head" insertion mode and so on.
896 wakaba 1.3 $self->_tree_construction_main;
897     } # _construct_tree
898    
## _tree_construction_initial ($self)
##
## Implements the "initial" insertion mode.  Works on the file-scoped
## lexical $token and loops until the mode is left:
##   - DOCTYPE token: reports 'not HTML5' / 'XSLT-compat' errors where
##     applicable, appends a document type node to $self->{document},
##     selects the compatibility mode ('quirks', 'limited quirks', or
##     unchanged) from the name, public identifier and system
##     identifier, fetches the next token and returns.
##   - Start tag, end tag or end-of-file token: reports 'no DOCTYPE',
##     sets quirks mode and returns with the token left for reprocessing.
##   - Character token: leading white space is dropped; if anything
##     remains it is handled like the previous case.
##   - Comment token: appended to the document; the loop continues.
## Dies on any other token type.
899     sub _tree_construction_initial ($) {
900     my $self = shift;
901 wakaba 1.84
902     ## NOTE: "initial" insertion mode
903    
904 wakaba 1.18 INITIAL: {
905 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
906 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
907     ## error, switch to a conformance checking mode for another
908     ## language.
909     my $doctype_name = $token->{name};
910     $doctype_name = '' unless defined $doctype_name;
911 wakaba 1.159 $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
912 wakaba 1.18 if (not defined $token->{name} or # <!DOCTYPE>
913 wakaba 1.183 defined $token->{sysid}) {
914 wakaba 1.79 !!!cp ('t1');
915 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
916 wakaba 1.18 } elsif ($doctype_name ne 'HTML') {
917 wakaba 1.79 !!!cp ('t2');
918 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
919 wakaba 1.183 } elsif (defined $token->{pubid}) {
920     if ($token->{pubid} eq 'XSLT-compat') {
921 wakaba 1.159 !!!cp ('t1.2');
922     !!!parse-error (type => 'XSLT-compat', token => $token,
923     level => $self->{level}->{should});
924     } else {
925     !!!parse-error (type => 'not HTML5', token => $token);
926     }
927 wakaba 1.79 } else {
928     !!!cp ('t3');
929 wakaba 1.159 #
930 wakaba 1.18 }
931    
932     my $doctype = $self->{document}->create_document_type_definition
933     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
934 wakaba 1.122 ## NOTE: Default value for both |public_id| and |system_id| attributes
935     ## are empty strings, so that we don't set any value in missing cases.
936 wakaba 1.183 $doctype->public_id ($token->{pubid}) if defined $token->{pubid};
937     $doctype->system_id ($token->{sysid}) if defined $token->{sysid};
938 wakaba 1.18 ## NOTE: Other DocumentType attributes are null or empty lists.
939 wakaba 1.211 ## In Firefox3, |internalSubset| attribute is set to the empty
940     ## string, while |null| is an allowed value for the attribute
941     ## according to DOM3 Core.
942 wakaba 1.18 $self->{document}->append_child ($doctype);
943    
944 wakaba 1.75 if ($token->{quirks} or $doctype_name ne 'HTML') {
945 wakaba 1.79 !!!cp ('t4');
946 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
947 wakaba 1.183 } elsif (defined $token->{pubid}) {
948     my $pubid = $token->{pubid};
## NOTE: Upper-cased so that the comparisons below are ASCII
## case-insensitive; every literal below is therefore in upper case.
949 wakaba 1.18 $pubid =~ tr/a-z/A-Z/;
950 wakaba 1.143 my $prefix = [
951     "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
952     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
953     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
954     "-//IETF//DTD HTML 2.0 LEVEL 1//",
955     "-//IETF//DTD HTML 2.0 LEVEL 2//",
956     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
957     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
958     "-//IETF//DTD HTML 2.0 STRICT//",
959     "-//IETF//DTD HTML 2.0//",
960     "-//IETF//DTD HTML 2.1E//",
961     "-//IETF//DTD HTML 3.0//",
962     "-//IETF//DTD HTML 3.2 FINAL//",
963     "-//IETF//DTD HTML 3.2//",
964     "-//IETF//DTD HTML 3//",
965     "-//IETF//DTD HTML LEVEL 0//",
966     "-//IETF//DTD HTML LEVEL 1//",
967     "-//IETF//DTD HTML LEVEL 2//",
968     "-//IETF//DTD HTML LEVEL 3//",
969     "-//IETF//DTD HTML STRICT LEVEL 0//",
970     "-//IETF//DTD HTML STRICT LEVEL 1//",
971     "-//IETF//DTD HTML STRICT LEVEL 2//",
972     "-//IETF//DTD HTML STRICT LEVEL 3//",
973     "-//IETF//DTD HTML STRICT//",
974     "-//IETF//DTD HTML//",
975     "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
976     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
977     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
978     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
979     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
980     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
981     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
982     "-//NETSCAPE COMM. CORP.//DTD HTML//",
983     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
984     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
985     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
986     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
987     "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
988     "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
989     "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
990     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
991     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
992     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
993     "-//W3C//DTD HTML 3 1995-03-24//",
994     "-//W3C//DTD HTML 3.2 DRAFT//",
995     "-//W3C//DTD HTML 3.2 FINAL//",
996     "-//W3C//DTD HTML 3.2//",
997     "-//W3C//DTD HTML 3.2S DRAFT//",
998     "-//W3C//DTD HTML 4.0 FRAMESET//",
999     "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
1000     "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
1001     "-//W3C//DTD HTML EXPERIMENTAL 970421//",
1002     "-//W3C//DTD W3 HTML//",
1003     "-//W3O//DTD W3 HTML 3.0//",
1004     "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
1005     "-//WEBTECHS//DTD MOZILLA HTML//",
1006     ]; # $prefix
1007     my $match;
1008     for (@$prefix) {
## NOTE: Compare against the public identifier itself ($pubid), not
## the $prefix array reference, so that the table is actually used.
1009     if (substr ($pubid, 0, length $_) eq $_) {
1010     $match = 1;
1011     last;
1012     }
1013     }
1014     if ($match or
1015     $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
1016     $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
1017     $pubid eq "HTML") {
1018 wakaba 1.79 !!!cp ('t5');
1019 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
1020 wakaba 1.143 } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
1021     $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
## NOTE: For these two prefixes HTML5 selects quirks mode only when
## the system identifier is missing; when a system identifier is
## present the document is in limited quirks mode.
1022 wakaba 1.183 if (not defined $token->{sysid}) {
1023 wakaba 1.79 !!!cp ('t6');
1024 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
1025     } else {
1026 wakaba 1.79 !!!cp ('t7');
1027 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
1028 wakaba 1.3 }
1029 wakaba 1.143 } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
1030     $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
1031 wakaba 1.79 !!!cp ('t8');
1032 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
1033 wakaba 1.79 } else {
1034     !!!cp ('t9');
1035 wakaba 1.18 }
1036 wakaba 1.79 } else {
1037     !!!cp ('t10');
1038 wakaba 1.18 }
1039 wakaba 1.183 if (defined $token->{sysid}) {
1040     my $sysid = $token->{sysid};
1041 wakaba 1.18 $sysid =~ tr/A-Z/a-z/;
1042     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
1043 wakaba 1.143 ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
1044     ## marked as quirks.
1045 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
1046 wakaba 1.79 !!!cp ('t11');
1047     } else {
1048     !!!cp ('t12');
1049 wakaba 1.18 }
1050 wakaba 1.79 } else {
1051     !!!cp ('t13');
1052 wakaba 1.18 }
1053    
1054 wakaba 1.84 ## Go to the "before html" insertion mode.
1055 wakaba 1.18 !!!next-token;
1056     return;
1057     } elsif ({
1058 wakaba 1.55 START_TAG_TOKEN, 1,
1059     END_TAG_TOKEN, 1,
1060     END_OF_FILE_TOKEN, 1,
1061 wakaba 1.18 }->{$token->{type}}) {
1062 wakaba 1.79 !!!cp ('t14');
1063 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
1064 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
1065 wakaba 1.84 ## Go to the "before html" insertion mode.
1066 wakaba 1.18 ## reprocess
1067 wakaba 1.125 !!!ack-later;
1068 wakaba 1.18 return;
1069 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
1070 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
1071 wakaba 1.18 ## Ignore the token
1072 wakaba 1.26
1073 wakaba 1.18 unless (length $token->{data}) {
1074 wakaba 1.79 !!!cp ('t15');
1075 wakaba 1.84 ## Stay in the insertion mode.
1076 wakaba 1.18 !!!next-token;
1077     redo INITIAL;
1078 wakaba 1.79 } else {
1079     !!!cp ('t16');
1080 wakaba 1.3 }
1081 wakaba 1.79 } else {
1082     !!!cp ('t17');
1083 wakaba 1.3 }
1084 wakaba 1.18
1085 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
1086 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
1087 wakaba 1.84 ## Go to the "before html" insertion mode.
1088 wakaba 1.18 ## reprocess
1089     return;
1090 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1091 wakaba 1.79 !!!cp ('t18');
1092 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
1093     $self->{document}->append_child ($comment);
1094    
1095 wakaba 1.84 ## Stay in the insertion mode.
1096 wakaba 1.18 !!!next-token;
1097     redo INITIAL;
1098     } else {
1099 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
1100 wakaba 1.18 }
1101     } # INITIAL
1102 wakaba 1.79
1103     die "$0: _tree_construction_initial: This should be never reached";
1104 wakaba 1.3 } # _tree_construction_initial
1105    
## _tree_construction_root_element ($self)
##
## Implements the "before html" insertion mode.  Works on the
## file-scoped lexical $token and loops until the root element exists:
##   - DOCTYPE token: reports 'in html:#DOCTYPE' and ignores the token.
##   - Comment token: appended to $self->{document}.
##   - Character token: leading white space is dropped; if nothing
##     remains the next token is fetched, otherwise the token falls
##     through to the common code at the end.
##   - <html> start tag: creates the root element from the token,
##     invokes the application cache selection callback with the
##     |manifest| attribute value (or undef), fetches the next token and
##     returns.
##   - Any other start tag, end tag, or end of file: falls through.
## The common fall-through code creates an |html| element without
## attributes, pushes it onto the stack of open elements, invokes the
## application cache selection callback exactly once with undef, and
## returns with the current token left for reprocessing.
1106     sub _tree_construction_root_element ($) {
1107     my $self = shift;
1108 wakaba 1.84
1109     ## NOTE: "before html" insertion mode.
1110 wakaba 1.3
1111     B: {
1112 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
1113 wakaba 1.79 !!!cp ('t19');
1114 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1115 wakaba 1.3 ## Ignore the token
1116 wakaba 1.84 ## Stay in the insertion mode.
1117 wakaba 1.3 !!!next-token;
1118     redo B;
1119 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1120 wakaba 1.79 !!!cp ('t20');
1121 wakaba 1.3 my $comment = $self->{document}->create_comment ($token->{data});
1122     $self->{document}->append_child ($comment);
1123 wakaba 1.84 ## Stay in the insertion mode.
1124 wakaba 1.3 !!!next-token;
1125     redo B;
1126 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
1127 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
1128 wakaba 1.26 ## Ignore the token.
1129    
1130 wakaba 1.3 unless (length $token->{data}) {
1131 wakaba 1.79 !!!cp ('t21');
1132 wakaba 1.84 ## Stay in the insertion mode.
1133 wakaba 1.3 !!!next-token;
1134     redo B;
1135 wakaba 1.79 } else {
1136     !!!cp ('t22');
1137 wakaba 1.3 }
1138 wakaba 1.79 } else {
1139     !!!cp ('t23');
1140 wakaba 1.3 }
1141 wakaba 1.61
## NOTE: The application cache selection callback is not invoked
## here; the common fall-through code below invokes it once for every
## token that implies the root element, including this one.
1143    
1144     #
1145     } elsif ($token->{type} == START_TAG_TOKEN) {
1146 wakaba 1.84 if ($token->{tag_name} eq 'html') {
1147     my $root_element;
1148 wakaba 1.126 !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
1149 wakaba 1.84 $self->{document}->append_child ($root_element);
1150 wakaba 1.123 push @{$self->{open_elements}},
1151     [$root_element, $el_category->{html}];
1152 wakaba 1.84
1153     if ($token->{attributes}->{manifest}) {
1154     !!!cp ('t24');
1155     $self->{application_cache_selection}
1156     ->($token->{attributes}->{manifest}->{value});
1157 wakaba 1.118 ## ISSUE: Spec is unclear on relative references.
1158     ## According to Hixie (#whatwg 2008-03-19), it should be
1159     ## resolved against the base URI of the document in HTML
1160     ## or xml:base of the element in XHTML.
1161 wakaba 1.84 } else {
1162     !!!cp ('t25');
1163     $self->{application_cache_selection}->(undef);
1164     }
1165    
1166 wakaba 1.125 !!!nack ('t25c');
1167    
1168 wakaba 1.84 !!!next-token;
1169     return; ## Go to the "before head" insertion mode.
1170 wakaba 1.61 } else {
1171 wakaba 1.84 !!!cp ('t25.1');
1172     #
1173 wakaba 1.61 }
1174 wakaba 1.3 } elsif ({
1175 wakaba 1.55 END_TAG_TOKEN, 1,
1176     END_OF_FILE_TOKEN, 1,
1177 wakaba 1.3 }->{$token->{type}}) {
1178 wakaba 1.79 !!!cp ('t26');
1179 wakaba 1.3 #
1180     } else {
1181 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
1182 wakaba 1.3 }
1183 wakaba 1.61
## Common fall-through: imply an |html| root element with no attributes.
1184 wakaba 1.126 my $root_element;
1185     !!!create-element ($root_element, $HTML_NS, 'html',, $token);
1186 wakaba 1.84 $self->{document}->append_child ($root_element);
1187 wakaba 1.123 push @{$self->{open_elements}}, [$root_element, $el_category->{html}];
1188 wakaba 1.84
1189     $self->{application_cache_selection}->(undef);
1190    
1191     ## NOTE: Reprocess the token.
1192 wakaba 1.125 !!!ack-later;
1193 wakaba 1.84 return; ## Go to the "before head" insertion mode.
1194 wakaba 1.3 } # B
1195 wakaba 1.79
1196     die "$0: _tree_construction_root_element: This should never be reached";
1197 wakaba 1.3 } # _tree_construction_root_element
1198    
## _reset_insertion_mode ($self)
##
## Sets $self->{insertion_mode} from the stack of open elements.
## Starting at the top of the stack (index -1) and walking toward the
## bottom, each entry (an array reference of [node, category bits]) is
## examined:
##   - a foreign element selects |IN_BODY_IM | IN_FOREIGN_CONTENT_IM|;
##   - a table cell selects |IN_CELL_IM| unless it is the last node;
##   - otherwise the element's local name is looked up in a table of
##     insertion modes;
##   - an |html| element selects |BEFORE_HEAD_IM| or |AFTER_HEAD_IM|
##     depending on whether $self->{head_element} is defined;
##   - if the last node is reached without a match, |IN_BODY_IM| is used.
## When the bottom entry of the stack is reached, $self->{inner_html_node}
## is examined in its place; the method dies if that is not defined.
1199     sub _reset_insertion_mode ($) {
1200     my $self = shift;
1201    
1202     ## Step 1
1203     my $last;
1204    
1205     ## Step 2
1206     my $i = -1;
1207     my $node = $self->{open_elements}->[$i];
1208    
1209     ## Step 3
1210     S3: {
1211 wakaba 1.29 if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
1212     $last = 1;
1213     if (defined $self->{inner_html_node}) {
1214 wakaba 1.140 !!!cp ('t28');
1215     $node = $self->{inner_html_node};
1216     } else {
1217     die "_reset_insertion_mode: t27";
1218 wakaba 1.3 }
1219     }
1220 wakaba 1.140
1221     ## Step 4..14
1222     my $new_mode;
1223     if ($node->[1] & FOREIGN_EL) {
1224     !!!cp ('t28.1');
1225     ## NOTE: Strictly speaking, the line below only applies to MathML and
1226     ## SVG elements. Currently the HTML syntax supports only MathML and
1227     ## SVG elements as foreigners.
1228 wakaba 1.148 $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
1229 wakaba 1.206 } elsif ($node->[1] == TABLE_CELL_EL) {
1230 wakaba 1.140 if ($last) {
1231     !!!cp ('t28.2');
1232     #
1233     } else {
1234     !!!cp ('t28.3');
1235     $new_mode = IN_CELL_IM;
1236     }
1237     } else {
1238     !!!cp ('t28.4');
1239     $new_mode = {
1240 wakaba 1.54 select => IN_SELECT_IM,
1241 wakaba 1.83 ## NOTE: |option| and |optgroup| do not set
1242     ## insertion mode to "in select" by themselves.
1243 wakaba 1.54 tr => IN_ROW_IM,
1244     tbody => IN_TABLE_BODY_IM,
1245     thead => IN_TABLE_BODY_IM,
1246     tfoot => IN_TABLE_BODY_IM,
1247     caption => IN_CAPTION_IM,
1248     colgroup => IN_COLUMN_GROUP_IM,
1249     table => IN_TABLE_IM,
1250     head => IN_BODY_IM, # not in head!
1251     body => IN_BODY_IM,
1252     frameset => IN_FRAMESET_IM,
1253 wakaba 1.123 }->{$node->[0]->manakai_local_name};
1254 wakaba 1.140 }
## NOTE: The |and return| only fires when the assigned mode is a true
## value; every insertion mode constant assigned above is non-zero.
1255     $self->{insertion_mode} = $new_mode and return if defined $new_mode;
1256 wakaba 1.3
1257 wakaba 1.126 ## Step 15
1258 wakaba 1.206 if ($node->[1] == HTML_EL) {
1259 wakaba 1.3 unless (defined $self->{head_element}) {
1260 wakaba 1.79 !!!cp ('t29');
1261 wakaba 1.54 $self->{insertion_mode} = BEFORE_HEAD_IM;
1262 wakaba 1.3 } else {
1263 wakaba 1.81 ## ISSUE: Can this state be reached?
1264 wakaba 1.79 !!!cp ('t30');
1265 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
1266 wakaba 1.3 }
1267     return;
1268 wakaba 1.79 } else {
1269     !!!cp ('t31');
1270 wakaba 1.3 }
1271    
1272 wakaba 1.126 ## Step 16
1273 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM and return if $last;
1274 wakaba 1.3
1275 wakaba 1.126 ## Step 17
1276 wakaba 1.3 $i--;
1277     $node = $self->{open_elements}->[$i];
1278    
1279 wakaba 1.126 ## Step 18
1280 wakaba 1.3 redo S3;
1281     } # S3
1282 wakaba 1.79
1283     die "$0: _reset_insertion_mode: This line should never be reached";
1284 wakaba 1.3 } # _reset_insertion_mode
1285    
1286     sub _tree_construction_main ($) {
1287     my $self = shift;
1288    
1289 wakaba 1.1 my $active_formatting_elements = [];
1290    
1291     my $reconstruct_active_formatting_elements = sub { # MUST
1292     my $insert = shift;
1293    
1294     ## Step 1
1295     return unless @$active_formatting_elements;
1296    
1297     ## Step 3
1298     my $i = -1;
1299     my $entry = $active_formatting_elements->[$i];
1300    
1301     ## Step 2
1302     return if $entry->[0] eq '#marker';
1303 wakaba 1.3 for (@{$self->{open_elements}}) {
1304 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1305 wakaba 1.79 !!!cp ('t32');
1306 wakaba 1.1 return;
1307     }
1308     }
1309    
1310     S4: {
1311     ## Step 4
1312     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
1313    
1314     ## Step 5
1315     $i--;
1316     $entry = $active_formatting_elements->[$i];
1317    
1318     ## Step 6
1319     if ($entry->[0] eq '#marker') {
1320 wakaba 1.81 !!!cp ('t33_1');
1321 wakaba 1.1 #
1322     } else {
1323     my $in_open_elements;
1324 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
1325 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1326 wakaba 1.79 !!!cp ('t33');
1327 wakaba 1.1 $in_open_elements = 1;
1328     last OE;
1329     }
1330     }
1331     if ($in_open_elements) {
1332 wakaba 1.79 !!!cp ('t34');
1333 wakaba 1.1 #
1334     } else {
1335 wakaba 1.81 ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
1336 wakaba 1.79 !!!cp ('t35');
1337 wakaba 1.1 redo S4;
1338     }
1339     }
1340    
1341     ## Step 7
1342     $i++;
1343     $entry = $active_formatting_elements->[$i];
1344     } # S4
1345    
1346     S7: {
1347     ## Step 8
1348     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
1349    
1350     ## Step 9
1351     $insert->($clone->[0]);
1352 wakaba 1.3 push @{$self->{open_elements}}, $clone;
1353 wakaba 1.1
1354     ## Step 10
1355 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
1356 wakaba 1.1
1357     ## Step 11
1358     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
1359 wakaba 1.79 !!!cp ('t36');
1360 wakaba 1.1 ## Step 7'
1361     $i++;
1362     $entry = $active_formatting_elements->[$i];
1363    
1364     redo S7;
1365     }
1366 wakaba 1.79
1367     !!!cp ('t37');
1368 wakaba 1.1 } # S7
1369     }; # $reconstruct_active_formatting_elements
1370    
1371     my $clear_up_to_marker = sub {
1372     for (reverse 0..$#$active_formatting_elements) {
1373     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
1374 wakaba 1.79 !!!cp ('t38');
1375 wakaba 1.1 splice @$active_formatting_elements, $_;
1376     return;
1377     }
1378     }
1379 wakaba 1.79
1380     !!!cp ('t39');
1381 wakaba 1.1 }; # $clear_up_to_marker
1382    
1383 wakaba 1.96 my $insert;
1384    
1385     my $parse_rcdata = sub ($) {
1386     my ($content_model_flag) = @_;
1387 wakaba 1.25
1388     ## Step 1
1389     my $start_tag_name = $token->{tag_name};
1390 wakaba 1.205 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
1391 wakaba 1.25
1392     ## Step 2
1393 wakaba 1.40 $self->{content_model} = $content_model_flag; # CDATA or RCDATA
1394 wakaba 1.13 delete $self->{escape}; # MUST
1395 wakaba 1.25
1396 wakaba 1.205 ## Step 3, 4
1397     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
1398    
1399 wakaba 1.125 !!!nack ('t40.1');
1400 wakaba 1.1 !!!next-token;
1401 wakaba 1.25 }; # $parse_rcdata
1402 wakaba 1.1
1403 wakaba 1.96 my $script_start_tag = sub () {
1404 wakaba 1.205 ## Step 1
1405 wakaba 1.1 my $script_el;
1406 wakaba 1.126 !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
1407 wakaba 1.205
1408     ## Step 2
1409 wakaba 1.1 ## TODO: mark as "parser-inserted"
1410    
1411 wakaba 1.205 ## Step 3
1412     ## TODO: Mark as "already executed", if ...
1413    
1414     ## Step 4
1415     $insert->($script_el);
1416    
1417     ## ISSUE: $script_el is not put into the stack
1418     push @{$self->{open_elements}}, [$script_el, $el_category->{script}];
1419    
1420     ## Step 5
1421 wakaba 1.40 $self->{content_model} = CDATA_CONTENT_MODEL;
1422 wakaba 1.13 delete $self->{escape}; # MUST
1423 wakaba 1.1
1424 wakaba 1.205 ## Step 6-7
1425     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
1426 wakaba 1.25
1427 wakaba 1.205 !!!nack ('t40.2');
1428 wakaba 1.1 !!!next-token;
1429     }; # $script_start_tag
1430    
1431 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
1432     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
1433 wakaba 1.202 ## NOTE: $open_tables->[-1]->[2] is set false when non-Text node inserted.
1434 wakaba 1.102 my $open_tables = [[$self->{open_elements}->[0]->[0]]];
1435    
1436 wakaba 1.1 my $formatting_end_tag = sub {
1437 wakaba 1.113 my $end_tag_token = shift;
1438     my $tag_name = $end_tag_token->{tag_name};
1439 wakaba 1.1
1440 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
1441 wakaba 1.102
1442 wakaba 1.1 FET: {
1443     ## Step 1
1444     my $formatting_element;
1445     my $formatting_element_i_in_active;
1446     AFE: for (reverse 0..$#$active_formatting_elements) {
1447 wakaba 1.123 if ($active_formatting_elements->[$_]->[0] eq '#marker') {
1448     !!!cp ('t52');
1449     last AFE;
1450     } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
1451     eq $tag_name) {
1452 wakaba 1.79 !!!cp ('t51');
1453 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
1454     $formatting_element_i_in_active = $_;
1455     last AFE;
1456     }
1457     } # AFE
1458     unless (defined $formatting_element) {
1459 wakaba 1.79 !!!cp ('t53');
1460 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
1461 wakaba 1.1 ## Ignore the token
1462     !!!next-token;
1463     return;
1464     }
1465     ## has an element in scope
1466     my $in_scope = 1;
1467     my $formatting_element_i_in_open;
1468 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
1469     my $node = $self->{open_elements}->[$_];
1470 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
1471     if ($in_scope) {
1472 wakaba 1.79 !!!cp ('t54');
1473 wakaba 1.1 $formatting_element_i_in_open = $_;
1474     last INSCOPE;
1475     } else { # in open elements but not in scope
1476 wakaba 1.79 !!!cp ('t55');
1477 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
1478     text => $token->{tag_name},
1479 wakaba 1.113 token => $end_tag_token);
1480 wakaba 1.1 ## Ignore the token
1481     !!!next-token;
1482     return;
1483     }
1484 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
1485 wakaba 1.79 !!!cp ('t56');
1486 wakaba 1.1 $in_scope = 0;
1487     }
1488     } # INSCOPE
1489     unless (defined $formatting_element_i_in_open) {
1490 wakaba 1.79 !!!cp ('t57');
1491 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
1492     text => $token->{tag_name},
1493 wakaba 1.113 token => $end_tag_token);
1494 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
1495     !!!next-token; ## TODO: ok?
1496     return;
1497     }
1498 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
1499 wakaba 1.79 !!!cp ('t58');
1500 wakaba 1.122 !!!parse-error (type => 'not closed',
1501 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1502 wakaba 1.122 ->manakai_local_name,
1503 wakaba 1.113 token => $end_tag_token);
1504 wakaba 1.1 }
1505    
1506     ## Step 2
1507     my $furthest_block;
1508     my $furthest_block_i_in_open;
1509 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1510     my $node = $self->{open_elements}->[$_];
1511 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
1512 wakaba 1.1 #not $phrasing_category->{$node->[1]} and
1513 wakaba 1.123 ($node->[1] & SPECIAL_EL or
1514     $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
1515 wakaba 1.79 !!!cp ('t59');
1516 wakaba 1.1 $furthest_block = $node;
1517     $furthest_block_i_in_open = $_;
1518 wakaba 1.203 ## NOTE: The topmost (eldest) node.
1519 wakaba 1.1 } elsif ($node->[0] eq $formatting_element->[0]) {
1520 wakaba 1.79 !!!cp ('t60');
1521 wakaba 1.1 last OE;
1522     }
1523     } # OE
1524    
1525     ## Step 3
1526     unless (defined $furthest_block) { # MUST
1527 wakaba 1.79 !!!cp ('t61');
1528 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
1529 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
1530     !!!next-token;
1531     return;
1532     }
1533    
1534     ## Step 4
1535 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
1536 wakaba 1.1
1537     ## Step 5
1538     my $furthest_block_parent = $furthest_block->[0]->parent_node;
1539     if (defined $furthest_block_parent) {
1540 wakaba 1.79 !!!cp ('t62');
1541 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
1542     }
1543    
1544     ## Step 6
1545     my $bookmark_prev_el
1546     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
1547     ->[0];
1548    
1549     ## Step 7
1550     my $node = $furthest_block;
1551     my $node_i_in_open = $furthest_block_i_in_open;
1552     my $last_node = $furthest_block;
1553     S7: {
1554     ## Step 1
1555     $node_i_in_open--;
1556 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
1557 wakaba 1.1
1558     ## Step 2
1559     my $node_i_in_active;
1560     S7S2: {
1561     for (reverse 0..$#$active_formatting_elements) {
1562     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
1563 wakaba 1.79 !!!cp ('t63');
1564 wakaba 1.1 $node_i_in_active = $_;
1565     last S7S2;
1566     }
1567     }
1568 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
1569 wakaba 1.1 redo S7;
1570     } # S7S2
1571    
1572     ## Step 3
1573     last S7 if $node->[0] eq $formatting_element->[0];
1574    
1575     ## Step 4
1576     if ($last_node->[0] eq $furthest_block->[0]) {
1577 wakaba 1.79 !!!cp ('t64');
1578 wakaba 1.1 $bookmark_prev_el = $node->[0];
1579     }
1580    
1581     ## Step 5
1582     if ($node->[0]->has_child_nodes ()) {
1583 wakaba 1.79 !!!cp ('t65');
1584 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
1585     $active_formatting_elements->[$node_i_in_active] = $clone;
1586 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
1587 wakaba 1.1 $node = $clone;
1588     }
1589    
1590     ## Step 6
1591     $node->[0]->append_child ($last_node->[0]);
1592    
1593     ## Step 7
1594     $last_node = $node;
1595    
1596     ## Step 8
1597     redo S7;
1598     } # S7
1599    
1600     ## Step 8
1601 wakaba 1.123 if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
1602 wakaba 1.102 my $foster_parent_element;
1603     my $next_sibling;
1604 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
1605 wakaba 1.206 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
1606 wakaba 1.102 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
1607     if (defined $parent and $parent->node_type == 1) {
1608     !!!cp ('t65.1');
1609     $foster_parent_element = $parent;
1610     $next_sibling = $self->{open_elements}->[$_]->[0];
1611     } else {
1612     !!!cp ('t65.2');
1613     $foster_parent_element
1614     = $self->{open_elements}->[$_ - 1]->[0];
1615     }
1616     last OE;
1617     }
1618     } # OE
1619     $foster_parent_element = $self->{open_elements}->[0]->[0]
1620     unless defined $foster_parent_element;
1621     $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
1622     $open_tables->[-1]->[1] = 1; # tainted
1623     } else {
1624     !!!cp ('t65.3');
1625     $common_ancestor_node->[0]->append_child ($last_node->[0]);
1626     }
1627 wakaba 1.1
1628     ## Step 9
1629     my $clone = [$formatting_element->[0]->clone_node (0),
1630     $formatting_element->[1]];
1631    
1632     ## Step 10
1633     my @cn = @{$furthest_block->[0]->child_nodes};
1634     $clone->[0]->append_child ($_) for @cn;
1635    
1636     ## Step 11
1637     $furthest_block->[0]->append_child ($clone->[0]);
1638    
1639     ## Step 12
1640     my $i;
1641     AFE: for (reverse 0..$#$active_formatting_elements) {
1642     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
1643 wakaba 1.79 !!!cp ('t66');
1644 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
1645     $i-- and last AFE if defined $i;
1646     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
1647 wakaba 1.79 !!!cp ('t67');
1648 wakaba 1.1 $i = $_;
1649     }
1650     } # AFE
1651     splice @$active_formatting_elements, $i + 1, 0, $clone;
1652    
1653     ## Step 13
1654     undef $i;
1655 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
1656     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
1657 wakaba 1.79 !!!cp ('t68');
1658 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
1659 wakaba 1.1 $i-- and last OE if defined $i;
1660 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
1661 wakaba 1.79 !!!cp ('t69');
1662 wakaba 1.1 $i = $_;
1663     }
1664     } # OE
1665 wakaba 1.203 splice @{$self->{open_elements}}, $i + 1, 0, $clone;
1666 wakaba 1.1
1667     ## Step 14
1668     redo FET;
1669     } # FET
1670     }; # $formatting_end_tag
1671    
## Default insertion routine: append the given node as the last child
## of the current node, i.e. the node held by the last entry of the
## stack of open elements.  The same closure is also stored into
## |$insert|.
$insert = my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  return $current_node->append_child ($child);
}; # $insert_to_current
1675    
## Insertion routine used while foster parenting may apply.
##
## If the current node (last entry of the stack of open elements) is
## not in the |TABLE_ROWS_EL| category, |$child| is simply appended to
## it.  Otherwise |$child| is placed relative to the innermost |table|
## entry on the stack:
##
##   - when that |table| has a parent whose node type is 1 (an
##     element), |$child| is inserted into the parent, just before the
##     |table|;
##   - when it has no such parent, |$child| is appended to the node of
##     the stack entry immediately below the |table|;
##   - when there is no |table| entry at all, |$child| is appended to
##     the node of the first stack entry.
##
## After a foster-parented insertion, slot [1] of the last
## |$open_tables| entry is set (the "tainted" flag).
my $insert_to_foster = sub {
  my ($child) = @_;
  my $stack = $self->{open_elements};

  unless ($stack->[-1]->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $stack->[-1]->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent, $reference_node);

  ## Walk the stack from the top (most recently opened) downwards and
  ## stop at the first |table| entry.
  my $i = $#$stack;
  TABLE: while ($i >= 0) {
    my $entry = $stack->[$i];
    if ($entry->[1] == TABLE_EL) {
      my $table_parent = $entry->[0]->parent_node;
      if (defined $table_parent and $table_parent->node_type == 1) {
        !!!cp ('t70');
        $foster_parent = $table_parent;
        $reference_node = $entry->[0];
      } else {
        !!!cp ('t71');
        $foster_parent = $stack->[$i - 1]->[0];
      }
      last TABLE;
    }
    $i--;
  } # TABLE

  ## No |table| on the stack: fall back to the bottommost entry.
  $foster_parent = $stack->[0]->[0] unless defined $foster_parent;

  ## With an undefined reference node this is an append.
  $foster_parent->insert_before ($child, $reference_node);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
1707    
1708 wakaba 1.204 ## NOTE: Insert a character (MUST): When a character is inserted, if
1709     ## the last node that was inserted by the parser is a Text node and
1710     ## the character has to be inserted after that node, then the
1711     ## character is appended to the Text node. However, if any other
1712     ## node is inserted by the parser, then a new Text node is created
1713     ## and the character is appended as that Text node. If I'm not
1714     ## wrong, for a parser with scripting disabled, there are only two
1715     ## cases where this occurs. One is the case where an element node
1716     ## is inserted to the |head| element. This is covered by using the
1717 wakaba 1.202 ## |$self->{head_element_inserted}| flag. Another is the case where
1718     ## an element or comment is inserted into the |table| subtree while
1719     ## foster parenting happens. This is covered by using the [2] flag
1720     ## of the |$open_tables| structure. All other cases are handled
1721     ## simply by calling |manakai_append_text| method.
1722    
1723 wakaba 1.204 ## TODO: |<body><script>document.write("a<br>");
1724     ## document.body.removeChild (document.body.lastChild);
1725     ## document.write ("b")</script>|
1726    
1727 wakaba 1.126 B: while (1) {
1728 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
1729 wakaba 1.79 !!!cp ('t73');
1730 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
1731 wakaba 1.52 ## Ignore the token
1732     ## Stay in the phase
1733     !!!next-token;
1734 wakaba 1.126 next B;
1735 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
1736 wakaba 1.52 $token->{tag_name} eq 'html') {
1737 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
1738 wakaba 1.79 !!!cp ('t79');
1739 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1740 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
1741     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
1742 wakaba 1.79 !!!cp ('t80');
1743 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
1744 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
1745 wakaba 1.79 } else {
1746     !!!cp ('t81');
1747 wakaba 1.52 }
1748    
1749 wakaba 1.84 !!!cp ('t82');
1750 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
1751 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
1752     for my $attr_name (keys %{$token->{attributes}}) {
1753     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
1754 wakaba 1.79 !!!cp ('t84');
1755 wakaba 1.52 $top_el->set_attribute_ns
1756     (undef, [undef, $attr_name],
1757     $token->{attributes}->{$attr_name}->{value});
1758     }
1759     }
1760 wakaba 1.125 !!!nack ('t84.1');
1761 wakaba 1.52 !!!next-token;
1762 wakaba 1.126 next B;
1763 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
1764 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
1765 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
1766 wakaba 1.79 !!!cp ('t85');
1767 wakaba 1.52 $self->{document}->append_child ($comment);
1768 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
1769 wakaba 1.79 !!!cp ('t86');
1770 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
1771     } else {
1772 wakaba 1.79 !!!cp ('t87');
1773 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
1774 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
1775 wakaba 1.52 }
1776     !!!next-token;
1777 wakaba 1.126 next B;
1778 wakaba 1.205 } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) {
1779     if ($token->{type} == CHARACTER_TOKEN) {
1780     $token->{data} =~ s/^\x0A// if $self->{ignore_newline};
1781     delete $self->{ignore_newline};
1782    
1783     if (length $token->{data}) {
1784     !!!cp ('t43');
1785     $self->{open_elements}->[-1]->[0]->manakai_append_text
1786     ($token->{data});
1787     } else {
1788     !!!cp ('t43.1');
1789     }
1790     !!!next-token;
1791     next B;
1792     } elsif ($token->{type} == END_TAG_TOKEN) {
1793     delete $self->{ignore_newline};
1794    
1795     if ($token->{tag_name} eq 'script') {
1796     !!!cp ('t50');
1797    
1798     ## Para 1-2
1799     my $script = pop @{$self->{open_elements}};
1800    
1801     ## Para 3
1802     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1803    
1804     ## Para 4
1805     ## TODO: $old_insertion_point = $current_insertion_point;
1806     ## TODO: $current_insertion_point = just before $self->{nc};
1807    
1808     ## Para 5
1809     ## TODO: Run the $script->[0].
1810    
1811     ## Para 6
1812     ## TODO: $current_insertion_point = $old_insertion_point;
1813    
1814     ## Para 7
1815     ## TODO: if ($pending_external_script) {
1816     ## TODO: ...
1817     ## TODO: }
1818    
1819     !!!next-token;
1820     next B;
1821     } else {
1822     !!!cp ('t42');
1823    
1824     pop @{$self->{open_elements}};
1825    
1826     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1827     !!!next-token;
1828     next B;
1829     }
1830     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1831     delete $self->{ignore_newline};
1832    
1833     !!!cp ('t44');
1834     !!!parse-error (type => 'not closed',
1835     text => $self->{open_elements}->[-1]->[0]
1836     ->manakai_local_name,
1837     token => $token);
1838    
1839 wakaba 1.206 #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) {
1840 wakaba 1.205 # ## TODO: Mark as "already executed"
1841     #}
1842    
1843     pop @{$self->{open_elements}};
1844    
1845     $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM;
1846     ## Reprocess.
1847     next B;
1848     } else {
1849     die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type";
1850     }
1851 wakaba 1.126 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
1852     if ($token->{type} == CHARACTER_TOKEN) {
1853     !!!cp ('t87.1');
1854     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
1855     !!!next-token;
1856     next B;
1857     } elsif ($token->{type} == START_TAG_TOKEN) {
1858 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
1859     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
1860 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
1861     ($token->{tag_name} eq 'svg' and
1862 wakaba 1.206 $self->{open_elements}->[-1]->[1] == MML_AXML_EL)) {
1863 wakaba 1.126 ## NOTE: "using the rules for secondary insertion mode" then "continue"
1864     !!!cp ('t87.2');
1865     #
1866     } elsif ({
1867 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
1868 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
1869     em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
1870     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
1871     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
1872     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
1873     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
1874     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
1875 wakaba 1.126 }->{$token->{tag_name}}) {
1876     !!!cp ('t87.2');
1877     !!!parse-error (type => 'not closed',
1878 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1879 wakaba 1.126 ->manakai_local_name,
1880     token => $token);
1881    
1882     pop @{$self->{open_elements}}
1883     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1884    
1885 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1886 wakaba 1.126 ## Reprocess.
1887     next B;
1888     } else {
1889 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
1890     my $tag_name = $token->{tag_name};
1891     if ($nsuri eq $SVG_NS) {
1892     $tag_name = {
1893     altglyph => 'altGlyph',
1894     altglyphdef => 'altGlyphDef',
1895     altglyphitem => 'altGlyphItem',
1896     animatecolor => 'animateColor',
1897     animatemotion => 'animateMotion',
1898     animatetransform => 'animateTransform',
1899     clippath => 'clipPath',
1900     feblend => 'feBlend',
1901     fecolormatrix => 'feColorMatrix',
1902     fecomponenttransfer => 'feComponentTransfer',
1903     fecomposite => 'feComposite',
1904     feconvolvematrix => 'feConvolveMatrix',
1905     fediffuselighting => 'feDiffuseLighting',
1906     fedisplacementmap => 'feDisplacementMap',
1907     fedistantlight => 'feDistantLight',
1908     feflood => 'feFlood',
1909     fefunca => 'feFuncA',
1910     fefuncb => 'feFuncB',
1911     fefuncg => 'feFuncG',
1912     fefuncr => 'feFuncR',
1913     fegaussianblur => 'feGaussianBlur',
1914     feimage => 'feImage',
1915     femerge => 'feMerge',
1916     femergenode => 'feMergeNode',
1917     femorphology => 'feMorphology',
1918     feoffset => 'feOffset',
1919     fepointlight => 'fePointLight',
1920     fespecularlighting => 'feSpecularLighting',
1921     fespotlight => 'feSpotLight',
1922     fetile => 'feTile',
1923     feturbulence => 'feTurbulence',
1924     foreignobject => 'foreignObject',
1925     glyphref => 'glyphRef',
1926     lineargradient => 'linearGradient',
1927     radialgradient => 'radialGradient',
1928     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
1929     textpath => 'textPath',
1930     }->{$tag_name} || $tag_name;
1931     }
1932    
1933     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
1934    
1935     ## "adjust foreign attributes" - done in insert-element-f
1936 wakaba 1.126
1937 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
1938 wakaba 1.126
1939     if ($self->{self_closing}) {
1940     pop @{$self->{open_elements}};
1941     !!!ack ('t87.3');
1942     } else {
1943     !!!cp ('t87.4');
1944     }
1945    
1946     !!!next-token;
1947     next B;
1948     }
1949     } elsif ($token->{type} == END_TAG_TOKEN) {
1950     ## NOTE: "using the rules for secondary insertion mode" then "continue"
1951     !!!cp ('t87.5');
1952     #
1953     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
1954     !!!cp ('t87.6');
1955 wakaba 1.146 !!!parse-error (type => 'not closed',
1956 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
1957 wakaba 1.146 ->manakai_local_name,
1958     token => $token);
1959    
1960     pop @{$self->{open_elements}}
1961     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
1962    
1963 wakaba 1.200 ## NOTE: |<span><svg>| ... two parse errors, |<svg>| ... a parse error.
1964    
1965 wakaba 1.146 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
1966     ## Reprocess.
1967     next B;
1968 wakaba 1.126 } else {
1969     die "$0: $token->{type}: Unknown token type";
1970     }
1971     }
1972    
1973     if ($self->{insertion_mode} & HEAD_IMS) {
1974 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
1975 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
1976 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
1977 wakaba 1.202 if ($self->{head_element_inserted}) {
1978     !!!cp ('t88.3');
1979     $self->{open_elements}->[-1]->[0]->append_child
1980     ($self->{document}->create_text_node ($1));
1981     delete $self->{head_element_inserted};
1982     ## NOTE: |</head> <link> |
1983     #
1984     } else {
1985     !!!cp ('t88.2');
1986     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
1987     ## NOTE: |</head> &#x20;|
1988     #
1989     }
1990 wakaba 1.99 } else {
1991     !!!cp ('t88.1');
1992     ## Ignore the token.
1993 wakaba 1.177 #
1994 wakaba 1.99 }
1995 wakaba 1.52 unless (length $token->{data}) {
1996 wakaba 1.79 !!!cp ('t88');
1997 wakaba 1.52 !!!next-token;
1998 wakaba 1.126 next B;
1999 wakaba 1.1 }
2000 wakaba 1.177 ## TODO: set $token->{column} appropriately
2001 wakaba 1.1 }
2002 wakaba 1.52
2003 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2004 wakaba 1.79 !!!cp ('t89');
2005 wakaba 1.52 ## As if <head>
2006 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2007 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2008 wakaba 1.123 push @{$self->{open_elements}},
2009     [$self->{head_element}, $el_category->{head}];
2010 wakaba 1.52
2011     ## Reprocess in the "in head" insertion mode...
2012     pop @{$self->{open_elements}};
2013    
2014     ## Reprocess in the "after head" insertion mode...
2015 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2016 wakaba 1.79 !!!cp ('t90');
2017 wakaba 1.52 ## As if </noscript>
2018     pop @{$self->{open_elements}};
2019 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
2020 wakaba 1.1
2021 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
2022     ## As if </head>
2023     pop @{$self->{open_elements}};
2024    
2025     ## Reprocess in the "after head" insertion mode...
2026 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2027 wakaba 1.79 !!!cp ('t91');
2028 wakaba 1.52 pop @{$self->{open_elements}};
2029    
2030     ## Reprocess in the "after head" insertion mode...
2031 wakaba 1.79 } else {
2032     !!!cp ('t92');
2033 wakaba 1.1 }
2034 wakaba 1.52
2035 wakaba 1.123 ## "after head" insertion mode
2036     ## As if <body>
2037     !!!insert-element ('body',, $token);
2038     $self->{insertion_mode} = IN_BODY_IM;
2039     ## reprocess
2040 wakaba 1.126 next B;
2041 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
2042     if ($token->{tag_name} eq 'head') {
2043     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2044     !!!cp ('t93');
2045 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
2046 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
2047     ($self->{head_element});
2048     push @{$self->{open_elements}},
2049     [$self->{head_element}, $el_category->{head}];
2050     $self->{insertion_mode} = IN_HEAD_IM;
2051 wakaba 1.125 !!!nack ('t93.1');
2052 wakaba 1.123 !!!next-token;
2053 wakaba 1.126 next B;
2054 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2055 wakaba 1.139 !!!cp ('t93.2');
2056 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
2057     token => $token);
2058 wakaba 1.139 ## Ignore the token
2059     !!!nack ('t93.3');
2060     !!!next-token;
2061     next B;
2062 wakaba 1.125 } else {
2063     !!!cp ('t95');
2064 wakaba 1.153 !!!parse-error (type => 'in head:head',
2065     token => $token); # or in head noscript
2066 wakaba 1.125 ## Ignore the token
2067     !!!nack ('t95.1');
2068     !!!next-token;
2069 wakaba 1.126 next B;
2070 wakaba 1.125 }
2071     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2072 wakaba 1.126 !!!cp ('t96');
2073     ## As if <head>
2074     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2075     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2076     push @{$self->{open_elements}},
2077     [$self->{head_element}, $el_category->{head}];
2078 wakaba 1.52
2079 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
2080     ## Reprocess in the "in head" insertion mode...
2081     } else {
2082     !!!cp ('t97');
2083     }
2084 wakaba 1.52
2085 wakaba 1.202 if ($token->{tag_name} eq 'base') {
2086     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2087     !!!cp ('t98');
2088     ## As if </noscript>
2089     pop @{$self->{open_elements}};
2090     !!!parse-error (type => 'in noscript', text => 'base',
2091     token => $token);
2092    
2093     $self->{insertion_mode} = IN_HEAD_IM;
2094     ## Reprocess in the "in head" insertion mode...
2095     } else {
2096     !!!cp ('t99');
2097     }
2098 wakaba 1.49
2099 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2100     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2101     !!!cp ('t100');
2102     !!!parse-error (type => 'after head',
2103     text => $token->{tag_name}, token => $token);
2104     push @{$self->{open_elements}},
2105     [$self->{head_element}, $el_category->{head}];
2106     $self->{head_element_inserted} = 1;
2107     } else {
2108     !!!cp ('t101');
2109     }
2110     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2111     pop @{$self->{open_elements}};
2112     pop @{$self->{open_elements}} # <head>
2113     if $self->{insertion_mode} == AFTER_HEAD_IM;
2114     !!!nack ('t101.1');
2115     !!!next-token;
2116     next B;
2117 wakaba 1.194 } elsif ($token->{tag_name} eq 'link') {
2118     ## NOTE: There is a "as if in head" code clone.
2119     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2120     !!!cp ('t102');
2121     !!!parse-error (type => 'after head',
2122     text => $token->{tag_name}, token => $token);
2123     push @{$self->{open_elements}},
2124     [$self->{head_element}, $el_category->{head}];
2125 wakaba 1.202 $self->{head_element_inserted} = 1;
2126 wakaba 1.194 } else {
2127     !!!cp ('t103');
2128     }
2129     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2130     pop @{$self->{open_elements}};
2131     pop @{$self->{open_elements}} # <head>
2132     if $self->{insertion_mode} == AFTER_HEAD_IM;
2133     !!!ack ('t103.1');
2134     !!!next-token;
2135     next B;
2136     } elsif ($token->{tag_name} eq 'command' or
2137     $token->{tag_name} eq 'eventsource') {
2138     if ($self->{insertion_mode} == IN_HEAD_IM) {
2139     ## NOTE: If the insertion mode at the time of the emission
2140     ## of the token was "before head", $self->{insertion_mode}
2141     ## is already changed to |IN_HEAD_IM|.
2142    
2143     ## NOTE: There is a "as if in head" code clone.
2144     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2145     pop @{$self->{open_elements}};
2146     pop @{$self->{open_elements}} # <head>
2147     if $self->{insertion_mode} == AFTER_HEAD_IM;
2148     !!!ack ('t103.2');
2149     !!!next-token;
2150     next B;
2151     } else {
2152     ## NOTE: "in head noscript" or "after head" insertion mode
2153     ## - in these cases, these tags are treated the same as
2154     ## normal in-body tags.
2155     !!!cp ('t103.3');
2156     #
2157     }
2158 wakaba 1.202 } elsif ($token->{tag_name} eq 'meta') {
2159     ## NOTE: There is a "as if in head" code clone.
2160     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2161     !!!cp ('t104');
2162     !!!parse-error (type => 'after head',
2163     text => $token->{tag_name}, token => $token);
2164     push @{$self->{open_elements}},
2165     [$self->{head_element}, $el_category->{head}];
2166     $self->{head_element_inserted} = 1;
2167     } else {
2168     !!!cp ('t105');
2169     }
2170     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2171     my $meta_el = pop @{$self->{open_elements}};
2172 wakaba 1.34
2173     unless ($self->{confident}) {
2174 wakaba 1.134 if ($token->{attributes}->{charset}) {
2175 wakaba 1.79 !!!cp ('t106');
2176 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2177     ## in the {change_encoding} callback.
2178 wakaba 1.63 $self->{change_encoding}
2179 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
2180     $token);
2181 wakaba 1.66
2182     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2183     ->set_user_data (manakai_has_reference =>
2184     $token->{attributes}->{charset}
2185     ->{has_reference});
2186 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
2187     if ($token->{attributes}->{content}->{value}
2188 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
2189 wakaba 1.186 [\x09\x0A\x0C\x0D\x20]*=
2190     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
2191     ([^"'\x09\x0A\x0C\x0D\x20]
2192     [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
2193 wakaba 1.79 !!!cp ('t107');
2194 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
2195     ## in the {change_encoding} callback.
2196 wakaba 1.63 $self->{change_encoding}
2197 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
2198     $token);
2199 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2200     ->set_user_data (manakai_has_reference =>
2201     $token->{attributes}->{content}
2202     ->{has_reference});
2203 wakaba 1.79 } else {
2204     !!!cp ('t108');
2205 wakaba 1.63 }
2206 wakaba 1.34 }
2207 wakaba 1.66 } else {
2208     if ($token->{attributes}->{charset}) {
2209 wakaba 1.79 !!!cp ('t109');
2210 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
2211     ->set_user_data (manakai_has_reference =>
2212     $token->{attributes}->{charset}
2213     ->{has_reference});
2214     }
2215 wakaba 1.68 if ($token->{attributes}->{content}) {
2216 wakaba 1.79 !!!cp ('t110');
2217 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
2218     ->set_user_data (manakai_has_reference =>
2219     $token->{attributes}->{content}
2220     ->{has_reference});
2221     }
2222 wakaba 1.34 }
2223    
2224 wakaba 1.100 pop @{$self->{open_elements}} # <head>
2225 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
2226 wakaba 1.125 !!!ack ('t110.1');
2227 wakaba 1.34 !!!next-token;
2228 wakaba 1.126 next B;
2229 wakaba 1.202 } elsif ($token->{tag_name} eq 'title') {
2230     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2231     !!!cp ('t111');
2232     ## As if </noscript>
2233     pop @{$self->{open_elements}};
2234     !!!parse-error (type => 'in noscript', text => 'title',
2235     token => $token);
2236    
2237     $self->{insertion_mode} = IN_HEAD_IM;
2238     ## Reprocess in the "in head" insertion mode...
2239     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2240     !!!cp ('t112');
2241     !!!parse-error (type => 'after head',
2242     text => $token->{tag_name}, token => $token);
2243     push @{$self->{open_elements}},
2244     [$self->{head_element}, $el_category->{head}];
2245     $self->{head_element_inserted} = 1;
2246     } else {
2247     !!!cp ('t113');
2248     }
2249 wakaba 1.49
2250 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2251     $parse_rcdata->(RCDATA_CONTENT_MODEL);
2252 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2253     splice @{$self->{open_elements}}, -2, 1, () # <head>
2254 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2255 wakaba 1.202 next B;
2256     } elsif ($token->{tag_name} eq 'style' or
2257     $token->{tag_name} eq 'noframes') {
2258     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
2259     ## insertion mode IN_HEAD_IM)
2260     ## NOTE: There is a "as if in head" code clone.
2261     if ($self->{insertion_mode} == AFTER_HEAD_IM) {
2262     !!!cp ('t114');
2263     !!!parse-error (type => 'after head',
2264     text => $token->{tag_name}, token => $token);
2265     push @{$self->{open_elements}},
2266     [$self->{head_element}, $el_category->{head}];
2267     $self->{head_element_inserted} = 1;
2268     } else {
2269     !!!cp ('t115');
2270     }
2271     $parse_rcdata->(CDATA_CONTENT_MODEL);
2272 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2273     splice @{$self->{open_elements}}, -2, 1, () # <head>
2274 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2275 wakaba 1.202 next B;
2276 wakaba 1.205 } elsif ($token->{tag_name} eq 'noscript') {
2277 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
2278 wakaba 1.79 !!!cp ('t116');
2279 wakaba 1.25 ## NOTE: and scripting is disabled
2280 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
2281 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
2282 wakaba 1.125 !!!nack ('t116.1');
2283 wakaba 1.1 !!!next-token;
2284 wakaba 1.126 next B;
2285 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2286 wakaba 1.79 !!!cp ('t117');
2287 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
2288     token => $token);
2289 wakaba 1.1 ## Ignore the token
2290 wakaba 1.125 !!!nack ('t117.1');
2291 wakaba 1.41 !!!next-token;
2292 wakaba 1.126 next B;
2293 wakaba 1.1 } else {
2294 wakaba 1.79 !!!cp ('t118');
2295 wakaba 1.25 #
2296 wakaba 1.1 }
2297 wakaba 1.202 } elsif ($token->{tag_name} eq 'script') {
2298     if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2299     !!!cp ('t119');
2300     ## As if </noscript>
2301     pop @{$self->{open_elements}};
2302     !!!parse-error (type => 'in noscript', text => 'script',
2303     token => $token);
2304    
2305     $self->{insertion_mode} = IN_HEAD_IM;
2306     ## Reprocess in the "in head" insertion mode...
2307     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2308     !!!cp ('t120');
2309     !!!parse-error (type => 'after head',
2310     text => $token->{tag_name}, token => $token);
2311     push @{$self->{open_elements}},
2312     [$self->{head_element}, $el_category->{head}];
2313     $self->{head_element_inserted} = 1;
2314     } else {
2315     !!!cp ('t121');
2316     }
2317 wakaba 1.49
2318 wakaba 1.202 ## NOTE: There is a "as if in head" code clone.
2319     $script_start_tag->();
2320 wakaba 1.205 ## ISSUE: A spec bug [Bug 6038]
2321     splice @{$self->{open_elements}}, -2, 1 # <head>
2322 wakaba 1.210 if ($self->{insertion_mode} & IM_MASK) == AFTER_HEAD_IM;
2323 wakaba 1.202 next B;
2324     } elsif ($token->{tag_name} eq 'body' or
2325     $token->{tag_name} eq 'frameset') {
2326 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2327 wakaba 1.79 !!!cp ('t122');
2328 wakaba 1.49 ## As if </noscript>
2329     pop @{$self->{open_elements}};
2330 wakaba 1.153 !!!parse-error (type => 'in noscript',
2331     text => $token->{tag_name}, token => $token);
2332 wakaba 1.49
2333     ## Reprocess in the "in head" insertion mode...
2334     ## As if </head>
2335     pop @{$self->{open_elements}};
2336    
2337     ## Reprocess in the "after head" insertion mode...
2338 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2339 wakaba 1.79 !!!cp ('t124');
2340 wakaba 1.49 pop @{$self->{open_elements}};
2341    
2342     ## Reprocess in the "after head" insertion mode...
2343 wakaba 1.79 } else {
2344     !!!cp ('t125');
2345 wakaba 1.49 }
2346    
2347     ## "after head" insertion mode
2348 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
## Choose the next insertion mode from the element that has just been
## inserted: |body| switches to "in body", |frameset| to "in frameset".
if ($token->{tag_name} eq 'body') {
  !!!cp ('t126');
  $self->{insertion_mode} = IN_BODY_IM;
} elsif ($token->{tag_name} eq 'frameset') {
  !!!cp ('t127');
  $self->{insertion_mode} = IN_FRAMESET_IM;
} else {
  ## Defensive only: the enclosing branch is entered solely for
  ## |body| and |frameset| start tags.  The tag name is a member of
  ## the token (it is what the conditions above test), so report
  ## |$token->{tag_name}|; the former |$self->{tag_name}| would have
  ## produced a message without the offending name.
  die "$0: tag name: $token->{tag_name}";
}
2358 wakaba 1.125 !!!nack ('t127.1');
2359 wakaba 1.1 !!!next-token;
2360 wakaba 1.126 next B;
2361 wakaba 1.1 } else {
2362 wakaba 1.79 !!!cp ('t128');
2363 wakaba 1.1 #
2364     }
2365 wakaba 1.49
2366 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2367 wakaba 1.79 !!!cp ('t129');
2368 wakaba 1.49 ## As if </noscript>
2369     pop @{$self->{open_elements}};
2370 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2371     text => $token->{tag_name}, token => $token);
2372 wakaba 1.49
2373     ## Reprocess in the "in head" insertion mode...
2374     ## As if </head>
2375 wakaba 1.25 pop @{$self->{open_elements}};
2376 wakaba 1.49
2377     ## Reprocess in the "after head" insertion mode...
2378 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2379 wakaba 1.79 !!!cp ('t130');
2380 wakaba 1.49 ## As if </head>
2381 wakaba 1.25 pop @{$self->{open_elements}};
2382 wakaba 1.49
2383     ## Reprocess in the "after head" insertion mode...
2384 wakaba 1.79 } else {
2385     !!!cp ('t131');
2386 wakaba 1.49 }
2387    
2388     ## "after head" insertion mode
2389     ## As if <body>
2390 wakaba 1.116 !!!insert-element ('body',, $token);
2391 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2392 wakaba 1.49 ## reprocess
2393 wakaba 1.125 !!!ack-later;
2394 wakaba 1.126 next B;
2395 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2396 wakaba 1.49 if ($token->{tag_name} eq 'head') {
2397 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2398 wakaba 1.79 !!!cp ('t132');
2399 wakaba 1.50 ## As if <head>
2400 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2401 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2402 wakaba 1.123 push @{$self->{open_elements}},
2403     [$self->{head_element}, $el_category->{head}];
2404 wakaba 1.50
2405     ## Reprocess in the "in head" insertion mode...
2406     pop @{$self->{open_elements}};
2407 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2408 wakaba 1.50 !!!next-token;
2409 wakaba 1.126 next B;
2410 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2411 wakaba 1.79 !!!cp ('t133');
2412 wakaba 1.49 ## As if </noscript>
2413     pop @{$self->{open_elements}};
2414 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2415     text => 'head', token => $token);
2416 wakaba 1.49
2417     ## Reprocess in the "in head" insertion mode...
2418 wakaba 1.50 pop @{$self->{open_elements}};
2419 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2420 wakaba 1.50 !!!next-token;
2421 wakaba 1.126 next B;
2422 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2423 wakaba 1.79 !!!cp ('t134');
2424 wakaba 1.49 pop @{$self->{open_elements}};
2425 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
2426 wakaba 1.49 !!!next-token;
2427 wakaba 1.126 next B;
2428 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2429     !!!cp ('t134.1');
2430 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
2431     token => $token);
2432 wakaba 1.139 ## Ignore the token
2433     !!!next-token;
2434     next B;
2435 wakaba 1.49 } else {
2436 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2437 wakaba 1.49 }
2438     } elsif ($token->{tag_name} eq 'noscript') {
2439 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2440 wakaba 1.79 !!!cp ('t136');
2441 wakaba 1.49 pop @{$self->{open_elements}};
2442 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
2443 wakaba 1.49 !!!next-token;
2444 wakaba 1.126 next B;
2445 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
2446     $self->{insertion_mode} == AFTER_HEAD_IM) {
2447 wakaba 1.79 !!!cp ('t137');
2448 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2449     text => 'noscript', token => $token);
2450 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2451     !!!next-token;
2452 wakaba 1.126 next B;
2453 wakaba 1.49 } else {
2454 wakaba 1.79 !!!cp ('t138');
2455 wakaba 1.49 #
2456     }
2457     } elsif ({
2458 wakaba 1.31 body => 1, html => 1,
2459     }->{$token->{tag_name}}) {
2460 wakaba 1.203 ## TODO: This branch is entirely redundant.
2461     if ($self->{insertion_mode} == BEFORE_HEAD_IM or
2462 wakaba 1.139 $self->{insertion_mode} == IN_HEAD_IM or
2463     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2464 wakaba 1.79 !!!cp ('t140');
2465 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2466     text => $token->{tag_name}, token => $token);
2467 wakaba 1.49 ## Ignore the token
2468     !!!next-token;
2469 wakaba 1.126 next B;
2470 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2471     !!!cp ('t140.1');
2472 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2473     text => $token->{tag_name}, token => $token);
2474 wakaba 1.139 ## Ignore the token
2475     !!!next-token;
2476     next B;
2477 wakaba 1.79 } else {
2478 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2479 wakaba 1.49 }
2480 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
2481     !!!cp ('t142');
2482 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2483     text => $token->{tag_name}, token => $token);
2484 wakaba 1.139 ## Ignore the token
2485     !!!next-token;
2486     next B;
2487     } elsif ($token->{tag_name} eq 'br') {
2488 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2489 wakaba 1.139 !!!cp ('t142.2');
2490     ## (before head) as if <head>, (in head) as if </head>
2491 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2492 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2493 wakaba 1.139 $self->{insertion_mode} = AFTER_HEAD_IM;
2494    
2495     ## Reprocess in the "after head" insertion mode...
2496     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2497     !!!cp ('t143.2');
2498     ## As if </head>
2499     pop @{$self->{open_elements}};
2500     $self->{insertion_mode} = AFTER_HEAD_IM;
2501    
2502     ## Reprocess in the "after head" insertion mode...
2503     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2504     !!!cp ('t143.3');
2505 wakaba 1.211 ## NOTE: Two parse errors for <head><noscript></br>
2506 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2507     text => 'br', token => $token);
2508 wakaba 1.139 ## As if </noscript>
2509     pop @{$self->{open_elements}};
2510     $self->{insertion_mode} = IN_HEAD_IM;
2511 wakaba 1.50
2512     ## Reprocess in the "in head" insertion mode...
2513 wakaba 1.139 ## As if </head>
2514     pop @{$self->{open_elements}};
2515     $self->{insertion_mode} = AFTER_HEAD_IM;
2516    
2517     ## Reprocess in the "after head" insertion mode...
2518     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
2519     !!!cp ('t143.4');
2520     #
2521 wakaba 1.79 } else {
2522 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
2523 wakaba 1.50 }
2524    
2525 wakaba 1.139 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
2526 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2527     text => 'br', token => $token);
2528 wakaba 1.139 ## Ignore the token
2529     !!!next-token;
2530     next B;
2531 wakaba 1.25 } else {
2532 wakaba 1.139 !!!cp ('t145');
2533 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2534     text => $token->{tag_name}, token => $token);
2535 wakaba 1.139 ## Ignore the token
2536     !!!next-token;
2537     next B;
2538 wakaba 1.49 }
2539    
2540 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2541 wakaba 1.79 !!!cp ('t146');
2542 wakaba 1.49 ## As if </noscript>
2543     pop @{$self->{open_elements}};
2544 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
2545     text => $token->{tag_name}, token => $token);
2546 wakaba 1.49
2547     ## Reprocess in the "in head" insertion mode...
2548     ## As if </head>
2549     pop @{$self->{open_elements}};
2550    
2551     ## Reprocess in the "after head" insertion mode...
2552 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2553 wakaba 1.79 !!!cp ('t147');
2554 wakaba 1.49 ## As if </head>
2555     pop @{$self->{open_elements}};
2556    
2557     ## Reprocess in the "after head" insertion mode...
2558 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2559 wakaba 1.82 ## ISSUE: This case cannot be reached?
2560 wakaba 1.79 !!!cp ('t148');
2561 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2562     text => $token->{tag_name}, token => $token);
2563 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
2564     !!!next-token;
2565 wakaba 1.126 next B;
2566 wakaba 1.79 } else {
2567     !!!cp ('t149');
2568 wakaba 1.1 }
2569    
2570 wakaba 1.49 ## "after head" insertion mode
2571     ## As if <body>
2572 wakaba 1.116 !!!insert-element ('body',, $token);
2573 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
2574 wakaba 1.52 ## reprocess
2575 wakaba 1.126 next B;
2576 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2577     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
2578     !!!cp ('t149.1');
2579    
2580     ## NOTE: As if <head>
2581 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
2582 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
2583     ($self->{head_element});
2584 wakaba 1.123 #push @{$self->{open_elements}},
2585     # [$self->{head_element}, $el_category->{head}];
2586 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
2587     ## NOTE: Reprocess.
2588    
2589     ## NOTE: As if </head>
2590     #pop @{$self->{open_elements}};
2591     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2592     ## NOTE: Reprocess.
2593    
2594     #
2595     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
2596     !!!cp ('t149.2');
2597    
2598     ## NOTE: As if </head>
2599     pop @{$self->{open_elements}};
2600     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2601     ## NOTE: Reprocess.
2602    
2603     #
2604     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
2605     !!!cp ('t149.3');
2606    
2607 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
2608 wakaba 1.104
2609     ## As if </noscript>
2610     pop @{$self->{open_elements}};
2611     #$self->{insertion_mode} = IN_HEAD_IM;
2612     ## NOTE: Reprocess.
2613    
2614     ## NOTE: As if </head>
2615     pop @{$self->{open_elements}};
2616     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
2617     ## NOTE: Reprocess.
2618    
2619     #
2620     } else {
2621     !!!cp ('t149.4');
2622     #
2623     }
2624    
2625     ## NOTE: As if <body>
2626 wakaba 1.116 !!!insert-element ('body',, $token);
2627 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
2628     ## NOTE: Reprocess.
2629 wakaba 1.126 next B;
2630 wakaba 1.104 } else {
2631     die "$0: $token->{type}: Unknown token type";
2632     }
2633 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
2634 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
2635 wakaba 1.79 !!!cp ('t150');
2636 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
2637     $reconstruct_active_formatting_elements->($insert_to_current);
2638    
2639     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
2640    
2641     !!!next-token;
2642 wakaba 1.126 next B;
2643 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
2644 wakaba 1.52 if ({
2645     caption => 1, col => 1, colgroup => 1, tbody => 1,
2646     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
2647     }->{$token->{tag_name}}) {
2648 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2649 wakaba 1.52 ## have an element in table scope
2650 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
2651 wakaba 1.52 my $node = $self->{open_elements}->[$_];
2652 wakaba 1.206 if ($node->[1] == TABLE_CELL_EL) {
2653 wakaba 1.79 !!!cp ('t151');
2654 wakaba 1.108
2655     ## Close the cell
2656 wakaba 1.125 !!!back-token; # <x>
2657 wakaba 1.122 $token = {type => END_TAG_TOKEN,
2658     tag_name => $node->[0]->manakai_local_name,
2659 wakaba 1.114 line => $token->{line},
2660     column => $token->{column}};
2661 wakaba 1.126 next B;
2662 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2663 wakaba 1.79 !!!cp ('t152');
2664 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
2665     last;
2666 wakaba 1.52 }
2667 wakaba 1.108 }
2668    
2669     !!!cp ('t153');
2670     !!!parse-error (type => 'start tag not allowed',
2671 wakaba 1.153 text => $token->{tag_name}, token => $token);
2672 wakaba 1.108 ## Ignore the token
2673 wakaba 1.125 !!!nack ('t153.1');
2674 wakaba 1.108 !!!next-token;
2675 wakaba 1.126 next B;
2676 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2677 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2678     token => $token);
2679 wakaba 1.52
2680 wakaba 1.108 ## NOTE: As if </caption>.
2681 wakaba 1.52 ## have a table element in table scope
2682     my $i;
2683 wakaba 1.108 INSCOPE: {
2684     for (reverse 0..$#{$self->{open_elements}}) {
2685     my $node = $self->{open_elements}->[$_];
2686 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2687 wakaba 1.108 !!!cp ('t155');
2688     $i = $_;
2689     last INSCOPE;
2690 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2691 wakaba 1.108 !!!cp ('t156');
2692     last;
2693     }
2694 wakaba 1.52 }
2695 wakaba 1.108
2696     !!!cp ('t157');
2697     !!!parse-error (type => 'start tag not allowed',
2698 wakaba 1.153 text => $token->{tag_name}, token => $token);
2699 wakaba 1.108 ## Ignore the token
2700 wakaba 1.125 !!!nack ('t157.1');
2701 wakaba 1.108 !!!next-token;
2702 wakaba 1.126 next B;
2703 wakaba 1.52 } # INSCOPE
2704    
2705     ## generate implied end tags
2706 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2707     & END_TAG_OPTIONAL_EL) {
2708 wakaba 1.79 !!!cp ('t158');
2709 wakaba 1.86 pop @{$self->{open_elements}};
2710 wakaba 1.52 }
2711    
2712 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2713 wakaba 1.79 !!!cp ('t159');
2714 wakaba 1.122 !!!parse-error (type => 'not closed',
2715 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2716 wakaba 1.122 ->manakai_local_name,
2717     token => $token);
2718 wakaba 1.79 } else {
2719     !!!cp ('t160');
2720 wakaba 1.52 }
2721    
2722     splice @{$self->{open_elements}}, $i;
2723    
2724     $clear_up_to_marker->();
2725    
2726 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2727 wakaba 1.52
2728     ## reprocess
2729 wakaba 1.125 !!!ack-later;
2730 wakaba 1.126 next B;
2731 wakaba 1.52 } else {
2732 wakaba 1.79 !!!cp ('t161');
2733 wakaba 1.52 #
2734     }
2735     } else {
2736 wakaba 1.79 !!!cp ('t162');
2737 wakaba 1.52 #
2738     }
2739 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
2740 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
2741 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2742 wakaba 1.43 ## have an element in table scope
2743 wakaba 1.52 my $i;
2744 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2745     my $node = $self->{open_elements}->[$_];
2746 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2747 wakaba 1.79 !!!cp ('t163');
2748 wakaba 1.52 $i = $_;
2749 wakaba 1.43 last INSCOPE;
2750 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2751 wakaba 1.79 !!!cp ('t164');
2752 wakaba 1.43 last INSCOPE;
2753     }
2754     } # INSCOPE
2755 wakaba 1.52 unless (defined $i) {
2756 wakaba 1.79 !!!cp ('t165');
2757 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2758     text => $token->{tag_name},
2759     token => $token);
2760 wakaba 1.43 ## Ignore the token
2761     !!!next-token;
2762 wakaba 1.126 next B;
2763 wakaba 1.43 }
2764    
2765 wakaba 1.52 ## generate implied end tags
2766 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2767     & END_TAG_OPTIONAL_EL) {
2768 wakaba 1.79 !!!cp ('t166');
2769 wakaba 1.86 pop @{$self->{open_elements}};
2770 wakaba 1.52 }
2771 wakaba 1.86
2772 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
2773     ne $token->{tag_name}) {
2774 wakaba 1.79 !!!cp ('t167');
2775 wakaba 1.122 !!!parse-error (type => 'not closed',
2776 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2777 wakaba 1.122 ->manakai_local_name,
2778     token => $token);
2779 wakaba 1.79 } else {
2780     !!!cp ('t168');
2781 wakaba 1.52 }
2782    
2783     splice @{$self->{open_elements}}, $i;
2784    
2785     $clear_up_to_marker->();
2786    
2787 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
2788 wakaba 1.52
2789     !!!next-token;
2790 wakaba 1.126 next B;
2791 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2792 wakaba 1.79 !!!cp ('t169');
2793 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2794     text => $token->{tag_name}, token => $token);
2795 wakaba 1.52 ## Ignore the token
2796     !!!next-token;
2797 wakaba 1.126 next B;
2798 wakaba 1.52 } else {
2799 wakaba 1.79 !!!cp ('t170');
2800 wakaba 1.52 #
2801     }
2802     } elsif ($token->{tag_name} eq 'caption') {
2803 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2804 wakaba 1.43 ## have a table element in table scope
2805     my $i;
2806 wakaba 1.108 INSCOPE: {
2807     for (reverse 0..$#{$self->{open_elements}}) {
2808     my $node = $self->{open_elements}->[$_];
2809 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2810 wakaba 1.108 !!!cp ('t171');
2811     $i = $_;
2812     last INSCOPE;
2813 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2814 wakaba 1.108 !!!cp ('t172');
2815     last;
2816     }
2817 wakaba 1.43 }
2818 wakaba 1.108
2819     !!!cp ('t173');
2820     !!!parse-error (type => 'unmatched end tag',
2821 wakaba 1.153 text => $token->{tag_name}, token => $token);
2822 wakaba 1.108 ## Ignore the token
2823     !!!next-token;
2824 wakaba 1.126 next B;
2825 wakaba 1.43 } # INSCOPE
2826    
2827     ## generate implied end tags
2828 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
2829     & END_TAG_OPTIONAL_EL) {
2830 wakaba 1.79 !!!cp ('t174');
2831 wakaba 1.86 pop @{$self->{open_elements}};
2832 wakaba 1.43 }
2833 wakaba 1.52
2834 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2835 wakaba 1.79 !!!cp ('t175');
2836 wakaba 1.122 !!!parse-error (type => 'not closed',
2837 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2838 wakaba 1.122 ->manakai_local_name,
2839     token => $token);
2840 wakaba 1.79 } else {
2841     !!!cp ('t176');
2842 wakaba 1.52 }
2843    
2844     splice @{$self->{open_elements}}, $i;
2845    
2846     $clear_up_to_marker->();
2847    
2848 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2849 wakaba 1.52
2850     !!!next-token;
2851 wakaba 1.126 next B;
2852 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2853 wakaba 1.79 !!!cp ('t177');
2854 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2855     text => $token->{tag_name}, token => $token);
2856 wakaba 1.52 ## Ignore the token
2857     !!!next-token;
2858 wakaba 1.126 next B;
2859 wakaba 1.52 } else {
2860 wakaba 1.79 !!!cp ('t178');
2861 wakaba 1.52 #
2862     }
2863     } elsif ({
2864     table => 1, tbody => 1, tfoot => 1,
2865     thead => 1, tr => 1,
2866     }->{$token->{tag_name}} and
2867 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CELL_IM) {
2868 wakaba 1.52 ## have an element in table scope
2869     my $i;
2870     my $tn;
2871 wakaba 1.108 INSCOPE: {
2872     for (reverse 0..$#{$self->{open_elements}}) {
2873     my $node = $self->{open_elements}->[$_];
2874 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
2875 wakaba 1.108 !!!cp ('t179');
2876     $i = $_;
2877    
2878     ## Close the cell
2879 wakaba 1.125 !!!back-token; # </x>
2880 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
2881     line => $token->{line},
2882     column => $token->{column}};
2883 wakaba 1.126 next B;
2884 wakaba 1.206 } elsif ($node->[1] == TABLE_CELL_EL) {
2885 wakaba 1.108 !!!cp ('t180');
2886 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
2887 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
2888     ## in scope in the stack of open elements by definition.
2889 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2890 wakaba 1.108 ## ISSUE: Can this be reached?
2891     !!!cp ('t181');
2892     last;
2893     }
2894 wakaba 1.52 }
2895 wakaba 1.108
2896 wakaba 1.79 !!!cp ('t182');
2897 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
2898 wakaba 1.153 text => $token->{tag_name}, token => $token);
2899 wakaba 1.52 ## Ignore the token
2900     !!!next-token;
2901 wakaba 1.126 next B;
2902 wakaba 1.108 } # INSCOPE
2903 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
2904 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2905 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
2906     token => $token);
2907 wakaba 1.52
2908     ## As if </caption>
2909     ## have a table element in table scope
2910     my $i;
2911     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2912     my $node = $self->{open_elements}->[$_];
2913 wakaba 1.206 if ($node->[1] == CAPTION_EL) {
2914 wakaba 1.79 !!!cp ('t184');
2915 wakaba 1.52 $i = $_;
2916     last INSCOPE;
2917 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
2918 wakaba 1.79 !!!cp ('t185');
2919 wakaba 1.52 last INSCOPE;
2920     }
2921     } # INSCOPE
2922     unless (defined $i) {
2923 wakaba 1.79 !!!cp ('t186');
2924 wakaba 1.209 ## TODO: Wrong error type?
2925 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2926     text => 'caption', token => $token);
2927 wakaba 1.52 ## Ignore the token
2928     !!!next-token;
2929 wakaba 1.126 next B;
2930 wakaba 1.52 }
2931    
2932     ## generate implied end tags
2933 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
2934 wakaba 1.79 !!!cp ('t187');
2935 wakaba 1.86 pop @{$self->{open_elements}};
2936 wakaba 1.52 }
2937    
2938 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == CAPTION_EL) {
2939 wakaba 1.79 !!!cp ('t188');
2940 wakaba 1.122 !!!parse-error (type => 'not closed',
2941 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
2942 wakaba 1.122 ->manakai_local_name,
2943     token => $token);
2944 wakaba 1.79 } else {
2945     !!!cp ('t189');
2946 wakaba 1.52 }
2947    
2948     splice @{$self->{open_elements}}, $i;
2949    
2950     $clear_up_to_marker->();
2951    
2952 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
2953 wakaba 1.52
2954     ## reprocess
2955 wakaba 1.126 next B;
2956 wakaba 1.52 } elsif ({
2957     body => 1, col => 1, colgroup => 1, html => 1,
2958     }->{$token->{tag_name}}) {
2959 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
2960 wakaba 1.79 !!!cp ('t190');
2961 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
2962     text => $token->{tag_name}, token => $token);
2963 wakaba 1.52 ## Ignore the token
2964     !!!next-token;
2965 wakaba 1.126 next B;
2966 wakaba 1.52 } else {
2967 wakaba 1.79 !!!cp ('t191');
2968 wakaba 1.52 #
2969     }
2970 wakaba 1.210 } elsif ({
2971     tbody => 1, tfoot => 1,
2972     thead => 1, tr => 1,
2973     }->{$token->{tag_name}} and
2974     ($self->{insertion_mode} & IM_MASK) == IN_CAPTION_IM) {
2975     !!!cp ('t192');
2976     !!!parse-error (type => 'unmatched end tag',
2977     text => $token->{tag_name}, token => $token);
2978     ## Ignore the token
2979     !!!next-token;
2980     next B;
2981     } else {
2982     !!!cp ('t193');
2983     #
2984     }
2985 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
2986     for my $entry (@{$self->{open_elements}}) {
2987 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
2988 wakaba 1.104 !!!cp ('t75');
2989 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
2990 wakaba 1.104 last;
2991     }
2992     }
2993    
2994     ## Stop parsing.
2995     last B;
2996 wakaba 1.52 } else {
2997     die "$0: $token->{type}: Unknown token type";
2998     }
2999    
3000     $insert = $insert_to_current;
3001     #
3002 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
3003 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3004 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
3005 wakaba 1.188 $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3006 wakaba 1.95 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3007 wakaba 1.52
3008 wakaba 1.95 unless (length $token->{data}) {
3009     !!!cp ('t194');
3010     !!!next-token;
3011 wakaba 1.126 next B;
3012 wakaba 1.95 } else {
3013     !!!cp ('t195');
3014     }
3015     }
3016 wakaba 1.52
3017 wakaba 1.153 !!!parse-error (type => 'in table:#text', token => $token);
3018 wakaba 1.52
3019 wakaba 1.202 ## NOTE: As if in body, but insert into the foster parent element.
3020     $reconstruct_active_formatting_elements->($insert_to_foster);
3021 wakaba 1.52
3022 wakaba 1.202 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
3023     # MUST
3024     my $foster_parent_element;
3025     my $next_sibling;
3026     my $prev_sibling;
3027     OE: for (reverse 0..$#{$self->{open_elements}}) {
3028 wakaba 1.206 if ($self->{open_elements}->[$_]->[1] == TABLE_EL) {
3029 wakaba 1.202 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3030     if (defined $parent and $parent->node_type == 1) {
3031     $foster_parent_element = $parent;
3032     !!!cp ('t196');
3033     $next_sibling = $self->{open_elements}->[$_]->[0];
3034     $prev_sibling = $next_sibling->previous_sibling;
3035     #
3036 wakaba 1.52 } else {
3037 wakaba 1.202 !!!cp ('t197');
3038     $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3039     $prev_sibling = $foster_parent_element->last_child;
3040     #
3041 wakaba 1.52 }
3042 wakaba 1.202 last OE;
3043     }
3044     } # OE
3045     $foster_parent_element = $self->{open_elements}->[0]->[0] and
3046     $prev_sibling = $foster_parent_element->last_child
3047     unless defined $foster_parent_element;
3048     undef $prev_sibling unless $open_tables->[-1]->[2]; # ~node inserted
3049     if (defined $prev_sibling and
3050     $prev_sibling->node_type == 3) {
3051     !!!cp ('t198');
3052     $prev_sibling->manakai_append_text ($token->{data});
3053     } else {
3054     !!!cp ('t199');
3055     $foster_parent_element->insert_before
3056     ($self->{document}->create_text_node ($token->{data}),
3057     $next_sibling);
3058     }
3059 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
3060 wakaba 1.202 $open_tables->[-1]->[2] = 1; # ~node inserted
3061 wakaba 1.95 } else {
3062 wakaba 1.202 ## NOTE: Fragment case or in a foster parent'ed element
3063     ## (e.g. |<table><span>a|). In fragment case, whether the
3064     ## character is appended to existing node or a new node is
3065     ## created is irrelevant, since the foster parent'ed nodes
3066     ## are discarded and fragment parsing does not invoke any
3067     ## script.
3068 wakaba 1.95 !!!cp ('t200');
3069 wakaba 1.202 $self->{open_elements}->[-1]->[0]->manakai_append_text
3070     ($token->{data});
3071 wakaba 1.95 }
3072 wakaba 1.52
3073 wakaba 1.95 !!!next-token;
3074 wakaba 1.126 next B;
3075 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3076 wakaba 1.153 if ({
3077 wakaba 1.210 tr => (($self->{insertion_mode} & IM_MASK) != IN_ROW_IM),
3078 wakaba 1.153 th => 1, td => 1,
3079     }->{$token->{tag_name}}) {
3080 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_IM) {
3081 wakaba 1.153 ## Clear back to table context
3082     while (not ($self->{open_elements}->[-1]->[1]
3083     & TABLE_SCOPING_EL)) {
3084     !!!cp ('t201');
3085     pop @{$self->{open_elements}};
3086     }
3087    
3088     !!!insert-element ('tbody',, $token);
3089     $self->{insertion_mode} = IN_TABLE_BODY_IM;
3090     ## reprocess in the "in table body" insertion mode...
3091     }
3092    
3093 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3094 wakaba 1.153 unless ($token->{tag_name} eq 'tr') {
3095     !!!cp ('t202');
3096     !!!parse-error (type => 'missing start tag:tr', token => $token);
3097     }
3098 wakaba 1.43
3099 wakaba 1.153 ## Clear back to table body context
3100     while (not ($self->{open_elements}->[-1]->[1]
3101     & TABLE_ROWS_SCOPING_EL)) {
3102     !!!cp ('t203');
3103     ## ISSUE: Can this case be reached?
3104     pop @{$self->{open_elements}};
3105     }
3106 wakaba 1.43
3107 wakaba 1.202 $self->{insertion_mode} = IN_ROW_IM;
3108     if ($token->{tag_name} eq 'tr') {
3109     !!!cp ('t204');
3110     !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3111     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3112     !!!nack ('t204');
3113     !!!next-token;
3114     next B;
3115     } else {
3116     !!!cp ('t205');
3117     !!!insert-element ('tr',, $token);
3118     ## reprocess in the "in row" insertion mode
3119     }
3120     } else {
3121     !!!cp ('t206');
3122     }
3123 wakaba 1.52
3124     ## Clear back to table row context
3125 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3126     & TABLE_ROW_SCOPING_EL)) {
3127 wakaba 1.79 !!!cp ('t207');
3128 wakaba 1.52 pop @{$self->{open_elements}};
3129 wakaba 1.43 }
3130 wakaba 1.52
3131 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3132     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3133     $self->{insertion_mode} = IN_CELL_IM;
3134 wakaba 1.52
3135 wakaba 1.202 push @$active_formatting_elements, ['#marker', ''];
3136 wakaba 1.52
3137 wakaba 1.202 !!!nack ('t207.1');
3138     !!!next-token;
3139     next B;
3140     } elsif ({
3141     caption => 1, col => 1, colgroup => 1,
3142     tbody => 1, tfoot => 1, thead => 1,
3143     tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3144     }->{$token->{tag_name}}) {
3145 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3146 wakaba 1.202 ## As if </tr>
3147     ## have an element in table scope
3148     my $i;
3149     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3150     my $node = $self->{open_elements}->[$_];
3151 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3152 wakaba 1.202 !!!cp ('t208');
3153     $i = $_;
3154     last INSCOPE;
3155     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3156     !!!cp ('t209');
3157     last INSCOPE;
3158     }
3159     } # INSCOPE
3160     unless (defined $i) {
3161     !!!cp ('t210');
3162     ## TODO: This type is wrong.
3163     !!!parse-error (type => 'unmacthed end tag',
3164     text => $token->{tag_name}, token => $token);
3165     ## Ignore the token
3166     !!!nack ('t210.1');
3167 wakaba 1.52 !!!next-token;
3168 wakaba 1.126 next B;
3169 wakaba 1.202 }
3170 wakaba 1.43
3171 wakaba 1.52 ## Clear back to table row context
3172 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3173     & TABLE_ROW_SCOPING_EL)) {
3174 wakaba 1.79 !!!cp ('t211');
3175 wakaba 1.83 ## ISSUE: Can this case be reached?
3176 wakaba 1.52 pop @{$self->{open_elements}};
3177 wakaba 1.1 }
3178 wakaba 1.43
3179 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3180 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3181 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
3182 wakaba 1.79 !!!cp ('t212');
3183 wakaba 1.52 ## reprocess
3184 wakaba 1.125 !!!ack-later;
3185 wakaba 1.126 next B;
3186 wakaba 1.52 } else {
3187 wakaba 1.79 !!!cp ('t213');
3188 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3189     }
3190 wakaba 1.1 }
3191 wakaba 1.52
3192 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3193 wakaba 1.52 ## have an element in table scope
3194 wakaba 1.43 my $i;
3195     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3196     my $node = $self->{open_elements}->[$_];
3197 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3198 wakaba 1.79 !!!cp ('t214');
3199 wakaba 1.43 $i = $_;
3200     last INSCOPE;
3201 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3202 wakaba 1.79 !!!cp ('t215');
3203 wakaba 1.43 last INSCOPE;
3204     }
3205     } # INSCOPE
3206 wakaba 1.52 unless (defined $i) {
3207 wakaba 1.79 !!!cp ('t216');
3208 wakaba 1.153 ## TODO: This error type is wrong.
3209     !!!parse-error (type => 'unmatched end tag',
3210     text => $token->{tag_name}, token => $token);
3211 wakaba 1.52 ## Ignore the token
3212 wakaba 1.125 !!!nack ('t216.1');
3213 wakaba 1.52 !!!next-token;
3214 wakaba 1.126 next B;
3215 wakaba 1.43 }
3216 wakaba 1.52
3217     ## Clear back to table body context
3218 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3219     & TABLE_ROWS_SCOPING_EL)) {
3220 wakaba 1.79 !!!cp ('t217');
3221 wakaba 1.83 ## ISSUE: Can this state be reached?
3222 wakaba 1.52 pop @{$self->{open_elements}};
3223 wakaba 1.43 }
3224    
3225 wakaba 1.52 ## As if <{current node}>
3226     ## have an element in table scope
3227     ## true by definition
3228 wakaba 1.43
3229 wakaba 1.52 ## Clear back to table body context
3230     ## nop by definition
3231 wakaba 1.43
3232 wakaba 1.52 pop @{$self->{open_elements}};
3233 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3234 wakaba 1.52 ## reprocess in "in table" insertion mode...
3235 wakaba 1.79 } else {
3236     !!!cp ('t218');
3237 wakaba 1.52 }
3238    
3239 wakaba 1.202 if ($token->{tag_name} eq 'col') {
3240     ## Clear back to table context
3241     while (not ($self->{open_elements}->[-1]->[1]
3242     & TABLE_SCOPING_EL)) {
3243     !!!cp ('t219');
3244     ## ISSUE: Can this state be reached?
3245     pop @{$self->{open_elements}};
3246     }
3247    
3248     !!!insert-element ('colgroup',, $token);
3249     $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
3250     ## reprocess
3251     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3252     !!!ack-later;
3253     next B;
3254     } elsif ({
3255     caption => 1,
3256     colgroup => 1,
3257     tbody => 1, tfoot => 1, thead => 1,
3258     }->{$token->{tag_name}}) {
3259     ## Clear back to table context
3260 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3261     & TABLE_SCOPING_EL)) {
3262 wakaba 1.79 !!!cp ('t220');
3263 wakaba 1.83 ## ISSUE: Can this state be reached?
3264 wakaba 1.52 pop @{$self->{open_elements}};
3265 wakaba 1.1 }
3266 wakaba 1.52
3267 wakaba 1.202 push @$active_formatting_elements, ['#marker', '']
3268     if $token->{tag_name} eq 'caption';
3269 wakaba 1.52
3270 wakaba 1.202 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3271     $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3272     $self->{insertion_mode} = {
3273     caption => IN_CAPTION_IM,
3274     colgroup => IN_COLUMN_GROUP_IM,
3275     tbody => IN_TABLE_BODY_IM,
3276     tfoot => IN_TABLE_BODY_IM,
3277     thead => IN_TABLE_BODY_IM,
3278     }->{$token->{tag_name}};
3279     !!!next-token;
3280     !!!nack ('t220.1');
3281     next B;
3282     } else {
3283     die "$0: in table: <>: $token->{tag_name}";
3284     }
3285 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3286 wakaba 1.122 !!!parse-error (type => 'not closed',
3287 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3288 wakaba 1.122 ->manakai_local_name,
3289     token => $token);
3290 wakaba 1.1
3291 wakaba 1.52 ## As if </table>
3292 wakaba 1.1 ## have a table element in table scope
3293     my $i;
3294 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3295     my $node = $self->{open_elements}->[$_];
3296 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3297 wakaba 1.79 !!!cp ('t221');
3298 wakaba 1.1 $i = $_;
3299     last INSCOPE;
3300 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3301 wakaba 1.79 !!!cp ('t222');
3302 wakaba 1.1 last INSCOPE;
3303     }
3304     } # INSCOPE
3305     unless (defined $i) {
3306 wakaba 1.79 !!!cp ('t223');
3307 wakaba 1.83 ## TODO: The following is wrong, maybe.
3308 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
3309     token => $token);
3310 wakaba 1.52 ## Ignore tokens </table><table>
3311 wakaba 1.125 !!!nack ('t223.1');
3312 wakaba 1.1 !!!next-token;
3313 wakaba 1.126 next B;
3314 wakaba 1.1 }
3315    
3316 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
3317 wakaba 1.1 ## generate implied end tags
3318 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
3319 wakaba 1.79 !!!cp ('t224');
3320 wakaba 1.86 pop @{$self->{open_elements}};
3321 wakaba 1.1 }
3322    
3323 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == TABLE_EL) {
3324 wakaba 1.79 !!!cp ('t225');
3325 wakaba 1.122 ## NOTE: |<table><tr><table>|
3326     !!!parse-error (type => 'not closed',
3327 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
3328 wakaba 1.122 ->manakai_local_name,
3329     token => $token);
3330 wakaba 1.79 } else {
3331     !!!cp ('t226');
3332 wakaba 1.1 }
3333    
3334 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3335 wakaba 1.95 pop @{$open_tables};
3336 wakaba 1.1
3337 wakaba 1.52 $self->_reset_insertion_mode;
3338 wakaba 1.1
3339 wakaba 1.125 ## reprocess
3340     !!!ack-later;
3341 wakaba 1.126 next B;
3342 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
3343     if (not $open_tables->[-1]->[1]) { # tainted
3344     !!!cp ('t227.8');
3345     ## NOTE: This is a "as if in head" code clone.
3346     $parse_rcdata->(CDATA_CONTENT_MODEL);
3347 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3348 wakaba 1.126 next B;
3349 wakaba 1.100 } else {
3350     !!!cp ('t227.7');
3351     #
3352     }
3353     } elsif ($token->{tag_name} eq 'script') {
3354     if (not $open_tables->[-1]->[1]) { # tainted
3355     !!!cp ('t227.6');
3356     ## NOTE: This is a "as if in head" code clone.
3357     $script_start_tag->();
3358 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3359 wakaba 1.126 next B;
3360 wakaba 1.100 } else {
3361     !!!cp ('t227.5');
3362     #
3363     }
3364 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
3365     if (not $open_tables->[-1]->[1]) { # tainted
3366     if ($token->{attributes}->{type}) { ## TODO: case
3367     my $type = lc $token->{attributes}->{type}->{value};
3368     if ($type eq 'hidden') {
3369     !!!cp ('t227.3');
3370 wakaba 1.153 !!!parse-error (type => 'in table',
3371     text => $token->{tag_name}, token => $token);
3372 wakaba 1.98
3373 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3374 wakaba 1.202 $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted
3375 wakaba 1.98
3376     ## TODO: form element pointer
3377    
3378     pop @{$self->{open_elements}};
3379    
3380     !!!next-token;
3381 wakaba 1.125 !!!ack ('t227.2.1');
3382 wakaba 1.126 next B;
3383 wakaba 1.98 } else {
3384     !!!cp ('t227.2');
3385     #
3386     }
3387     } else {
3388     !!!cp ('t227.1');
3389     #
3390     }
3391     } else {
3392     !!!cp ('t227.4');
3393     #
3394     }
3395 wakaba 1.58 } else {
3396 wakaba 1.79 !!!cp ('t227');
3397 wakaba 1.58 #
3398     }
3399 wakaba 1.98
3400 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
3401     token => $token);
3402 wakaba 1.98
3403     $insert = $insert_to_foster;
3404     #
3405 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
3406 wakaba 1.210 if ($token->{tag_name} eq 'tr' and
3407     ($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3408     ## have an element in table scope
3409 wakaba 1.52 my $i;
3410     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3411     my $node = $self->{open_elements}->[$_];
3412 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3413 wakaba 1.79 !!!cp ('t228');
3414 wakaba 1.52 $i = $_;
3415     last INSCOPE;
3416 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3417 wakaba 1.79 !!!cp ('t229');
3418 wakaba 1.52 last INSCOPE;
3419     }
3420     } # INSCOPE
3421     unless (defined $i) {
3422 wakaba 1.79 !!!cp ('t230');
3423 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3424     text => $token->{tag_name}, token => $token);
3425 wakaba 1.52 ## Ignore the token
3426 wakaba 1.125 !!!nack ('t230.1');
3427 wakaba 1.42 !!!next-token;
3428 wakaba 1.126 next B;
3429 wakaba 1.79 } else {
3430     !!!cp ('t232');
3431 wakaba 1.42 }
3432    
3433 wakaba 1.52 ## Clear back to table row context
3434 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3435     & TABLE_ROW_SCOPING_EL)) {
3436 wakaba 1.79 !!!cp ('t231');
3437 wakaba 1.83 ## ISSUE: Can this state be reached?
3438 wakaba 1.52 pop @{$self->{open_elements}};
3439     }
3440 wakaba 1.42
3441 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3442 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3443 wakaba 1.52 !!!next-token;
3444 wakaba 1.125 !!!nack ('t231.1');
3445 wakaba 1.126 next B;
3446 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
3447 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3448 wakaba 1.52 ## As if </tr>
3449     ## have an element in table scope
3450     my $i;
3451     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3452     my $node = $self->{open_elements}->[$_];
3453 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3454 wakaba 1.79 !!!cp ('t233');
3455 wakaba 1.52 $i = $_;
3456     last INSCOPE;
3457 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3458 wakaba 1.79 !!!cp ('t234');
3459 wakaba 1.52 last INSCOPE;
3460 wakaba 1.42 }
3461 wakaba 1.52 } # INSCOPE
3462     unless (defined $i) {
3463 wakaba 1.79 !!!cp ('t235');
3464 wakaba 1.83 ## TODO: The following is wrong.
3465 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3466     text => $token->{type}, token => $token);
3467 wakaba 1.52 ## Ignore the token
3468 wakaba 1.125 !!!nack ('t236.1');
3469 wakaba 1.52 !!!next-token;
3470 wakaba 1.126 next B;
3471 wakaba 1.42 }
3472 wakaba 1.52
3473     ## Clear back to table row context
3474 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3475     & TABLE_ROW_SCOPING_EL)) {
3476 wakaba 1.79 !!!cp ('t236');
3477 wakaba 1.83 ## ISSUE: Can this state be reached?
3478 wakaba 1.46 pop @{$self->{open_elements}};
3479 wakaba 1.1 }
3480 wakaba 1.46
3481 wakaba 1.52 pop @{$self->{open_elements}}; # tr
3482 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3483 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
3484 wakaba 1.1 }
3485    
3486 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_TABLE_BODY_IM) {
3487 wakaba 1.52 ## have an element in table scope
3488     my $i;
3489     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3490     my $node = $self->{open_elements}->[$_];
3491 wakaba 1.206 if ($node->[1] == TABLE_ROW_GROUP_EL) {
3492 wakaba 1.79 !!!cp ('t237');
3493 wakaba 1.52 $i = $_;
3494     last INSCOPE;
3495 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3496 wakaba 1.79 !!!cp ('t238');
3497 wakaba 1.52 last INSCOPE;
3498     }
3499     } # INSCOPE
3500     unless (defined $i) {
3501 wakaba 1.79 !!!cp ('t239');
3502 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3503     text => $token->{tag_name}, token => $token);
3504 wakaba 1.52 ## Ignore the token
3505 wakaba 1.125 !!!nack ('t239.1');
3506 wakaba 1.52 !!!next-token;
3507 wakaba 1.126 next B;
3508 wakaba 1.47 }
3509    
3510     ## Clear back to table body context
3511 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3512     & TABLE_ROWS_SCOPING_EL)) {
3513 wakaba 1.79 !!!cp ('t240');
3514 wakaba 1.47 pop @{$self->{open_elements}};
3515     }
3516    
3517 wakaba 1.52 ## As if <{current node}>
3518     ## have an element in table scope
3519     ## true by definition
3520    
3521     ## Clear back to table body context
3522     ## nop by definition
3523    
3524     pop @{$self->{open_elements}};
3525 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3526 wakaba 1.52 ## reprocess in the "in table" insertion mode...
3527     }
3528    
3529 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
3530     ## When you edit the code fragment below, please ensure that
3531     ## the code for <table> in the "in table" insertion mode
3532     ## is synced with it.
3533    
3534 wakaba 1.52 ## have a table element in table scope
3535     my $i;
3536     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3537     my $node = $self->{open_elements}->[$_];
3538 wakaba 1.206 if ($node->[1] == TABLE_EL) {
3539 wakaba 1.79 !!!cp ('t241');
3540 wakaba 1.52 $i = $_;
3541     last INSCOPE;
3542 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3543 wakaba 1.79 !!!cp ('t242');
3544 wakaba 1.52 last INSCOPE;
3545 wakaba 1.47 }
3546 wakaba 1.52 } # INSCOPE
3547     unless (defined $i) {
3548 wakaba 1.79 !!!cp ('t243');
3549 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3550     text => $token->{tag_name}, token => $token);
3551 wakaba 1.52 ## Ignore the token
3552 wakaba 1.125 !!!nack ('t243.1');
3553 wakaba 1.52 !!!next-token;
3554 wakaba 1.126 next B;
3555 wakaba 1.3 }
3556 wakaba 1.52
3557     splice @{$self->{open_elements}}, $i;
3558 wakaba 1.95 pop @{$open_tables};
3559 wakaba 1.1
3560 wakaba 1.52 $self->_reset_insertion_mode;
3561 wakaba 1.47
3562     !!!next-token;
3563 wakaba 1.126 next B;
3564 wakaba 1.47 } elsif ({
3565 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
3566 wakaba 1.52 }->{$token->{tag_name}} and
3567 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
3568 wakaba 1.210 if (($self->{insertion_mode} & IM_MASK) == IN_ROW_IM) {
3569 wakaba 1.52 ## have an element in table scope
3570     my $i;
3571     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3572     my $node = $self->{open_elements}->[$_];
3573 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3574 wakaba 1.79 !!!cp ('t247');
3575 wakaba 1.52 $i = $_;
3576     last INSCOPE;
3577 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3578 wakaba 1.79 !!!cp ('t248');
3579 wakaba 1.52 last INSCOPE;
3580     }
3581     } # INSCOPE
3582     unless (defined $i) {
3583 wakaba 1.79 !!!cp ('t249');
3584 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3585     text => $token->{tag_name}, token => $token);
3586 wakaba 1.52 ## Ignore the token
3587 wakaba 1.125 !!!nack ('t249.1');
3588 wakaba 1.52 !!!next-token;
3589 wakaba 1.126 next B;
3590 wakaba 1.52 }
3591    
3592 wakaba 1.48 ## As if </tr>
3593     ## have an element in table scope
3594     my $i;
3595     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3596     my $node = $self->{open_elements}->[$_];
3597 wakaba 1.206 if ($node->[1] == TABLE_ROW_EL) {
3598 wakaba 1.79 !!!cp ('t250');
3599 wakaba 1.48 $i = $_;
3600     last INSCOPE;
3601 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3602 wakaba 1.79 !!!cp ('t251');
3603 wakaba 1.48 last INSCOPE;
3604     }
3605     } # INSCOPE
3606 wakaba 1.52 unless (defined $i) {
3607 wakaba 1.79 !!!cp ('t252');
3608 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3609     text => 'tr', token => $token);
3610 wakaba 1.52 ## Ignore the token
3611 wakaba 1.125 !!!nack ('t252.1');
3612 wakaba 1.52 !!!next-token;
3613 wakaba 1.126 next B;
3614 wakaba 1.52 }
3615 wakaba 1.48
3616     ## Clear back to table row context
3617 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3618     & TABLE_ROW_SCOPING_EL)) {
3619 wakaba 1.79 !!!cp ('t253');
3620 wakaba 1.83 ## ISSUE: Can this case be reached?
3621 wakaba 1.48 pop @{$self->{open_elements}};
3622     }
3623    
3624     pop @{$self->{open_elements}}; # tr
3625 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
3626 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
3627     }
3628    
3629     ## have an element in table scope
3630     my $i;
3631     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3632     my $node = $self->{open_elements}->[$_];
3633 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3634 wakaba 1.79 !!!cp ('t254');
3635 wakaba 1.52 $i = $_;
3636     last INSCOPE;
3637 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
3638 wakaba 1.79 !!!cp ('t255');
3639 wakaba 1.52 last INSCOPE;
3640     }
3641     } # INSCOPE
3642     unless (defined $i) {
3643 wakaba 1.79 !!!cp ('t256');
3644 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3645     text => $token->{tag_name}, token => $token);
3646 wakaba 1.52 ## Ignore the token
3647 wakaba 1.125 !!!nack ('t256.1');
3648 wakaba 1.52 !!!next-token;
3649 wakaba 1.126 next B;
3650 wakaba 1.52 }
3651    
3652     ## Clear back to table body context
3653 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
3654     & TABLE_ROWS_SCOPING_EL)) {
3655 wakaba 1.79 !!!cp ('t257');
3656 wakaba 1.83 ## ISSUE: Can this case be reached?
3657 wakaba 1.52 pop @{$self->{open_elements}};
3658     }
3659    
3660     pop @{$self->{open_elements}};
3661 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3662 wakaba 1.125 !!!nack ('t257.1');
3663 wakaba 1.52 !!!next-token;
3664 wakaba 1.126 next B;
3665 wakaba 1.52 } elsif ({
3666     body => 1, caption => 1, col => 1, colgroup => 1,
3667     html => 1, td => 1, th => 1,
3668 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
3669     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
3670 wakaba 1.52 }->{$token->{tag_name}}) {
3671 wakaba 1.125 !!!cp ('t258');
3672 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3673     text => $token->{tag_name}, token => $token);
3674 wakaba 1.125 ## Ignore the token
3675     !!!nack ('t258.1');
3676     !!!next-token;
3677 wakaba 1.126 next B;
3678 wakaba 1.58 } else {
3679 wakaba 1.79 !!!cp ('t259');
3680 wakaba 1.153 !!!parse-error (type => 'in table:/',
3681     text => $token->{tag_name}, token => $token);
3682 wakaba 1.52
3683 wakaba 1.58 $insert = $insert_to_foster;
3684     #
3685     }
3686 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3687 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
3688 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3689 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
3690 wakaba 1.104 !!!cp ('t259.1');
3691 wakaba 1.105 #
3692 wakaba 1.104 } else {
3693     !!!cp ('t259.2');
3694 wakaba 1.105 #
3695 wakaba 1.104 }
3696    
3697     ## Stop parsing
3698     last B;
3699 wakaba 1.58 } else {
3700     die "$0: $token->{type}: Unknown token type";
3701     }
3702 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
3703 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3704 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3705 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3706     unless (length $token->{data}) {
3707 wakaba 1.79 !!!cp ('t260');
3708 wakaba 1.52 !!!next-token;
3709 wakaba 1.126 next B;
3710 wakaba 1.52 }
3711     }
3712    
3713 wakaba 1.79 !!!cp ('t261');
3714 wakaba 1.52 #
3715 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
3716 wakaba 1.52 if ($token->{tag_name} eq 'col') {
3717 wakaba 1.79 !!!cp ('t262');
3718 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3719 wakaba 1.52 pop @{$self->{open_elements}};
3720 wakaba 1.125 !!!ack ('t262.1');
3721 wakaba 1.52 !!!next-token;
3722 wakaba 1.126 next B;
3723 wakaba 1.52 } else {
3724 wakaba 1.79 !!!cp ('t263');
3725 wakaba 1.52 #
3726     }
3727 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
3728 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
3729 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3730 wakaba 1.79 !!!cp ('t264');
3731 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3732     text => 'colgroup', token => $token);
3733 wakaba 1.52 ## Ignore the token
3734     !!!next-token;
3735 wakaba 1.126 next B;
3736 wakaba 1.52 } else {
3737 wakaba 1.79 !!!cp ('t265');
3738 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3739 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3740 wakaba 1.52 !!!next-token;
3741 wakaba 1.126 next B;
3742 wakaba 1.52 }
3743     } elsif ($token->{tag_name} eq 'col') {
3744 wakaba 1.79 !!!cp ('t266');
3745 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3746     text => 'col', token => $token);
3747 wakaba 1.52 ## Ignore the token
3748     !!!next-token;
3749 wakaba 1.126 next B;
3750 wakaba 1.52 } else {
3751 wakaba 1.79 !!!cp ('t267');
3752 wakaba 1.52 #
3753     }
3754 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3755 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
3756 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
3757     !!!cp ('t270.2');
3758     ## Stop parsing.
3759     last B;
3760     } else {
3761     ## NOTE: As if </colgroup>.
3762     !!!cp ('t270.1');
3763     pop @{$self->{open_elements}}; # colgroup
3764     $self->{insertion_mode} = IN_TABLE_IM;
3765     ## Reprocess.
3766 wakaba 1.126 next B;
3767 wakaba 1.104 }
3768     } else {
3769     die "$0: $token->{type}: Unknown token type";
3770     }
3771 wakaba 1.52
3772     ## As if </colgroup>
3773 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL) {
3774 wakaba 1.79 !!!cp ('t269');
3775 wakaba 1.104 ## TODO: Wrong error type?
3776 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3777     text => 'colgroup', token => $token);
3778 wakaba 1.52 ## Ignore the token
3779 wakaba 1.125 !!!nack ('t269.1');
3780 wakaba 1.52 !!!next-token;
3781 wakaba 1.126 next B;
3782 wakaba 1.52 } else {
3783 wakaba 1.79 !!!cp ('t270');
3784 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
3785 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
3786 wakaba 1.125 !!!ack-later;
3787 wakaba 1.52 ## reprocess
3788 wakaba 1.126 next B;
3789 wakaba 1.52 }
3790 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
3791 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
3792 wakaba 1.79 !!!cp ('t271');
3793 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3794     !!!next-token;
3795 wakaba 1.126 next B;
3796 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
3797 wakaba 1.123 if ($token->{tag_name} eq 'option') {
3798 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3799 wakaba 1.123 !!!cp ('t272');
3800     ## As if </option>
3801     pop @{$self->{open_elements}};
3802     } else {
3803     !!!cp ('t273');
3804     }
3805 wakaba 1.52
3806 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3807 wakaba 1.125 !!!nack ('t273.1');
3808 wakaba 1.123 !!!next-token;
3809 wakaba 1.126 next B;
3810 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
3811 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3812 wakaba 1.123 !!!cp ('t274');
3813     ## As if </option>
3814     pop @{$self->{open_elements}};
3815     } else {
3816     !!!cp ('t275');
3817     }
3818 wakaba 1.52
3819 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3820 wakaba 1.123 !!!cp ('t276');
3821     ## As if </optgroup>
3822     pop @{$self->{open_elements}};
3823     } else {
3824     !!!cp ('t277');
3825     }
3826 wakaba 1.52
3827 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
3828 wakaba 1.125 !!!nack ('t277.1');
3829 wakaba 1.123 !!!next-token;
3830 wakaba 1.126 next B;
3831 wakaba 1.146 } elsif ({
3832     select => 1, input => 1, textarea => 1,
3833     }->{$token->{tag_name}} or
3834 wakaba 1.210 (($self->{insertion_mode} & IM_MASK)
3835     == IN_SELECT_IN_TABLE_IM and
3836 wakaba 1.101 {
3837     caption => 1, table => 1,
3838     tbody => 1, tfoot => 1, thead => 1,
3839     tr => 1, td => 1, th => 1,
3840     }->{$token->{tag_name}})) {
3841     ## TODO: The type below is not good - <select> is replaced by </select>
3842 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'select',
3843     token => $token);
3844 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
3845     ## as if there were </select> (otherwise).
3846 wakaba 1.123 ## have an element in table scope
3847     my $i;
3848     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3849     my $node = $self->{open_elements}->[$_];
3850 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3851 wakaba 1.123 !!!cp ('t278');
3852     $i = $_;
3853     last INSCOPE;
3854     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3855     !!!cp ('t279');
3856     last INSCOPE;
3857     }
3858     } # INSCOPE
3859     unless (defined $i) {
3860     !!!cp ('t280');
3861 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3862     text => 'select', token => $token);
3863 wakaba 1.123 ## Ignore the token
3864 wakaba 1.125 !!!nack ('t280.1');
3865 wakaba 1.123 !!!next-token;
3866 wakaba 1.126 next B;
3867 wakaba 1.123 }
3868 wakaba 1.52
3869 wakaba 1.123 !!!cp ('t281');
3870     splice @{$self->{open_elements}}, $i;
3871 wakaba 1.52
3872 wakaba 1.123 $self->_reset_insertion_mode;
3873 wakaba 1.47
3874 wakaba 1.101 if ($token->{tag_name} eq 'select') {
3875 wakaba 1.125 !!!nack ('t281.2');
3876 wakaba 1.101 !!!next-token;
3877 wakaba 1.126 next B;
3878 wakaba 1.101 } else {
3879     !!!cp ('t281.1');
3880 wakaba 1.125 !!!ack-later;
3881 wakaba 1.101 ## Reprocess the token.
3882 wakaba 1.126 next B;
3883 wakaba 1.101 }
3884 wakaba 1.58 } else {
3885 wakaba 1.79 !!!cp ('t282');
3886 wakaba 1.153 !!!parse-error (type => 'in select',
3887     text => $token->{tag_name}, token => $token);
3888 wakaba 1.58 ## Ignore the token
3889 wakaba 1.125 !!!nack ('t282.1');
3890 wakaba 1.58 !!!next-token;
3891 wakaba 1.126 next B;
3892 wakaba 1.58 }
3893     } elsif ($token->{type} == END_TAG_TOKEN) {
3894 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
3895 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL and
3896     $self->{open_elements}->[-2]->[1] == OPTGROUP_EL) {
3897 wakaba 1.123 !!!cp ('t283');
3898     ## As if </option>
3899     splice @{$self->{open_elements}}, -2;
3900 wakaba 1.206 } elsif ($self->{open_elements}->[-1]->[1] == OPTGROUP_EL) {
3901 wakaba 1.123 !!!cp ('t284');
3902     pop @{$self->{open_elements}};
3903     } else {
3904     !!!cp ('t285');
3905 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3906     text => $token->{tag_name}, token => $token);
3907 wakaba 1.123 ## Ignore the token
3908     }
3909 wakaba 1.125 !!!nack ('t285.1');
3910 wakaba 1.123 !!!next-token;
3911 wakaba 1.126 next B;
3912 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
3913 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == OPTION_EL) {
3914 wakaba 1.123 !!!cp ('t286');
3915     pop @{$self->{open_elements}};
3916     } else {
3917     !!!cp ('t287');
3918 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3919     text => $token->{tag_name}, token => $token);
3920 wakaba 1.123 ## Ignore the token
3921     }
3922 wakaba 1.125 !!!nack ('t287.1');
3923 wakaba 1.123 !!!next-token;
3924 wakaba 1.126 next B;
3925 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
3926     ## have an element in table scope
3927     my $i;
3928     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3929     my $node = $self->{open_elements}->[$_];
3930 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3931 wakaba 1.123 !!!cp ('t288');
3932     $i = $_;
3933     last INSCOPE;
3934     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3935     !!!cp ('t289');
3936     last INSCOPE;
3937     }
3938     } # INSCOPE
3939     unless (defined $i) {
3940     !!!cp ('t290');
3941 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3942     text => $token->{tag_name}, token => $token);
3943 wakaba 1.123 ## Ignore the token
3944 wakaba 1.125 !!!nack ('t290.1');
3945 wakaba 1.123 !!!next-token;
3946 wakaba 1.126 next B;
3947 wakaba 1.123 }
3948 wakaba 1.52
3949 wakaba 1.123 !!!cp ('t291');
3950     splice @{$self->{open_elements}}, $i;
3951 wakaba 1.52
3952 wakaba 1.123 $self->_reset_insertion_mode;
3953 wakaba 1.52
3954 wakaba 1.125 !!!nack ('t291.1');
3955 wakaba 1.123 !!!next-token;
3956 wakaba 1.126 next B;
3957 wakaba 1.210 } elsif (($self->{insertion_mode} & IM_MASK)
3958     == IN_SELECT_IN_TABLE_IM and
3959 wakaba 1.101 {
3960     caption => 1, table => 1, tbody => 1,
3961     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
3962     }->{$token->{tag_name}}) {
3963 wakaba 1.83 ## TODO: The following is wrong?
3964 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
3965     text => $token->{tag_name}, token => $token);
3966 wakaba 1.52
3967 wakaba 1.123 ## have an element in table scope
3968     my $i;
3969     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3970     my $node = $self->{open_elements}->[$_];
3971     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
3972     !!!cp ('t292');
3973     $i = $_;
3974     last INSCOPE;
3975     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3976     !!!cp ('t293');
3977     last INSCOPE;
3978     }
3979     } # INSCOPE
3980     unless (defined $i) {
3981     !!!cp ('t294');
3982     ## Ignore the token
3983 wakaba 1.125 !!!nack ('t294.1');
3984 wakaba 1.123 !!!next-token;
3985 wakaba 1.126 next B;
3986 wakaba 1.123 }
3987 wakaba 1.52
3988 wakaba 1.123 ## As if </select>
3989     ## have an element in table scope
3990     undef $i;
3991     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3992     my $node = $self->{open_elements}->[$_];
3993 wakaba 1.206 if ($node->[1] == SELECT_EL) {
3994 wakaba 1.123 !!!cp ('t295');
3995     $i = $_;
3996     last INSCOPE;
3997     } elsif ($node->[1] & TABLE_SCOPING_EL) {
3998 wakaba 1.83 ## ISSUE: Can this state be reached?
3999 wakaba 1.123 !!!cp ('t296');
4000     last INSCOPE;
4001     }
4002     } # INSCOPE
4003     unless (defined $i) {
4004     !!!cp ('t297');
4005 wakaba 1.83 ## TODO: Is the following error type correct?
4006 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4007     text => 'select', token => $token);
4008 wakaba 1.123 ## Ignore the </select> token
4009 wakaba 1.125 !!!nack ('t297.1');
4010 wakaba 1.123 !!!next-token; ## TODO: ok?
4011 wakaba 1.126 next B;
4012 wakaba 1.123 }
4013 wakaba 1.52
4014 wakaba 1.123 !!!cp ('t298');
4015     splice @{$self->{open_elements}}, $i;
4016 wakaba 1.52
4017 wakaba 1.123 $self->_reset_insertion_mode;
4018 wakaba 1.52
4019 wakaba 1.125 !!!ack-later;
4020 wakaba 1.123 ## reprocess
4021 wakaba 1.126 next B;
4022 wakaba 1.58 } else {
4023 wakaba 1.79 !!!cp ('t299');
4024 wakaba 1.153 !!!parse-error (type => 'in select:/',
4025     text => $token->{tag_name}, token => $token);
4026 wakaba 1.52 ## Ignore the token
4027 wakaba 1.125 !!!nack ('t299.3');
4028 wakaba 1.52 !!!next-token;
4029 wakaba 1.126 next B;
4030 wakaba 1.58 }
4031 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4032 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4033 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4034     !!!cp ('t299.1');
4035 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4036 wakaba 1.104 } else {
4037     !!!cp ('t299.2');
4038     }
4039    
4040     ## Stop parsing.
4041     last B;
4042 wakaba 1.58 } else {
4043     die "$0: $token->{type}: Unknown token type";
4044     }
4045 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
4046 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4047 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4048 wakaba 1.52 my $data = $1;
4049     ## As if in body
4050     $reconstruct_active_formatting_elements->($insert_to_current);
4051    
4052     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4053    
4054     unless (length $token->{data}) {
4055 wakaba 1.79 !!!cp ('t300');
4056 wakaba 1.52 !!!next-token;
4057 wakaba 1.126 next B;
4058 wakaba 1.52 }
4059     }
4060    
4061 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4062 wakaba 1.79 !!!cp ('t301');
4063 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4064 wakaba 1.188 #
4065 wakaba 1.79 } else {
4066     !!!cp ('t302');
4067 wakaba 1.188 ## "after body" insertion mode
4068     !!!parse-error (type => 'after body:#text', token => $token);
4069     #
4070 wakaba 1.52 }
4071    
4072 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4073 wakaba 1.52 ## reprocess
4074 wakaba 1.126 next B;
4075 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4076 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4077 wakaba 1.79 !!!cp ('t303');
4078 wakaba 1.153 !!!parse-error (type => 'after html',
4079     text => $token->{tag_name}, token => $token);
4080 wakaba 1.188 #
4081 wakaba 1.79 } else {
4082     !!!cp ('t304');
4083 wakaba 1.188 ## "after body" insertion mode
4084     !!!parse-error (type => 'after body',
4085     text => $token->{tag_name}, token => $token);
4086     #
4087 wakaba 1.52 }
4088    
4089 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4090 wakaba 1.125 !!!ack-later;
4091 wakaba 1.52 ## reprocess
4092 wakaba 1.126 next B;
4093 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4094 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4095 wakaba 1.79 !!!cp ('t305');
4096 wakaba 1.153 !!!parse-error (type => 'after html:/',
4097     text => $token->{tag_name}, token => $token);
4098 wakaba 1.52
4099 wakaba 1.188 $self->{insertion_mode} = IN_BODY_IM;
4100     ## Reprocess.
4101     next B;
4102 wakaba 1.79 } else {
4103     !!!cp ('t306');
4104 wakaba 1.52 }
4105    
4106     ## "after body" insertion mode
4107     if ($token->{tag_name} eq 'html') {
4108     if (defined $self->{inner_html_node}) {
4109 wakaba 1.79 !!!cp ('t307');
4110 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4111     text => 'html', token => $token);
4112 wakaba 1.52 ## Ignore the token
4113     !!!next-token;
4114 wakaba 1.126 next B;
4115 wakaba 1.52 } else {
4116 wakaba 1.79 !!!cp ('t308');
4117 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
4118 wakaba 1.52 !!!next-token;
4119 wakaba 1.126 next B;
4120 wakaba 1.52 }
4121     } else {
4122 wakaba 1.79 !!!cp ('t309');
4123 wakaba 1.153 !!!parse-error (type => 'after body:/',
4124     text => $token->{tag_name}, token => $token);
4125 wakaba 1.52
4126 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4127 wakaba 1.52 ## reprocess
4128 wakaba 1.126 next B;
4129 wakaba 1.52 }
4130 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4131     !!!cp ('t309.2');
4132     ## Stop parsing
4133     last B;
4134 wakaba 1.52 } else {
4135     die "$0: $token->{type}: Unknown token type";
4136     }
4137 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
4138 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4139 wakaba 1.188 if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4140 wakaba 1.52 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4141    
4142     unless (length $token->{data}) {
4143 wakaba 1.79 !!!cp ('t310');
4144 wakaba 1.52 !!!next-token;
4145 wakaba 1.126 next B;
4146 wakaba 1.52 }
4147     }
4148    
4149 wakaba 1.188 if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
4150 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4151 wakaba 1.79 !!!cp ('t311');
4152 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
4153 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4154 wakaba 1.79 !!!cp ('t312');
4155 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
4156 wakaba 1.158 } else { # "after after frameset"
4157 wakaba 1.79 !!!cp ('t313');
4158 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
4159 wakaba 1.52 }
4160    
4161     ## Ignore the token.
4162     if (length $token->{data}) {
4163 wakaba 1.79 !!!cp ('t314');
4164 wakaba 1.52 ## reprocess the rest of characters
4165     } else {
4166 wakaba 1.79 !!!cp ('t315');
4167 wakaba 1.52 !!!next-token;
4168     }
4169 wakaba 1.126 next B;
4170 wakaba 1.52 }
4171    
4172     die qq[$0: Character "$token->{data}"];
4173 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4174 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4175 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4176 wakaba 1.79 !!!cp ('t318');
4177 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4178 wakaba 1.125 !!!nack ('t318.1');
4179 wakaba 1.52 !!!next-token;
4180 wakaba 1.126 next B;
4181 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
4182 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4183 wakaba 1.79 !!!cp ('t319');
4184 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4185 wakaba 1.52 pop @{$self->{open_elements}};
4186 wakaba 1.125 !!!ack ('t319.1');
4187 wakaba 1.52 !!!next-token;
4188 wakaba 1.126 next B;
4189 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
4190 wakaba 1.79 !!!cp ('t320');
4191 wakaba 1.148 ## NOTE: As if in head.
4192 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4193 wakaba 1.126 next B;
4194 wakaba 1.158
4195     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
4196     ## has no parse error.
4197 wakaba 1.52 } else {
4198 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4199 wakaba 1.79 !!!cp ('t321');
4200 wakaba 1.153 !!!parse-error (type => 'in frameset',
4201     text => $token->{tag_name}, token => $token);
4202 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4203 wakaba 1.79 !!!cp ('t322');
4204 wakaba 1.153 !!!parse-error (type => 'after frameset',
4205     text => $token->{tag_name}, token => $token);
4206 wakaba 1.158 } else { # "after after frameset"
4207     !!!cp ('t322.2');
4208     !!!parse-error (type => 'after after frameset',
4209     text => $token->{tag_name}, token => $token);
4210 wakaba 1.52 }
4211     ## Ignore the token
4212 wakaba 1.125 !!!nack ('t322.1');
4213 wakaba 1.52 !!!next-token;
4214 wakaba 1.126 next B;
4215 wakaba 1.52 }
4216 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4217 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
4218 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
4219 wakaba 1.206 if ($self->{open_elements}->[-1]->[1] == HTML_EL and
4220 wakaba 1.52 @{$self->{open_elements}} == 1) {
4221 wakaba 1.79 !!!cp ('t325');
4222 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4223     text => $token->{tag_name}, token => $token);
4224 wakaba 1.52 ## Ignore the token
4225     !!!next-token;
4226     } else {
4227 wakaba 1.79 !!!cp ('t326');
4228 wakaba 1.52 pop @{$self->{open_elements}};
4229     !!!next-token;
4230     }
4231 wakaba 1.47
4232 wakaba 1.52 if (not defined $self->{inner_html_node} and
4233 wakaba 1.206 not ($self->{open_elements}->[-1]->[1] == FRAMESET_EL)) {
4234 wakaba 1.79 !!!cp ('t327');
4235 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4236 wakaba 1.79 } else {
4237     !!!cp ('t328');
4238 wakaba 1.52 }
4239 wakaba 1.126 next B;
4240 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
4241 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
4242 wakaba 1.79 !!!cp ('t329');
4243 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
4244 wakaba 1.52 !!!next-token;
4245 wakaba 1.126 next B;
4246 wakaba 1.52 } else {
4247 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
4248 wakaba 1.79 !!!cp ('t330');
4249 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
4250     text => $token->{tag_name}, token => $token);
4251 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
4252     !!!cp ('t330.1');
4253     !!!parse-error (type => 'after frameset:/',
4254     text => $token->{tag_name}, token => $token);
4255     } else { # "after after frameset"
4256 wakaba 1.79 !!!cp ('t331');
4257 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
4258 wakaba 1.153 text => $token->{tag_name}, token => $token);
4259 wakaba 1.52 }
4260     ## Ignore the token
4261     !!!next-token;
4262 wakaba 1.126 next B;
4263 wakaba 1.52 }
4264 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4265 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == HTML_EL and
4266 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
4267     !!!cp ('t331.1');
4268 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4269 wakaba 1.104 } else {
4270     !!!cp ('t331.2');
4271     }
4272    
4273     ## Stop parsing
4274     last B;
4275 wakaba 1.52 } else {
4276     die "$0: $token->{type}: Unknown token type";
4277     }
4278     } else {
4279     die "$0: $self->{insertion_mode}: Unknown insertion mode";
4280     }
4281 wakaba 1.47
4282 wakaba 1.52 ## "in body" insertion mode
4283 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
4284 wakaba 1.52 if ($token->{tag_name} eq 'script') {
4285 wakaba 1.79 !!!cp ('t332');
4286 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4287 wakaba 1.100 $script_start_tag->();
4288 wakaba 1.126 next B;
4289 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
4290 wakaba 1.79 !!!cp ('t333');
4291 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4292 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4293 wakaba 1.126 next B;
4294 wakaba 1.52 } elsif ({
4295 wakaba 1.194 base => 1, command => 1, eventsource => 1, link => 1,
4296 wakaba 1.52 }->{$token->{tag_name}}) {
4297 wakaba 1.79 !!!cp ('t334');
4298 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
4299 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4300 wakaba 1.194 pop @{$self->{open_elements}};
4301 wakaba 1.125 !!!ack ('t334.1');
4302 wakaba 1.52 !!!next-token;
4303 wakaba 1.126 next B;
4304 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
4305     ## NOTE: This is an "as if in head" code clone, only "-t" differs
4306 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4307 wakaba 1.194 my $meta_el = pop @{$self->{open_elements}};
4308 wakaba 1.46
4309 wakaba 1.52 unless ($self->{confident}) {
4310 wakaba 1.134 if ($token->{attributes}->{charset}) {
4311 wakaba 1.79 !!!cp ('t335');
4312 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4313     ## in the {change_encoding} callback.
4314 wakaba 1.63 $self->{change_encoding}
4315 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
4316 wakaba 1.66
4317     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4318     ->set_user_data (manakai_has_reference =>
4319     $token->{attributes}->{charset}
4320     ->{has_reference});
4321 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4322     if ($token->{attributes}->{content}->{value}
4323 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4324 wakaba 1.189 [\x09\x0A\x0C\x0D\x20]*=
4325     [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4326     ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
4327     /x) {
4328 wakaba 1.79 !!!cp ('t336');
4329 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4330     ## in the {change_encoding} callback.
4331 wakaba 1.63 $self->{change_encoding}
4332 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
4333 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4334     ->set_user_data (manakai_has_reference =>
4335     $token->{attributes}->{content}
4336     ->{has_reference});
4337 wakaba 1.63 }
4338 wakaba 1.52 }
4339 wakaba 1.66 } else {
4340     if ($token->{attributes}->{charset}) {
4341 wakaba 1.79 !!!cp ('t337');
4342 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4343     ->set_user_data (manakai_has_reference =>
4344     $token->{attributes}->{charset}
4345     ->{has_reference});
4346     }
4347 wakaba 1.68 if ($token->{attributes}->{content}) {
4348 wakaba 1.79 !!!cp ('t338');
4349 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4350     ->set_user_data (manakai_has_reference =>
4351     $token->{attributes}->{content}
4352     ->{has_reference});
4353     }
4354 wakaba 1.52 }
4355 wakaba 1.1
4356 wakaba 1.125 !!!ack ('t338.1');
4357 wakaba 1.52 !!!next-token;
4358 wakaba 1.126 next B;
4359 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
4360 wakaba 1.79 !!!cp ('t341');
4361 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
4362 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4363 wakaba 1.126 next B;
4364 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
4365 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
4366 wakaba 1.46
4367 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
4368 wakaba 1.206 not ($self->{open_elements}->[1]->[1] == BODY_EL)) {
4369 wakaba 1.79 !!!cp ('t342');
4370 wakaba 1.52 ## Ignore the token
4371     } else {
4372     my $body_el = $self->{open_elements}->[1]->[0];
4373     for my $attr_name (keys %{$token->{attributes}}) {
4374     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
4375 wakaba 1.79 !!!cp ('t343');
4376 wakaba 1.52 $body_el->set_attribute_ns
4377     (undef, [undef, $attr_name],
4378     $token->{attributes}->{$attr_name}->{value});
4379     }
4380     }
4381     }
4382 wakaba 1.125 !!!nack ('t343.1');
4383 wakaba 1.52 !!!next-token;
4384 wakaba 1.126 next B;
4385 wakaba 1.52 } elsif ({
4386 wakaba 1.195 ## NOTE: Start tags for non-phrasing flow content elements
4387    
4388     ## NOTE: The normal one
4389     address => 1, article => 1, aside => 1, blockquote => 1,
4390     center => 1, datagrid => 1, details => 1, dialog => 1,
4391     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
4392     footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
4393     h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
4394     section => 1, ul => 1,
4395     ## NOTE: As normal, but drops leading newline
4396 wakaba 1.97 pre => 1, listing => 1,
4397 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
4398 wakaba 1.109 form => 1,
4399 wakaba 1.195
4400 wakaba 1.109 table => 1,
4401     hr => 1,
4402 wakaba 1.52 }->{$token->{tag_name}}) {
4403 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
4404     !!!cp ('t350');
4405 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
4406 wakaba 1.109 ## Ignore the token
4407 wakaba 1.125 !!!nack ('t350.1');
4408 wakaba 1.109 !!!next-token;
4409 wakaba 1.126 next B;
4410 wakaba 1.109 }
4411    
4412 wakaba 1.52 ## has a p element in scope
4413     INSCOPE: for (reverse @{$self->{open_elements}}) {
4414 wakaba 1.206 if ($_->[1] == P_EL) {
4415 wakaba 1.79 !!!cp ('t344');
4416 wakaba 1.125 !!!back-token; # <form>
4417 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4418     line => $token->{line}, column => $token->{column}};
4419 wakaba 1.126 next B;
4420 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
4421 wakaba 1.79 !!!cp ('t345');
4422 wakaba 1.52 last INSCOPE;
4423     }
4424     } # INSCOPE
4425    
4426 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4427 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
4428 wakaba 1.125 !!!nack ('t346.1');
4429 wakaba 1.52 !!!next-token;
4430 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4431 wakaba 1.52 $token->{data} =~ s/^\x0A//;
4432     unless (length $token->{data}) {
4433 wakaba 1.79 !!!cp ('t346');
4434 wakaba 1.1 !!!next-token;
4435 wakaba 1.79 } else {
4436     !!!cp ('t349');
4437 wakaba 1.52 }
4438 wakaba 1.79 } else {
4439     !!!cp ('t348');
4440 wakaba 1.52 }
4441 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
4442     !!!cp ('t347.1');
4443     $self->{form_element} = $self->{open_elements}->[-1]->[0];
4444    
4445 wakaba 1.125 !!!nack ('t347.2');
4446 wakaba 1.109 !!!next-token;
4447     } elsif ($token->{tag_name} eq 'table') {
4448     !!!cp ('t382');
4449     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
4450    
4451     $self->{insertion_mode} = IN_TABLE_IM;
4452    
4453 wakaba 1.125 !!!nack ('t382.1');
4454 wakaba 1.109 !!!next-token;
4455     } elsif ($token->{tag_name} eq 'hr') {
4456     !!!cp ('t386');
4457     pop @{$self->{open_elements}};
4458    
4459 wakaba 1.125 !!!nack ('t386.1');
4460 wakaba 1.109 !!!next-token;
4461 wakaba 1.52 } else {
4462 wakaba 1.125 !!!nack ('t347.1');
4463 wakaba 1.52 !!!next-token;
4464     }
4465 wakaba 1.126 next B;
4466 wakaba 1.196 } elsif ($token->{tag_name} eq 'li') {
4467     ## NOTE: As normal, but imply </li> when there's another <li> ...
4468 wakaba 1.193
4469     ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)
4470     ## Interpreted as <li><foo/></li><li/> (non-conforming)
4471     ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
4472     ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
4473     ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
4474     ## object (Fx)
4475     ## Generate non-tree (non-conforming)
4476     ## basefont (IE7 (where basefont is non-void)), center (IE),
4477     ## form (IE), hn (IE)
4478     ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)
4479     ## Interpreted as <li><foo><li/></foo></li> (non-conforming)
4480     ## div (Fx, S)
4481 wakaba 1.196
4482     my $non_optional;
4483 wakaba 1.52 my $i = -1;
4484 wakaba 1.196
4485     ## 1.
4486     for my $node (reverse @{$self->{open_elements}}) {
4487 wakaba 1.206 if ($node->[1] == LI_EL) {
4488 wakaba 1.196 ## 2. (a) As if </li>
4489     {
4490     ## If no </li> - not applied
4491     #
4492    
4493     ## Otherwise
4494    
4495     ## 1. generate implied end tags, except for </li>
4496     #
4497    
4498     ## 2. If current node != "li", parse error
4499     if ($non_optional) {
4500     !!!parse-error (type => 'not closed',
4501     text => $non_optional->[0]->manakai_local_name,
4502     token => $token);
4503     !!!cp ('t355');
4504     } else {
4505     !!!cp ('t356');
4506     }
4507    
4508     ## 3. Pop
4509     splice @{$self->{open_elements}}, $i;
4510 wakaba 1.52 }
4511 wakaba 1.196
4512     last; ## 2. (b) goto 5.
4513     } elsif (
4514     ## NOTE: not "formatting" and not "phrasing"
4515     ($node->[1] & SPECIAL_EL or
4516     $node->[1] & SCOPING_EL) and
4517     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4518 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4519     ) {
4520 wakaba 1.196 ## 3.
4521 wakaba 1.79 !!!cp ('t357');
4522 wakaba 1.196 last; ## goto 5.
4523     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4524 wakaba 1.79 !!!cp ('t358');
4525 wakaba 1.196 #
4526     } else {
4527     !!!cp ('t359');
4528     $non_optional ||= $node;
4529     #
4530 wakaba 1.52 }
4531 wakaba 1.196 ## 4.
4532     ## goto 2.
4533 wakaba 1.52 $i--;
4534 wakaba 1.196 }
4535    
4536     ## 5. (a) has a |p| element in scope
4537     INSCOPE: for (reverse @{$self->{open_elements}}) {
4538 wakaba 1.206 if ($_->[1] == P_EL) {
4539 wakaba 1.196 !!!cp ('t353');
4540 wakaba 1.198
4541     ## NOTE: |<p><li>|, for example.
4542    
4543 wakaba 1.196 !!!back-token; # <x>
4544     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4545     line => $token->{line}, column => $token->{column}};
4546     next B;
4547     } elsif ($_->[1] & SCOPING_EL) {
4548     !!!cp ('t354');
4549     last INSCOPE;
4550     }
4551     } # INSCOPE
4552    
4553     ## 5. (b) insert
4554 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4555 wakaba 1.125 !!!nack ('t359.1');
4556 wakaba 1.52 !!!next-token;
4557 wakaba 1.126 next B;
4558 wakaba 1.196 } elsif ($token->{tag_name} eq 'dt' or
4559     $token->{tag_name} eq 'dd') {
4560     ## NOTE: As normal, but imply </dt> or </dd> when ...
4561    
4562     my $non_optional;
4563     my $i = -1;
4564    
4565     ## 1.
4566     for my $node (reverse @{$self->{open_elements}}) {
4567 wakaba 1.207 if ($node->[1] == DTDD_EL) {
4568 wakaba 1.196 ## 2. (a) As if </dt> or </dd>
4569     {
4570     ## If no </dt> or </dd> - not applied
4571     #
4572    
4573     ## Otherwise
4574    
4575     ## 1. generate implied end tags, except for </dt> or </dd>
4576     #
4577    
4578     ## 2. If current node != "dt"|"dd", parse error
4579     if ($non_optional) {
4580     !!!parse-error (type => 'not closed',
4581     text => $non_optional->[0]->manakai_local_name,
4582     token => $token);
4583     !!!cp ('t355.1');
4584     } else {
4585     !!!cp ('t356.1');
4586     }
4587    
4588     ## 3. Pop
4589     splice @{$self->{open_elements}}, $i;
4590     }
4591    
4592     last; ## 2. (b) goto 5.
4593     } elsif (
4594     ## NOTE: not "formatting" and not "phrasing"
4595     ($node->[1] & SPECIAL_EL or
4596     $node->[1] & SCOPING_EL) and
4597     ## NOTE: "li", "dt", and "dd" are in |SPECIAL_EL|.
4598    
4599 wakaba 1.206 (not $node->[1] & ADDRESS_DIV_P_EL)
4600     ) {
4601 wakaba 1.196 ## 3.
4602     !!!cp ('t357.1');
4603     last; ## goto 5.
4604     } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
4605     !!!cp ('t358.1');
4606     #
4607     } else {
4608     !!!cp ('t359.1');
4609     $non_optional ||= $node;
4610     #
4611     }
4612     ## 4.
4613     ## goto 2.
4614     $i--;
4615     }
4616    
4617     ## 5. (a) has a |p| element in scope
4618     INSCOPE: for (reverse @{$self->{open_elements}}) {
4619 wakaba 1.206 if ($_->[1] == P_EL) {
4620 wakaba 1.196 !!!cp ('t353.1');
4621     !!!back-token; # <x>
4622     $token = {type => END_TAG_TOKEN, tag_name => 'p',
4623     line => $token->{line}, column => $token->{column}};
4624     next B;
4625     } elsif ($_->[1] & SCOPING_EL) {
4626     !!!cp ('t354.1');
4627     last INSCOPE;
4628     }
4629     } # INSCOPE
4630    
4631     ## 5. (b) insert
4632     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4633     !!!nack ('t359.2');
4634     !!!next-token;
4635     next B;
4636 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
4637 wakaba 1.195 ## NOTE: As normal, but effectively ends parsing
4638    
4639 wakaba 1.52 ## has a p element in scope
4640     INSCOPE: for (reverse @{$self->{open_elements}}) {
4641 wakaba 1.206 if ($_->[1] == P_EL) {
4642 wakaba 1.79 !!!cp ('t367');
4643 wakaba 1.125 !!!back-token; # <plaintext>
4644 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
4645     line => $token->{line}, column => $token->{column}};
4646 wakaba 1.126 next B;
4647 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
4648 wakaba 1.79 !!!cp ('t368');
4649 wakaba 1.52 last INSCOPE;
4650 wakaba 1.46 }
4651 wakaba 1.52 } # INSCOPE
4652    
4653 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4654 wakaba 1.52
4655     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
4656    
4657 wakaba 1.125 !!!nack ('t368.1');
4658 wakaba 1.52 !!!next-token;
4659 wakaba 1.126 next B;
4660 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
4661     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
4662     my $node = $active_formatting_elements->[$i];
4663 wakaba 1.206 if ($node->[1] == A_EL) {
4664 wakaba 1.79 !!!cp ('t371');
4665 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
4666 wakaba 1.52
4667 wakaba 1.125 !!!back-token; # <a>
4668 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
4669     line => $token->{line}, column => $token->{column}};
4670 wakaba 1.113 $formatting_end_tag->($token);
4671 wakaba 1.52
4672     AFE2: for (reverse 0..$#$active_formatting_elements) {
4673     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4674 wakaba 1.79 !!!cp ('t372');
4675 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
4676     last AFE2;
4677 wakaba 1.1 }
4678 wakaba 1.52 } # AFE2
4679     OE: for (reverse 0..$#{$self->{open_elements}}) {
4680     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
4681 wakaba 1.79 !!!cp ('t373');
4682 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
4683     last OE;
4684 wakaba 1.1 }
4685 wakaba 1.52 } # OE
4686     last AFE;
4687     } elsif ($node->[0] eq '#marker') {
4688 wakaba 1.79 !!!cp ('t374');
4689 wakaba 1.52 last AFE;
4690     }
4691     } # AFE
4692    
4693     $reconstruct_active_formatting_elements->($insert_to_current);
4694 wakaba 1.1
4695 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4696 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4697 wakaba 1.1
4698 wakaba 1.125 !!!nack ('t374.1');
4699 wakaba 1.52 !!!next-token;
4700 wakaba 1.126 next B;
4701 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
4702     $reconstruct_active_formatting_elements->($insert_to_current);
4703 wakaba 1.1
4704 wakaba 1.52 ## has a |nobr| element in scope
4705     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4706     my $node = $self->{open_elements}->[$_];
4707 wakaba 1.206 if ($node->[1] == NOBR_EL) {
4708 wakaba 1.79 !!!cp ('t376');
4709 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
4710 wakaba 1.125 !!!back-token; # <nobr>
4711 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
4712     line => $token->{line}, column => $token->{column}};
4713 wakaba 1.126 next B;
4714 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4715 wakaba 1.79 !!!cp ('t377');
4716 wakaba 1.52 last INSCOPE;
4717     }
4718     } # INSCOPE
4719    
4720 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4721 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
4722    
4723 wakaba 1.125 !!!nack ('t377.1');
4724 wakaba 1.52 !!!next-token;
4725 wakaba 1.126 next B;
4726 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
4727     ## has a button element in scope
4728     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4729     my $node = $self->{open_elements}->[$_];
4730 wakaba 1.206 if ($node->[1] == BUTTON_EL) {
4731 wakaba 1.79 !!!cp ('t378');
4732 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
4733 wakaba 1.125 !!!back-token; # <button>
4734 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
4735     line => $token->{line}, column => $token->{column}};
4736 wakaba 1.126 next B;
4737 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4738 wakaba 1.79 !!!cp ('t379');
4739 wakaba 1.52 last INSCOPE;
4740     }
4741     } # INSCOPE
4742    
4743     $reconstruct_active_formatting_elements->($insert_to_current);
4744    
4745 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4746 wakaba 1.85
4747     ## TODO: associate with $self->{form_element} if defined
4748    
4749 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
4750 wakaba 1.1
4751 wakaba 1.125 !!!nack ('t379.1');
4752 wakaba 1.52 !!!next-token;
4753 wakaba 1.126 next B;
4754 wakaba 1.103 } elsif ({
4755 wakaba 1.109 xmp => 1,
4756     iframe => 1,
4757     noembed => 1,
4758 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
4759 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
4760 wakaba 1.103 }->{$token->{tag_name}}) {
4761 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
4762     !!!cp ('t381');
4763     $reconstruct_active_formatting_elements->($insert_to_current);
4764     } else {
4765     !!!cp ('t399');
4766     }
4767     ## NOTE: There is an "as if in body" code clone.
4768 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4769 wakaba 1.126 next B;
4770 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
4771 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
4772 wakaba 1.52
4773     if (defined $self->{form_element}) {
4774 wakaba 1.79 !!!cp ('t389');
4775 wakaba 1.52 ## Ignore the token
4776 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
4777 wakaba 1.52 !!!next-token;
4778 wakaba 1.126 next B;
4779 wakaba 1.52 } else {
4780 wakaba 1.147 !!!ack ('t391.1');
4781    
4782 wakaba 1.52 my $at = $token->{attributes};
4783     my $form_attrs;
4784     $form_attrs->{action} = $at->{action} if $at->{action};
4785     my $prompt_attr = $at->{prompt};
4786     $at->{name} = {name => 'name', value => 'isindex'};
4787     delete $at->{action};
4788     delete $at->{prompt};
4789     my @tokens = (
4790 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
4791 wakaba 1.114 attributes => $form_attrs,
4792     line => $token->{line}, column => $token->{column}},
4793     {type => START_TAG_TOKEN, tag_name => 'hr',
4794     line => $token->{line}, column => $token->{column}},
4795     {type => START_TAG_TOKEN, tag_name => 'p',
4796     line => $token->{line}, column => $token->{column}},
4797     {type => START_TAG_TOKEN, tag_name => 'label',
4798     line => $token->{line}, column => $token->{column}},
4799 wakaba 1.52 );
4800     if ($prompt_attr) {
4801 wakaba 1.79 !!!cp ('t390');
4802 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
4803 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4804     };
4805 wakaba 1.1 } else {
4806 wakaba 1.79 !!!cp ('t391');
4807 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
4808 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
4809 wakaba 1.118 #line => $token->{line}, column => $token->{column},
4810     }; # SHOULD
4811 wakaba 1.52 ## TODO: make this configurable
4812 wakaba 1.1 }
4813 wakaba 1.52 push @tokens,
4814 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
4815     line => $token->{line}, column => $token->{column}},
4816 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
4817 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
4818     line => $token->{line}, column => $token->{column}},
4819     {type => END_TAG_TOKEN, tag_name => 'p',
4820     line => $token->{line}, column => $token->{column}},
4821     {type => START_TAG_TOKEN, tag_name => 'hr',
4822     line => $token->{line}, column => $token->{column}},
4823     {type => END_TAG_TOKEN, tag_name => 'form',
4824     line => $token->{line}, column => $token->{column}};
4825 wakaba 1.52 !!!back-token (@tokens);
4826 wakaba 1.125 !!!next-token;
4827 wakaba 1.126 next B;
4828 wakaba 1.52 }
4829     } elsif ($token->{tag_name} eq 'textarea') {
4830 wakaba 1.205 ## Step 1
4831     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4832 wakaba 1.52
4833 wakaba 1.205 ## Step 2
4834 wakaba 1.52 ## TODO: $self->{form_element} if defined
4835 wakaba 1.205
4836     ## Step 3
4837     $self->{ignore_newline} = 1;
4838    
4839     ## Step 4
4840     ## ISSUE: This step is wrong. (r2302 enbugged)
4841    
4842     ## Step 5
4843 wakaba 1.52 $self->{content_model} = RCDATA_CONTENT_MODEL;
4844     delete $self->{escape}; # MUST
4845 wakaba 1.205
4846     ## Step 6-7
4847     $self->{insertion_mode} |= IN_CDATA_RCDATA_IM;
4848    
4849 wakaba 1.125 !!!nack ('t392.1');
4850 wakaba 1.52 !!!next-token;
4851 wakaba 1.126 next B;
4852 wakaba 1.201 } elsif ($token->{tag_name} eq 'optgroup' or
4853     $token->{tag_name} eq 'option') {
4854     ## has an |option| element in scope
4855     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4856     my $node = $self->{open_elements}->[$_];
4857 wakaba 1.206 if ($node->[1] == OPTION_EL) {
4858 wakaba 1.201 !!!cp ('t397.1');
4859     ## NOTE: As if </option>
4860     !!!back-token; # <option> or <optgroup>
4861     $token = {type => END_TAG_TOKEN, tag_name => 'option',
4862     line => $token->{line}, column => $token->{column}};
4863     next B;
4864     } elsif ($node->[1] & SCOPING_EL) {
4865     !!!cp ('t397.2');
4866     last INSCOPE;
4867     }
4868     } # INSCOPE
4869    
4870     $reconstruct_active_formatting_elements->($insert_to_current);
4871    
4872     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4873    
4874     !!!nack ('t397.3');
4875     !!!next-token;
4876     redo B;
4877 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
4878     $token->{tag_name} eq 'rp') {
4879     ## has a |ruby| element in scope
4880     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4881     my $node = $self->{open_elements}->[$_];
4882 wakaba 1.206 if ($node->[1] == RUBY_EL) {
4883 wakaba 1.151 !!!cp ('t398.1');
4884     ## generate implied end tags
4885     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4886     !!!cp ('t398.2');
4887     pop @{$self->{open_elements}};
4888     }
4889 wakaba 1.206 unless ($self->{open_elements}->[-1]->[1] == RUBY_EL) {
4890 wakaba 1.151 !!!cp ('t398.3');
4891     !!!parse-error (type => 'not closed',
4892 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4893 wakaba 1.151 ->manakai_local_name,
4894     token => $token);
4895     pop @{$self->{open_elements}}
4896 wakaba 1.206 while not $self->{open_elements}->[-1]->[1] == RUBY_EL;
4897 wakaba 1.151 }
4898     last INSCOPE;
4899     } elsif ($node->[1] & SCOPING_EL) {
4900     !!!cp ('t398.4');
4901     last INSCOPE;
4902     }
4903     } # INSCOPE
4904 wakaba 1.212
4905     ## TODO: <non-ruby><rt> is not allowed.
4906 wakaba 1.151
4907     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4908    
4909     !!!nack ('t398.5');
4910     !!!next-token;
4911     redo B;
4912 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
4913     $token->{tag_name} eq 'svg') {
4914     $reconstruct_active_formatting_elements->($insert_to_current);
4915 wakaba 1.131
4916 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
4917    
4918 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
4919    
4920     ## "adjust foreign attributes" - done in insert-element-f
4921 wakaba 1.126
4922 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
4923 wakaba 1.126
4924     if ($self->{self_closing}) {
4925     pop @{$self->{open_elements}};
4926 wakaba 1.201 !!!ack ('t398.6');
4927 wakaba 1.126 } else {
4928 wakaba 1.201 !!!cp ('t398.7');
4929 wakaba 1.126 $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
4930     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
4931     ## mode, "in body" (not "in foreign content") secondary insertion
4932     ## mode, maybe.
4933     }
4934    
4935     !!!next-token;
4936     next B;
4937 wakaba 1.52 } elsif ({
4938     caption => 1, col => 1, colgroup => 1, frame => 1,
4939 wakaba 1.201 frameset => 1, head => 1,
4940 wakaba 1.52 tbody => 1, td => 1, tfoot => 1, th => 1,
4941     thead => 1, tr => 1,
4942     }->{$token->{tag_name}}) {
4943 wakaba 1.79 !!!cp ('t401');
4944 wakaba 1.153 !!!parse-error (type => 'in body',
4945     text => $token->{tag_name}, token => $token);
4946 wakaba 1.52 ## Ignore the token
4947 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
4948 wakaba 1.52 !!!next-token;
4949 wakaba 1.126 next B;
4950 wakaba 1.198 } elsif ($token->{tag_name} eq 'param' or
4951     $token->{tag_name} eq 'source') {
4952     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4953     pop @{$self->{open_elements}};
4954    
4955     !!!ack ('t398.5');
4956     !!!next-token;
4957     redo B;
4958 wakaba 1.52 } else {
4959 wakaba 1.110 if ($token->{tag_name} eq 'image') {
4960     !!!cp ('t384');
4961 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
4962 wakaba 1.110 $token->{tag_name} = 'img';
4963     } else {
4964     !!!cp ('t385');
4965     }
4966    
4967     ## NOTE: There is an "as if <br>" code clone.
4968 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
4969    
4970 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
4971 wakaba 1.109
4972 wakaba 1.110 if ({
4973     applet => 1, marquee => 1, object => 1,
4974     }->{$token->{tag_name}}) {
4975     !!!cp ('t380');
4976     push @$active_formatting_elements, ['#marker', ''];
4977 wakaba 1.125 !!!nack ('t380.1');
4978 wakaba 1.110 } elsif ({
4979     b => 1, big => 1, em => 1, font => 1, i => 1,
4980 wakaba 1.193 s => 1, small => 1, strike => 1,
4981 wakaba 1.110 strong => 1, tt => 1, u => 1,
4982     }->{$token->{tag_name}}) {
4983     !!!cp ('t375');
4984     push @$active_formatting_elements, $self->{open_elements}->[-1];
4985 wakaba 1.125 !!!nack ('t375.1');
4986 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
4987     !!!cp ('t388');
4988     ## TODO: associate with $self->{form_element} if defined
4989     pop @{$self->{open_elements}};
4990 wakaba 1.125 !!!ack ('t388.2');
4991 wakaba 1.110 } elsif ({
4992     area => 1, basefont => 1, bgsound => 1, br => 1,
4993 wakaba 1.198 embed => 1, img => 1, spacer => 1, wbr => 1,
4994 wakaba 1.110 }->{$token->{tag_name}}) {
4995     !!!cp ('t388.1');
4996     pop @{$self->{open_elements}};
4997 wakaba 1.125 !!!ack ('t388.3');
4998 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
4999 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
5000    
5001     if ($self->{insertion_mode} & TABLE_IMS or
5002     $self->{insertion_mode} & BODY_TABLE_IMS or
5003 wakaba 1.210 ($self->{insertion_mode} & IM_MASK) == IN_COLUMN_GROUP_IM) {
5004 wakaba 1.109 !!!cp ('t400.1');
5005     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
5006     } else {
5007     !!!cp ('t400.2');
5008     $self->{insertion_mode} = IN_SELECT_IM;
5009     }
5010 wakaba 1.125 !!!nack ('t400.3');
5011 wakaba 1.110 } else {
5012 wakaba 1.125 !!!nack ('t402');
5013 wakaba 1.109 }
5014 wakaba 1.51
5015 wakaba 1.52 !!!next-token;
5016 wakaba 1.126 next B;
5017 wakaba 1.52 }
5018 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5019 wakaba 1.52 if ($token->{tag_name} eq 'body') {
5020 wakaba 1.107 ## has a |body| element in scope
5021     my $i;
5022 wakaba 1.111 INSCOPE: {
5023     for (reverse @{$self->{open_elements}}) {
5024 wakaba 1.206 if ($_->[1] == BODY_EL) {
5025 wakaba 1.111 !!!cp ('t405');
5026     $i = $_;
5027     last INSCOPE;
5028 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
5029 wakaba 1.111 !!!cp ('t405.1');
5030     last;
5031     }
5032 wakaba 1.52 }
5033 wakaba 1.111
5034 wakaba 1.200 ## NOTE: |<marquee></body>|, |<svg><foreignobject></body>|
5035    
5036     !!!parse-error (type => 'unmatched end tag',
5037 wakaba 1.153 text => $token->{tag_name}, token => $token);
5038 wakaba 1.107 ## NOTE: Ignore the token.
5039 wakaba 1.52 !!!next-token;
5040 wakaba 1.126 next B;
5041 wakaba 1.111 } # INSCOPE
5042 wakaba 1.107
5043     for (@{$self->{open_elements}}) {
5044 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
5045 wakaba 1.107 !!!cp ('t403');
5046 wakaba 1.122 !!!parse-error (type => 'not closed',
5047 wakaba 1.153 text => $_->[0]->manakai_local_name,
5048 wakaba 1.122 token => $token);
5049 wakaba 1.107 last;
5050     } else {
5051     !!!cp ('t404');
5052     }
5053     }
5054    
5055     $self->{insertion_mode} = AFTER_BODY_IM;
5056     !!!next-token;
5057 wakaba 1.126 next B;
5058 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
5059 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
5060     ## up-to-date, though it has same effect as speced.
5061 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
5062 wakaba 1.206 $self->{open_elements}->[1]->[1] == BODY_EL) {
5063     unless ($self->{open_elements}->[-1]->[1] == BODY_EL) {
5064 wakaba 1.79 !!!cp ('t406');
5065 wakaba 1.122 !!!parse-error (type => 'not closed',
5066 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
5067 wakaba 1.122 ->manakai_local_name,
5068     token => $token);
5069 wakaba 1.79 } else {
5070     !!!cp ('t407');
5071 wakaba 1.1 }
5072 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5073 wakaba 1.52 ## reprocess
5074 wakaba 1.126 next B;
5075 wakaba 1.51 } else {
5076 wakaba 1.79 !!!cp ('t408');
5077 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5078     text => $token->{tag_name}, token => $token);
5079 wakaba 1.52 ## Ignore the token
5080     !!!next-token;
5081 wakaba 1.126 next B;
5082 wakaba 1.51 }
5083 wakaba 1.52 } elsif ({
5084 wakaba 1.195 ## NOTE: End tags for non-phrasing flow content elements
5085    
5086     ## NOTE: The normal ones
5087     address => 1, article => 1, aside => 1, blockquote => 1,
5088     center => 1, datagrid => 1, details => 1, dialog => 1,
5089     dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
5090     footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
5091     ol => 1, pre => 1, section => 1, ul => 1,
5092    
5093     ## NOTE: As normal, but ... optional tags
5094 wakaba 1.52 dd => 1, dt => 1, li => 1,
5095 wakaba 1.195
5096 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5097 wakaba 1.52 }->{$token->{tag_name}}) {
5098 wakaba 1.197 ## NOTE: Code for <li> start tags includes "as if </li>" code.
5099     ## Code for <dt> or <dd> start tags includes "as if </dt> or
5100     ## </dd>" code.
5101    
5102 wakaba 1.52 ## has an element in scope
5103     my $i;
5104     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5105     my $node = $self->{open_elements}->[$_];
5106 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5107 wakaba 1.79 !!!cp ('t410');
5108 wakaba 1.52 $i = $_;
5109 wakaba 1.87 last INSCOPE;
5110 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5111 wakaba 1.79 !!!cp ('t411');
5112 wakaba 1.52 last INSCOPE;
5113 wakaba 1.51 }
5114 wakaba 1.52 } # INSCOPE
5115 wakaba 1.89
5116     unless (defined $i) { # has an element in scope
5117     !!!cp ('t413');
5118 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5119     text => $token->{tag_name}, token => $token);
5120 wakaba 1.157 ## NOTE: Ignore the token.
5121 wakaba 1.89 } else {
5122     ## Step 1. generate implied end tags
5123     while ({
5124 wakaba 1.151 ## END_TAG_OPTIONAL_EL
5125 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
5126     dt => ($token->{tag_name} ne 'dt'),
5127     li => ($token->{tag_name} ne 'li'),
5128 wakaba 1.194 option => 1,
5129     optgroup => 1,
5130 wakaba 1.89 p => 1,
5131 wakaba 1.151 rt => 1,
5132     rp => 1,
5133 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
5134 wakaba 1.89 !!!cp ('t409');
5135     pop @{$self->{open_elements}};
5136     }
5137    
5138     ## Step 2.
5139 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5140     ne $token->{tag_name}) {
5141 wakaba 1.79 !!!cp ('t412');
5142 wakaba 1.122 !!!parse-error (type => 'not closed',
5143 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5144 wakaba 1.122 ->manakai_local_name,
5145     token => $token);
5146 wakaba 1.51 } else {
5147 wakaba 1.89 !!!cp ('t414');
5148 wakaba 1.51 }
5149 wakaba 1.89
5150     ## Step 3.
5151 wakaba 1.52 splice @{$self->{open_elements}}, $i;
5152 wakaba 1.89
5153     ## Step 4.
5154     $clear_up_to_marker->()
5155     if {
5156 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
5157 wakaba 1.89 }->{$token->{tag_name}};
5158 wakaba 1.51 }
5159 wakaba 1.52 !!!next-token;
5160 wakaba 1.126 next B;
5161 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
5162 wakaba 1.195 ## NOTE: As normal, but interacts with the form element pointer
5163    
5164 wakaba 1.92 undef $self->{form_element};
5165    
5166 wakaba 1.52 ## has an element in scope
5167 wakaba 1.92 my $i;
5168 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5169     my $node = $self->{open_elements}->[$_];
5170 wakaba 1.206 if ($node->[1] == FORM_EL) {
5171 wakaba 1.79 !!!cp ('t418');
5172 wakaba 1.92 $i = $_;
5173 wakaba 1.52 last INSCOPE;
5174 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5175 wakaba 1.79 !!!cp ('t419');
5176 wakaba 1.52 last INSCOPE;
5177     }
5178     } # INSCOPE
5179 wakaba 1.92
5180     unless (defined $i) { # has an element in scope
5181 wakaba 1.79 !!!cp ('t421');
5182 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5183     text => $token->{tag_name}, token => $token);
5184 wakaba 1.157 ## NOTE: Ignore the token.
5185 wakaba 1.92 } else {
5186     ## Step 1. generate implied end tags
5187 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5188 wakaba 1.92 !!!cp ('t417');
5189     pop @{$self->{open_elements}};
5190     }
5191    
5192     ## Step 2.
5193 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5194     ne $token->{tag_name}) {
5195 wakaba 1.92 !!!cp ('t417.1');
5196 wakaba 1.122 !!!parse-error (type => 'not closed',
5197 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5198 wakaba 1.122 ->manakai_local_name,
5199     token => $token);
5200 wakaba 1.92 } else {
5201     !!!cp ('t420');
5202     }
5203    
5204     ## Step 3.
5205     splice @{$self->{open_elements}}, $i;
5206 wakaba 1.52 }
5207    
5208     !!!next-token;
5209 wakaba 1.126 next B;
5210 wakaba 1.52 } elsif ({
5211 wakaba 1.195 ## NOTE: As normal, except acts as a closer for any ...
5212 wakaba 1.52 h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
5213     }->{$token->{tag_name}}) {
5214     ## has an element in scope
5215     my $i;
5216     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5217     my $node = $self->{open_elements}->[$_];
5218 wakaba 1.206 if ($node->[1] == HEADING_EL) {
5219 wakaba 1.79 !!!cp ('t423');
5220 wakaba 1.52 $i = $_;
5221     last INSCOPE;
5222 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5223 wakaba 1.79 !!!cp ('t424');
5224 wakaba 1.52 last INSCOPE;
5225 wakaba 1.51 }
5226 wakaba 1.52 } # INSCOPE
5227 wakaba 1.93
5228     unless (defined $i) { # has an element in scope
5229     !!!cp ('t425.1');
5230 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5231     text => $token->{tag_name}, token => $token);
5232 wakaba 1.157 ## NOTE: Ignore the token.
5233 wakaba 1.79 } else {
5234 wakaba 1.93 ## Step 1. generate implied end tags
5235 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5236 wakaba 1.93 !!!cp ('t422');
5237     pop @{$self->{open_elements}};
5238     }
5239    
5240     ## Step 2.
5241 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5242     ne $token->{tag_name}) {
5243 wakaba 1.93 !!!cp ('t425');
5244 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5245     text => $token->{tag_name}, token => $token);
5246 wakaba 1.93 } else {
5247     !!!cp ('t426');
5248     }
5249    
5250     ## Step 3.
5251     splice @{$self->{open_elements}}, $i;
5252 wakaba 1.36 }
5253 wakaba 1.52
5254     !!!next-token;
5255 wakaba 1.126 next B;
5256 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
5257 wakaba 1.195 ## NOTE: As normal, except </p> implies <p> and ...
5258    
5259 wakaba 1.87 ## has an element in scope
5260 wakaba 1.197 my $non_optional;
5261 wakaba 1.87 my $i;
5262     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5263     my $node = $self->{open_elements}->[$_];
5264 wakaba 1.206 if ($node->[1] == P_EL) {
5265 wakaba 1.87 !!!cp ('t410.1');
5266     $i = $_;
5267 wakaba 1.88 last INSCOPE;
5268 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
5269 wakaba 1.87 !!!cp ('t411.1');
5270     last INSCOPE;
5271 wakaba 1.197 } elsif ($node->[1] & END_TAG_OPTIONAL_EL) {
5272     ## NOTE: |END_TAG_OPTIONAL_EL| includes "p"
5273     !!!cp ('t411.2');
5274     #
5275     } else {
5276     !!!cp ('t411.3');
5277     $non_optional ||= $node;
5278     #
5279 wakaba 1.87 }
5280     } # INSCOPE
5281 wakaba 1.91
5282     if (defined $i) {
5283 wakaba 1.197 ## 1. Generate implied end tags
5284     #
5285    
5286     ## 2. If current node != "p", parse error
5287     if ($non_optional) {
5288 wakaba 1.87 !!!cp ('t412.1');
5289 wakaba 1.122 !!!parse-error (type => 'not closed',
5290 wakaba 1.197 text => $non_optional->[0]->manakai_local_name,
5291 wakaba 1.122 token => $token);
5292 wakaba 1.87 } else {
5293 wakaba 1.91 !!!cp ('t414.1');
5294 wakaba 1.87 }
5295 wakaba 1.91
5296 wakaba 1.197 ## 3. Pop
5297 wakaba 1.87 splice @{$self->{open_elements}}, $i;
5298     } else {
5299 wakaba 1.91 !!!cp ('t413.1');
5300 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5301     text => $token->{tag_name}, token => $token);
5302 wakaba 1.91
5303 wakaba 1.87 !!!cp ('t415.1');
5304     ## As if <p>, then reprocess the current token
5305     my $el;
5306 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
5307 wakaba 1.87 $insert->($el);
5308 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
5309 wakaba 1.87 }
5310 wakaba 1.91
5311 wakaba 1.87 !!!next-token;
5312 wakaba 1.126 next B;
5313 wakaba 1.52 } elsif ({
5314     a => 1,
5315     b => 1, big => 1, em => 1, font => 1, i => 1,
5316 wakaba 1.193 nobr => 1, s => 1, small => 1, strike => 1,
5317 wakaba 1.52 strong => 1, tt => 1, u => 1,
5318     }->{$token->{tag_name}}) {
5319 wakaba 1.79 !!!cp ('t427');
5320 wakaba 1.113 $formatting_end_tag->($token);
5321 wakaba 1.126 next B;
5322 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
5323 wakaba 1.79 !!!cp ('t428');
5324 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5325     text => 'br', token => $token);
5326 wakaba 1.52
5327     ## As if <br>
5328     $reconstruct_active_formatting_elements->($insert_to_current);
5329    
5330     my $el;
5331 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
5332 wakaba 1.52 $insert->($el);
5333    
5334     ## Ignore the token.
5335     !!!next-token;
5336 wakaba 1.126 next B;
5337 wakaba 1.52 } else {
5338 wakaba 1.195 if ($token->{tag_name} eq 'sarcasm') {
5339     sleep 0.001; # take a deep breath
5340     }
5341    
5342 wakaba 1.52 ## Step 1
5343     my $node_i = -1;
5344     my $node = $self->{open_elements}->[$node_i];
5345 wakaba 1.51
5346 wakaba 1.52 ## Step 2
5347     S2: {
5348 wakaba 1.200 my $node_tag_name = $node->[0]->manakai_local_name;
5349     $node_tag_name =~ tr/A-Z/a-z/; # for SVG camelCase tag names
5350     if ($node_tag_name eq $token->{tag_name}) {
5351 wakaba 1.52 ## Step 1
5352     ## generate implied end tags
5353 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5354 wakaba 1.79 !!!cp ('t430');
5355 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
5356     ## ISSUE: <ruby><rt></rt> will also take this code path,
5357     ## which seems wrong.
5358 wakaba 1.86 pop @{$self->{open_elements}};
5359 wakaba 1.151 $node_i++;
5360 wakaba 1.52 }
5361    
5362     ## Step 2
5363 wakaba 1.200 my $current_tag_name
5364     = $self->{open_elements}->[-1]->[0]->manakai_local_name;
5365     $current_tag_name =~ tr/A-Z/a-z/;
5366     if ($current_tag_name ne $token->{tag_name}) {
5367 wakaba 1.79 !!!cp ('t431');
5368 wakaba 1.58 ## NOTE: <x><y></x>
5369 wakaba 1.122 !!!parse-error (type => 'not closed',
5370 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5371 wakaba 1.122 ->manakai_local_name,
5372     token => $token);
5373 wakaba 1.79 } else {
5374     !!!cp ('t432');
5375 wakaba 1.52 }
5376    
5377     ## Step 3
5378 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
5379 wakaba 1.51
5380 wakaba 1.1 !!!next-token;
5381 wakaba 1.52 last S2;
5382 wakaba 1.1 } else {
5383 wakaba 1.52 ## Step 3
5384 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
5385 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
5386 wakaba 1.123 ($node->[1] & SPECIAL_EL or
5387     $node->[1] & SCOPING_EL)) {
5388 wakaba 1.79 !!!cp ('t433');
5389 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5390     text => $token->{tag_name}, token => $token);
5391 wakaba 1.52 ## Ignore the token
5392     !!!next-token;
5393     last S2;
5394 wakaba 1.193
5395     ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
5396     ## 9.27, "a" is a child of <dd> (conforming). In
5397     ## Firefox 3.0.2, "a" is a child of <body>. In WinIE 7,
5398     ## "a" is a child of both <body> and <dd>.
5399 wakaba 1.52 }
5400 wakaba 1.193
5401 wakaba 1.79 !!!cp ('t434');
5402 wakaba 1.1 }
5403 wakaba 1.52
5404     ## Step 4
5405     $node_i--;
5406     $node = $self->{open_elements}->[$node_i];
5407    
5408     ## Step 5;
5409     redo S2;
5410     } # S2
5411 wakaba 1.126 next B;
5412 wakaba 1.1 }
5413     }
5414 wakaba 1.126 next B;
5415     } continue { # B
5416     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
5417     ## NOTE: The code below is executed in cases where it does not have
5418     ## to be, but it is harmless even in those cases.
5419     ## has an element in scope
5420     INSCOPE: {
5421     for (reverse 0..$#{$self->{open_elements}}) {
5422     my $node = $self->{open_elements}->[$_];
5423     if ($node->[1] & FOREIGN_EL) {
5424     last INSCOPE;
5425     } elsif ($node->[1] & SCOPING_EL) {
5426     last;
5427     }
5428     }
5429    
5430     ## NOTE: No foreign element in scope.
5431     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
5432     } # INSCOPE
5433     }
5434 wakaba 1.1 } # B
5435    
5436     ## Stop parsing # MUST
5437    
5438     ## TODO: script stuffs
5439 wakaba 1.3 } # _tree_construction_main
5440    
## set_inner_html ($class, $node, $string, $onerror, $get_wrapper)
##
## Replaces the children of |$node| with the result of parsing
## |$string| as HTML.
##
##   $node        - The node whose content is replaced.  Node type 9
##                  (document) and node type 1 (element) are handled;
##                  any other node type makes this method |die|.
##   $string      - The character string to parse.  It is read through
##                  |$_[0]| and never copied into a lexical variable.
##   $onerror     - Optional error handler (code reference).  In the
##                  element case, when it is false, parse errors are
##                  reported by |warn|.
##   $get_wrapper - Optional code reference applied to the input
##                  handle; defaults to a function returning its
##                  argument unchanged.
##
## NOTE: This is invoked as a class method, so the prototype is not
## applied by Perl to method calls.
5441 wakaba 1.177 sub set_inner_html ($$$$;$) {
5442 wakaba 1.3 my $class = shift;
5443     my $node = shift;
## After the two |shift|s above, |$_[0]| is the string, |$_[1]| is the
## error handler and |$_[2]| is the wrapper factory.
5444 wakaba 1.177 #my $s = \$_[0];
5445 wakaba 1.3 my $onerror = $_[1];
5446 wakaba 1.162 my $get_wrapper = $_[2] || sub ($) { return $_[0] };
5447 wakaba 1.3
5448 wakaba 1.63 ## ISSUE: Should {confident} be true?
5449    
5450 wakaba 1.3 my $nt = $node->node_type;
## Document node: remove the existing children and parse the string
## directly into the document.
5451     if ($nt == 9) {
5452     # MUST
5453    
5454     ## Step 1 # MUST
5455     ## TODO: If the document has an active parser, ...
5456     ## ISSUE: There is an issue in the spec.
5457    
5458     ## Step 2 # MUST
## The child list is copied into |@cn| before children are removed.
5459     my @cn = @{$node->child_nodes};
5460     for (@cn) {
5461     $node->remove_child ($_);
5462     }
5463    
5464     ## Step 3, 4, 5 # MUST
5465 wakaba 1.177 $class->parse_char_string ($_[0] => $node, $onerror, $get_wrapper);
## Element node: parse the string with a fresh parser into a temporary
## document, using |$node| as the context element, then move the
## resulting nodes under |$node|.
5466 wakaba 1.3 } elsif ($nt == 1) {
5467     ## TODO: If non-html element
5468    
5469     ## NOTE: Most of this code is copied from |parse_string|
5470    
5471 wakaba 1.162 ## TODO: Support for $get_wrapper
5472    
5473 wakaba 1.3 ## Step 1 # MUST
5474 wakaba 1.14 my $this_doc = $node->owner_document;
5475     my $doc = $this_doc->implementation->create_document;
5476 wakaba 1.18 $doc->manakai_is_html (1);
5477 wakaba 1.3 my $p = $class->new;
5478     $p->{document} = $doc;
5479    
5480 wakaba 1.84 ## Step 8 # MUST
5481 wakaba 1.3 my $i = 0;
5482 wakaba 1.121 $p->{line_prev} = $p->{line} = 1;
5483     $p->{column_prev} = $p->{column} = 0;
5484 wakaba 1.177 require Whatpm::Charset::DecodeHandle;
## The input handle reads from a reference to the caller's string and
## is then passed through |$get_wrapper|.
5485     my $input = Whatpm::Charset::DecodeHandle::CharString->new (\($_[0]));
5486     $input = $get_wrapper->($input);
## |set_nc| stores the code point of the next input character in
## |$self->{nc}| (-1 when |$input->read| returns false) and maintains
## the line/column counters.
5487 wakaba 1.183 $p->{set_nc} = sub {
5488 wakaba 1.3 my $self = shift;
5489 wakaba 1.14
5490 wakaba 1.178 my $char = '';
## A character saved by the CR handling below is consumed first.
5491 wakaba 1.183 if (defined $self->{next_nc}) {
5492     $char = $self->{next_nc};
5493     delete $self->{next_nc};
5494     $self->{nc} = ord $char;
5495 wakaba 1.177 } else {
5496 wakaba 1.180 $self->{char_buffer} = '';
5497     $self->{char_buffer_pos} = 0;
5498    
## Try to read a run of characters other than NUL, LF and CR into the
## buffer; when that succeeds the first buffered character becomes
## the next character and no further normalization is needed.
5499     my $count = $input->manakai_read_until
5500 wakaba 1.182 ($self->{char_buffer}, qr/[^\x00\x0A\x0D]/,
5501     $self->{char_buffer_pos});
5502 wakaba 1.180 if ($count) {
5503     $self->{line_prev} = $self->{line};
5504     $self->{column_prev} = $self->{column};
5505     $self->{column}++;
5506 wakaba 1.183 $self->{nc}
5507 wakaba 1.180 = ord substr ($self->{char_buffer},
5508     $self->{char_buffer_pos}++, 1);
5509     return;
5510     }
5511    
## Otherwise read a single character.
5512 wakaba 1.178 if ($input->read ($char, 1)) {
5513 wakaba 1.183 $self->{nc} = ord $char;
5514 wakaba 1.178 } else {
5515 wakaba 1.183 $self->{nc} = -1;
5516 wakaba 1.178 return;
5517     }
5518 wakaba 1.177 }
5519 wakaba 1.121
5520     ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
5521     $p->{column}++;
5522 wakaba 1.4
5523 wakaba 1.183 if ($self->{nc} == 0x000A) { # LF
5524 wakaba 1.121 $p->{line}++;
5525     $p->{column} = 0;
5526 wakaba 1.79 !!!cp ('i1');
5527 wakaba 1.183 } elsif ($self->{nc} == 0x000D) { # CR
5528 wakaba 1.177 ## TODO: support for abort/streaming
## Read one character ahead: an LF directly after the CR is dropped,
## any other character is saved in |next_nc| for the next call.  The
## CR itself is reported as LF.
5529 wakaba 1.178 my $next = '';
5530     if ($input->read ($next, 1) and $next ne "\x0A") {
5531 wakaba 1.183 $self->{next_nc} = $next;
5532 wakaba 1.177 }
5533 wakaba 1.183 $self->{nc} = 0x000A; # LF # MUST
5534 wakaba 1.121 $p->{line}++;
5535     $p->{column} = 0;
5536 wakaba 1.79 !!!cp ('i2');
5537 wakaba 1.183 } elsif ($self->{nc} == 0x0000) { # NULL
5538 wakaba 1.79 !!!cp ('i4');
5539 wakaba 1.14 !!!parse-error (type => 'NULL');
5540 wakaba 1.183 $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5541 wakaba 1.3 }
5542     };
5543 wakaba 1.171
## |read_until| stores, in |$_[0]| at the given offset, a run of
## characters that are neither in the character class body |$_[1]| nor
## NUL, LF or CR, and returns the number of characters read.  It
## returns 0 while a character is pending in |next_nc|.
5544 wakaba 1.172 $p->{read_until} = sub {
5545 wakaba 1.177 #my ($scalar, $specials_range, $offset) = @_;
5546 wakaba 1.183 return 0 if defined $p->{next_nc};
5547 wakaba 1.180
5548 wakaba 1.182 my $pattern = qr/[^$_[1]\x00\x0A\x0D]/;
5549 wakaba 1.180 my $offset = $_[2] || 0;
5550    
## Serve from the character buffer while it still has unread
## characters; otherwise read from the input handle.
5551     if ($p->{char_buffer_pos} < length $p->{char_buffer}) {
5552     pos ($p->{char_buffer}) = $p->{char_buffer_pos};
5553     if ($p->{char_buffer} =~ /\G(?>$pattern)+/) {
5554     substr ($_[0], $offset)
5555     = substr ($p->{char_buffer}, $-[0], $+[0] - $-[0]);
5556     my $count = $+[0] - $-[0];
5557     if ($count) {
5558     $p->{column} += $count;
5559     $p->{char_buffer_pos} += $count;
5560     $p->{line_prev} = $p->{line};
5561     $p->{column_prev} = $p->{column} - 1;
5562 wakaba 1.183 $p->{nc} = -1;
5563 wakaba 1.180 }
5564     return $count;
5565     } else {
5566     return 0;
5567     }
5568     } else {
## NOTE(review): The raw |$_[2]| (possibly undefined) is passed here
## rather than |$offset| -- confirm that |manakai_read_until| treats
## an undefined offset as 0.
5569     my $count = $input->manakai_read_until ($_[0], $pattern, $_[2]);
5570     if ($count) {
5571     $p->{column} += $count;
5572     $p->{column_prev} += $count;
5573 wakaba 1.183 $p->{nc} = -1;
5574 wakaba 1.180 }
5575     return $count;
5576 wakaba 1.177 }
5577     }; # $p->{read_until}
5578 wakaba 1.171
## Error handler: the caller's |$onerror| or, when it is false, a
## default that |warn|s with the error type and position.  The position
## of the token is preferred over the current position when the token
## has a defined line.
5579 wakaba 1.3 my $ponerror = $onerror || sub {
5580     my (%opt) = @_;
5581 wakaba 1.121 my $line = $opt{line};
5582     my $column = $opt{column};
5583     if (defined $opt{token} and defined $opt{token}->{line}) {
5584     $line = $opt{token}->{line};
5585     $column = $opt{token}->{column};
5586     }
5587     warn "Parse error ($opt{type}) at line $line column $column\n";
5588 wakaba 1.3 };
## The parser-level handler puts the parser's current line and column
## in front of the arguments given by the reporter.
5589     $p->{parse_error} = sub {
5590 wakaba 1.121 $ponerror->(line => $p->{line}, column => $p->{column}, @_);
5591 wakaba 1.3 };
5592    
## Errors raised by the input handle are forwarded with
## |layer => 'encode'| and the column after the current one.
5593 wakaba 1.178 my $char_onerror = sub {
5594     my (undef, $type, %opt) = @_;
5595     $ponerror->(layer => 'encode',
5596     line => $p->{line}, column => $p->{column} + 1,
5597     %opt, type => $type);
5598     }; # $char_onerror
5599     $input->onerror ($char_onerror);
5600    
5601 wakaba 1.3 $p->_initialize_tokenizer;
5602     $p->_initialize_tree_constructor;
5603    
5604     ## Step 2
## The tokenizer's initial content model is selected by the local name
## of the context element; PCDATA is used for names not listed.
5605 wakaba 1.71 my $node_ln = $node->manakai_local_name;
5606 wakaba 1.40 $p->{content_model} = {
5607     title => RCDATA_CONTENT_MODEL,
5608     textarea => RCDATA_CONTENT_MODEL,
5609     style => CDATA_CONTENT_MODEL,
5610     script => CDATA_CONTENT_MODEL,
5611     xmp => CDATA_CONTENT_MODEL,
5612     iframe => CDATA_CONTENT_MODEL,
5613     noembed => CDATA_CONTENT_MODEL,
5614     noframes => CDATA_CONTENT_MODEL,
5615     noscript => CDATA_CONTENT_MODEL,
5616     plaintext => PLAINTEXT_CONTENT_MODEL,
5617     }->{$node_ln};
5618     $p->{content_model} = PCDATA_CONTENT_MODEL
5619     unless defined $p->{content_model};
5620 wakaba 1.3
## The context element is recorded in the same [node, category] form
## as the entries pushed onto |open_elements| below.
5621 wakaba 1.123 $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
5622     ## TODO: Foreign element OK?
5623 wakaba 1.3
5624 wakaba 1.84 ## Step 3
5625 wakaba 1.3 my $root = $doc->create_element_ns
5626     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
5627    
5628 wakaba 1.84 ## Step 4 # MUST
5629 wakaba 1.3 $doc->append_child ($root);
5630    
5631 wakaba 1.84 ## Step 5 # MUST
5632 wakaba 1.123 push @{$p->{open_elements}}, [$root, $el_category->{html}];
5633 wakaba 1.3
5634     undef $p->{head_element};
5635 wakaba 1.202 undef $p->{head_element_inserted};
5636 wakaba 1.3
5637 wakaba 1.84 ## Step 6 # MUST
5638 wakaba 1.3 $p->_reset_insertion_mode;
5639    
5640 wakaba 1.84 ## Step 7 # MUST
## Walk from the context element up through its ancestors; the first
## XHTML-namespace |form| element found becomes the form element
## pointer.
5641 wakaba 1.3 my $anode = $node;
5642     AN: while (defined $anode) {
5643     if ($anode->node_type == 1) {
5644     my $nsuri = $anode->namespace_uri;
5645     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
5646 wakaba 1.71 if ($anode->manakai_local_name eq 'form') {
5647 wakaba 1.79 !!!cp ('i5');
5648 wakaba 1.3 $p->{form_element} = $anode;
5649     last AN;
5650     }
5651     }
5652     }
5653     $anode = $anode->parent_node;
5654     } # AN
5655    
5656 wakaba 1.84 ## Step 9 # MUST
## NOTE(review): |$p| is aliased to |$self| here presumably because
## the |!!!next-token| macro expands to code that refers to |$self|
## -- confirm against the macro definition.
5657 wakaba 1.3 {
5658     my $self = $p;
5659     !!!next-token;
5660     }
5661     $p->_tree_construction_main;
5662    
5663 wakaba 1.84 ## Step 10 # MUST
## Remove the current children of the context element.
5664 wakaba 1.3 my @cn = @{$node->child_nodes};
5665     for (@cn) {
5666     $node->remove_child ($_);
5667     }
5668     ## ISSUE: mutation events? read-only?
5669    
5670 wakaba 1.84 ## Step 11 # MUST
## Move the children of the temporary root element under the context
## element, adopting each into the context element's document first.
5671 wakaba 1.3 @cn = @{$root->child_nodes};
5672     for (@cn) {
5673 wakaba 1.14 $this_doc->adopt_node ($_);
5674 wakaba 1.3 $node->append_child ($_);
5675     }
5676 wakaba 1.14 ## ISSUE: mutation events?
5677 wakaba 1.3
5678     $p->_terminate_tree_constructor;
5679 wakaba 1.121
## The |parse_error| closure refers to |$p|, which in turn holds the
## closure; deleting it breaks that reference cycle.
## NOTE(review): The |set_nc| and |read_until| closures also refer to
## |$p| and are not deleted here -- confirm whether they keep the
## parser object alive.
5680     delete $p->{parse_error}; # delete loop
5681 wakaba 1.3 } else {
5682     die "$0: |set_inner_html| is not defined for node of type $nt";
5683     }
5684     } # set_inner_html
5685    
5686     } # tree construction stage
5687 wakaba 1.1
## |Whatpm::HTML::RestartParser| is declared as a subclass of |Error|
## by adding |Error| to its |@ISA|.
## NOTE(review): Presumably thrown to request that parsing be
## restarted -- confirm against the code that throws it.
5688 wakaba 1.63 package Whatpm::HTML::RestartParser;
5689     push our @ISA, 'Error';
5690    
## True value returned to |require| / |use| when the module is loaded.
5691 wakaba 1.1 1;
5692 wakaba 1.212 # $Date: 2008/10/27 05:44:47 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24