/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.149 - (hide annotations) (download) (as text)
Sun May 25 08:53:49 2008 UTC (16 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.148: +4 -3 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	25 May 2008 08:53:05 -0000
	* ContentType.t: Test result related to UTF-32 updated (HTML5
	revision 1701).

2008-05-25  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	25 May 2008 08:53:43 -0000
	* ContentType.pm: Drop support for UTF-32 (HTML5 revision 1701).

	* HTML.pm.src: UTF-16BE and UTF-16LE should be considered
	as UTF-16 (HTML5 revision 1701).

2008-05-25  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.149 our $VERSION=do{my @r=(q$Revision: 1.148 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.139 require IO::Handle;
12    
## Namespace URIs for HTML, MathML, SVG, XLink, XML and XMLNS.
13 wakaba 1.126 my $HTML_NS = q<http://www.w3.org/1999/xhtml>;
14     my $MML_NS = q<http://www.w3.org/1998/Math/MathML>;
15     my $SVG_NS = q<http://www.w3.org/2000/svg>;
16     my $XLINK_NS = q<http://www.w3.org/1999/xlink>;
17     my $XML_NS = q<http://www.w3.org/XML/1998/namespace>;
18     my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>;
19    
## Element category flags.  Each constant is a distinct single bit, so
## that an element's categories can be combined with |\|| and tested
## with |&|.
20 wakaba 1.123 sub A_EL () { 0b1 }
21     sub ADDRESS_EL () { 0b10 }
22     sub BODY_EL () { 0b100 }
23     sub BUTTON_EL () { 0b1000 }
24     sub CAPTION_EL () { 0b10000 }
25     sub DD_EL () { 0b100000 }
26     sub DIV_EL () { 0b1000000 }
27     sub DT_EL () { 0b10000000 }
28     sub FORM_EL () { 0b100000000 }
29     sub FORMATTING_EL () { 0b1000000000 }
30     sub FRAMESET_EL () { 0b10000000000 }
31     sub HEADING_EL () { 0b100000000000 }
32     sub HTML_EL () { 0b1000000000000 }
33     sub LI_EL () { 0b10000000000000 }
34     sub NOBR_EL () { 0b100000000000000 }
35     sub OPTION_EL () { 0b1000000000000000 }
36     sub OPTGROUP_EL () { 0b10000000000000000 }
37     sub P_EL () { 0b100000000000000000 }
38     sub SELECT_EL () { 0b1000000000000000000 }
39     sub TABLE_EL () { 0b10000000000000000000 }
40     sub TABLE_CELL_EL () { 0b100000000000000000000 }
41     sub TABLE_ROW_EL () { 0b1000000000000000000000 }
42     sub TABLE_ROW_GROUP_EL () { 0b10000000000000000000000 }
43     sub MISC_SCOPING_EL () { 0b100000000000000000000000 }
44     sub MISC_SPECIAL_EL () { 0b1000000000000000000000000 }
## Flags for elements outside the HTML namespace (see |$el_category_f|).
45 wakaba 1.126 sub FOREIGN_EL () { 0b10000000000000000000000000 }
46     sub FOREIGN_FLOW_CONTENT_EL () { 0b100000000000000000000000000 }
47     sub MML_AXML_EL () { 0b1000000000000000000000000000 }
48 wakaba 1.123
## Composite masks.  Each of the following constants is the bitwise OR
## of several single-bit category flags; an element belongs to the
## group if its category value has any of those bits set.

## |table|, |tr| and the row group elements.
49     sub TABLE_ROWS_EL () {
50     TABLE_EL |
51     TABLE_ROW_EL |
52     TABLE_ROW_GROUP_EL
53     }
54    
## |dd|, |dt|, |li| and |p|.
55     sub END_TAG_OPTIONAL_EL () {
56     DD_EL |
57     DT_EL |
58     LI_EL |
59     P_EL
60     }
61    
## |END_TAG_OPTIONAL_EL| plus |body|, |html|, table cells, rows and
## row groups.
62     sub ALL_END_TAG_OPTIONAL_EL () {
63     END_TAG_OPTIONAL_EL |
64     BODY_EL |
65     HTML_EL |
66     TABLE_CELL_EL |
67     TABLE_ROW_EL |
68     TABLE_ROW_GROUP_EL
69     }
70    
## Elements categorized as scoping.
71     sub SCOPING_EL () {
72     BUTTON_EL |
73     CAPTION_EL |
74     HTML_EL |
75     TABLE_EL |
76     TABLE_CELL_EL |
77     MISC_SCOPING_EL
78     }
79    
## |html| and |table|.
80     sub TABLE_SCOPING_EL () {
81     HTML_EL |
82     TABLE_EL
83     }
84    
## |html| and the row group elements.
85     sub TABLE_ROWS_SCOPING_EL () {
86     HTML_EL |
87     TABLE_ROW_GROUP_EL
88     }
89    
## |html| and |tr|.
90     sub TABLE_ROW_SCOPING_EL () {
91     HTML_EL |
92     TABLE_ROW_EL
93     }
94    
## Elements categorized as special.
95     sub SPECIAL_EL () {
96     ADDRESS_EL |
97     BODY_EL |
98     DIV_EL |
99     END_TAG_OPTIONAL_EL |
100     FORM_EL |
101     FRAMESET_EL |
102     HEADING_EL |
103     OPTION_EL |
104     OPTGROUP_EL |
105     SELECT_EL |
106     TABLE_ROW_EL |
107     TABLE_ROW_GROUP_EL |
108     MISC_SPECIAL_EL
109     }
110    
## Maps the local name of an HTML element to its category flags.
## Element names that are not keys of this hash have no category
## recorded here.
111     my $el_category = {
112     a => A_EL | FORMATTING_EL,
113     address => ADDRESS_EL,
114     applet => MISC_SCOPING_EL,
115     area => MISC_SPECIAL_EL,
116     b => FORMATTING_EL,
117     base => MISC_SPECIAL_EL,
118     basefont => MISC_SPECIAL_EL,
119     bgsound => MISC_SPECIAL_EL,
120     big => FORMATTING_EL,
121     blockquote => MISC_SPECIAL_EL,
122     body => BODY_EL,
123     br => MISC_SPECIAL_EL,
124     button => BUTTON_EL,
125     caption => CAPTION_EL,
126     center => MISC_SPECIAL_EL,
127     col => MISC_SPECIAL_EL,
128     colgroup => MISC_SPECIAL_EL,
129     dd => DD_EL,
130     dir => MISC_SPECIAL_EL,
131     div => DIV_EL,
132     dl => MISC_SPECIAL_EL,
133     dt => DT_EL,
134     em => FORMATTING_EL,
135     embed => MISC_SPECIAL_EL,
136     fieldset => MISC_SPECIAL_EL,
137     font => FORMATTING_EL,
138     form => FORM_EL,
139     frame => MISC_SPECIAL_EL,
140     frameset => FRAMESET_EL,
141     h1 => HEADING_EL,
142     h2 => HEADING_EL,
143     h3 => HEADING_EL,
144     h4 => HEADING_EL,
145     h5 => HEADING_EL,
146     h6 => HEADING_EL,
147     head => MISC_SPECIAL_EL,
148     hr => MISC_SPECIAL_EL,
149     html => HTML_EL,
150     i => FORMATTING_EL,
151     iframe => MISC_SPECIAL_EL,
152     img => MISC_SPECIAL_EL,
153     input => MISC_SPECIAL_EL,
154     isindex => MISC_SPECIAL_EL,
155     li => LI_EL,
156     link => MISC_SPECIAL_EL,
157     listing => MISC_SPECIAL_EL,
158     marquee => MISC_SCOPING_EL,
159     menu => MISC_SPECIAL_EL,
160     meta => MISC_SPECIAL_EL,
161     nobr => NOBR_EL | FORMATTING_EL,
162     noembed => MISC_SPECIAL_EL,
163     noframes => MISC_SPECIAL_EL,
164     noscript => MISC_SPECIAL_EL,
165     object => MISC_SCOPING_EL,
166     ol => MISC_SPECIAL_EL,
167     optgroup => OPTGROUP_EL,
168     option => OPTION_EL,
169     p => P_EL,
170     param => MISC_SPECIAL_EL,
171     plaintext => MISC_SPECIAL_EL,
172     pre => MISC_SPECIAL_EL,
173     s => FORMATTING_EL,
174     script => MISC_SPECIAL_EL,
175     select => SELECT_EL,
176     small => FORMATTING_EL,
177     spacer => MISC_SPECIAL_EL,
178     strike => FORMATTING_EL,
179     strong => FORMATTING_EL,
180     style => MISC_SPECIAL_EL,
181     table => TABLE_EL,
182     tbody => TABLE_ROW_GROUP_EL,
183     td => TABLE_CELL_EL,
184     textarea => MISC_SPECIAL_EL,
185     tfoot => TABLE_ROW_GROUP_EL,
186     th => TABLE_CELL_EL,
187     thead => TABLE_ROW_GROUP_EL,
188     title => MISC_SPECIAL_EL,
189     tr => TABLE_ROW_EL,
190     tt => FORMATTING_EL,
191     u => FORMATTING_EL,
192     ul => MISC_SPECIAL_EL,
193     wbr => MISC_SPECIAL_EL,
194     };
195    
## Category flags for selected MathML and SVG elements, keyed first by
## namespace URI and then by element local name.
196 wakaba 1.126 my $el_category_f = {
197     $MML_NS => {
198     'annotation-xml' => MML_AXML_EL,
199     mi => FOREIGN_FLOW_CONTENT_EL,
200     mo => FOREIGN_FLOW_CONTENT_EL,
201     mn => FOREIGN_FLOW_CONTENT_EL,
202     ms => FOREIGN_FLOW_CONTENT_EL,
203     mtext => FOREIGN_FLOW_CONTENT_EL,
204     },
205     $SVG_NS => {
206 wakaba 1.131 foreignObject => FOREIGN_FLOW_CONTENT_EL,
207 wakaba 1.126 desc => FOREIGN_FLOW_CONTENT_EL,
208     title => FOREIGN_FLOW_CONTENT_EL,
209     },
210     ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
211     };
212    
## Maps an all-lowercase attribute name to its mixed-case spelling.
## NOTE(review): presumably used to restore the case of SVG attribute
## names that were lowercased during tokenization; confirm at the
## place where this table is consulted.
213 wakaba 1.131 my $svg_attr_name = {
214 wakaba 1.146 attributename => 'attributeName',
215 wakaba 1.131 attributetype => 'attributeType',
216     basefrequency => 'baseFrequency',
217     baseprofile => 'baseProfile',
218     calcmode => 'calcMode',
219     clippathunits => 'clipPathUnits',
220     contentscripttype => 'contentScriptType',
221     contentstyletype => 'contentStyleType',
222     diffuseconstant => 'diffuseConstant',
223     edgemode => 'edgeMode',
224     externalresourcesrequired => 'externalResourcesRequired',
225     filterres => 'filterRes',
226     filterunits => 'filterUnits',
227     glyphref => 'glyphRef',
228     gradienttransform => 'gradientTransform',
229     gradientunits => 'gradientUnits',
230     kernelmatrix => 'kernelMatrix',
231     kernelunitlength => 'kernelUnitLength',
232     keypoints => 'keyPoints',
233     keysplines => 'keySplines',
234     keytimes => 'keyTimes',
235     lengthadjust => 'lengthAdjust',
236     limitingconeangle => 'limitingConeAngle',
237     markerheight => 'markerHeight',
238     markerunits => 'markerUnits',
239     markerwidth => 'markerWidth',
240     maskcontentunits => 'maskContentUnits',
241     maskunits => 'maskUnits',
242     numoctaves => 'numOctaves',
243     pathlength => 'pathLength',
244     patterncontentunits => 'patternContentUnits',
245     patterntransform => 'patternTransform',
246     patternunits => 'patternUnits',
247     pointsatx => 'pointsAtX',
248     pointsaty => 'pointsAtY',
249     pointsatz => 'pointsAtZ',
250     preservealpha => 'preserveAlpha',
251     preserveaspectratio => 'preserveAspectRatio',
252     primitiveunits => 'primitiveUnits',
253     refx => 'refX',
254     refy => 'refY',
255     repeatcount => 'repeatCount',
256     repeatdur => 'repeatDur',
257     requiredextensions => 'requiredExtensions',
258 wakaba 1.146 requiredfeatures => 'requiredFeatures',
259 wakaba 1.131 specularconstant => 'specularConstant',
260     specularexponent => 'specularExponent',
261     spreadmethod => 'spreadMethod',
262     startoffset => 'startOffset',
263     stddeviation => 'stdDeviation',
264     stitchtiles => 'stitchTiles',
265     surfacescale => 'surfaceScale',
266     systemlanguage => 'systemLanguage',
267     tablevalues => 'tableValues',
268     targetx => 'targetX',
269     targety => 'targetY',
270     textlength => 'textLength',
271     viewbox => 'viewBox',
272     viewtarget => 'viewTarget',
273     xchannelselector => 'xChannelSelector',
274     ychannelselector => 'yChannelSelector',
275     zoomandpan => 'zoomAndPan',
276     };
277    
## Maps a qualified attribute name, as written in the source, to
## |[namespace URI, [prefix, local name]]|.  The prefix is |undef| for
## the unprefixed |xmlns| attribute.
278     my $foreign_attr_xname = {
279     'xlink:actuate' => [$XLINK_NS, ['xlink', 'actuate']],
280     'xlink:arcrole' => [$XLINK_NS, ['xlink', 'arcrole']],
281     'xlink:href' => [$XLINK_NS, ['xlink', 'href']],
282     'xlink:role' => [$XLINK_NS, ['xlink', 'role']],
283     'xlink:show' => [$XLINK_NS, ['xlink', 'show']],
284     'xlink:title' => [$XLINK_NS, ['xlink', 'title']],
285     'xlink:type' => [$XLINK_NS, ['xlink', 'type']],
286     'xml:base' => [$XML_NS, ['xml', 'base']],
287     'xml:lang' => [$XML_NS, ['xml', 'lang']],
288     'xml:space' => [$XML_NS, ['xml', 'space']],
289     'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
290     'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
291     };
292    
293     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
294    
## Maps each code point in the range 0x80-0x9F to a replacement code
## point; entries with no better replacement map to U+FFFD.
## NOTE(review): judging by the name, this is presumably applied to
## numeric character references in the C1 range; confirm at the use
## site.
295 wakaba 1.4 my $c1_entity_char = {
296 wakaba 1.10 0x80 => 0x20AC,
297     0x81 => 0xFFFD,
298     0x82 => 0x201A,
299     0x83 => 0x0192,
300     0x84 => 0x201E,
301     0x85 => 0x2026,
302     0x86 => 0x2020,
303     0x87 => 0x2021,
304     0x88 => 0x02C6,
305     0x89 => 0x2030,
306     0x8A => 0x0160,
307     0x8B => 0x2039,
308     0x8C => 0x0152,
309     0x8D => 0xFFFD,
310     0x8E => 0x017D,
311     0x8F => 0xFFFD,
312     0x90 => 0xFFFD,
313     0x91 => 0x2018,
314     0x92 => 0x2019,
315     0x93 => 0x201C,
316     0x94 => 0x201D,
317     0x95 => 0x2022,
318     0x96 => 0x2013,
319     0x97 => 0x2014,
320     0x98 => 0x02DC,
321     0x99 => 0x2122,
322     0x9A => 0x0161,
323     0x9B => 0x203A,
324     0x9C => 0x0153,
325     0x9D => 0xFFFD,
326     0x9E => 0x017E,
327     0x9F => 0x0178,
328 wakaba 1.4 }; # $c1_entity_char
329 wakaba 1.1
## Parses a string of bytes as HTML.
##
## Arguments, after the invocant:
##   - the name of the character encoding, or |undef|;
##   - the byte string, or a reference to it;
##   - any further arguments, which are handed to |parse_byte_stream|
##     unchanged.
##
## The string is wrapped in an in-memory read handle and the actual
## work is delegated to |parse_byte_stream|, whose result is returned.
sub parse_byte_string ($$$$;$) {
  my ($self, $charset_name) = splice @_, 0, 2;

  ## Accept both a plain string and a reference to one.
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);
  open my $input, '<', $bytes_ref;

  my @rest = @_[1..$#_];
  return $self->parse_byte_stream ($charset_name, $input, @rest);
} # parse_byte_string
336    
## Parses a stream of bytes as HTML after determining its character
## encoding.
##
## The invocant may be a parser object or a class name; in the latter
## case a new parser is created with |new|.  Arguments after the
## invocant:
##   - $charset_name: an encoding name known to the caller, or |undef|;
##   - the byte stream, an object from which bytes are read with |getc|;
##   - the remaining arguments, which are forwarded to
##     |parse_char_stream|; the second of them, if true, is used as
##     the error handler.
##
## Returns the value returned by |parse_char_stream|.  If the parser
## throws |Whatpm::HTML::RestartParser| (see |change_encoding| below),
## parsing is run a second time with the newly selected decoder.
337     sub parse_byte_stream ($$$$;$) {
338 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
339 wakaba 1.133 my $charset_name = shift;
340 wakaba 1.138 my $byte_stream = $_[0];
341 wakaba 1.133
342 wakaba 1.134 my $onerror = $_[2] || sub {
343     my (%opt) = @_;
344     warn "Parse error ($opt{type})\n";
345     };
346     $self->{parse_error} = $onerror; # updated later by parse_char_string
347    
348 wakaba 1.133 ## HTML5 encoding sniffing algorithm
349     require Message::Charset::Info;
350     my $charset;
351 wakaba 1.136 my $buffer;
352     my ($char_stream, $e_status);
353 wakaba 1.133
354     SNIFFING: {
355    
356     ## Step 1
357     if (defined $charset_name) {
358     $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
359    
360     ## ISSUE: Unsupported encoding is not ignored according to the spec.
361 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
362     ($byte_stream, allow_error_reporting => 1,
363 wakaba 1.133 allow_fallback => 1);
364 wakaba 1.136 if ($char_stream) {
365 wakaba 1.133 $self->{confident} = 1;
366     last SNIFFING;
367 wakaba 1.136 } else {
368     ## TODO: unsupported error
369 wakaba 1.133 }
370     }
371    
372     ## Step 2
## Read at most 1024 bytes ahead; they are examined by the later steps.
373 wakaba 1.136 my $byte_buffer = '';
374     for (1..1024) {
375     my $char = $byte_stream->getc;
376     last unless defined $char;
377     $byte_buffer .= $char;
378     } ## TODO: timeout
379 wakaba 1.133
380     ## Step 3
## A byte order mark (FE FF, FF FE or EF BB BF) at the start of the
## buffered bytes selects utf-16be, utf-16le or utf-8 respectively.
381 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
382 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-16be');
383 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
384     ($byte_stream, allow_error_reporting => 1,
385     allow_fallback => 1, byte_buffer => \$byte_buffer);
386 wakaba 1.133 $self->{confident} = 1;
387     last SNIFFING;
388 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
389 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-16le');
390 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
391     ($byte_stream, allow_error_reporting => 1,
392     allow_fallback => 1, byte_buffer => \$byte_buffer);
393 wakaba 1.133 $self->{confident} = 1;
394     last SNIFFING;
395 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
396 wakaba 1.133 $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
397 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
398     ($byte_stream, allow_error_reporting => 1,
399     allow_fallback => 1, byte_buffer => \$byte_buffer);
400 wakaba 1.133 $self->{confident} = 1;
401     last SNIFFING;
402     }
403    
404     ## Step 4
405     ## TODO: <meta charset>
406    
407     ## Step 5
408     ## TODO: from history
409    
410     ## Step 6
411 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
412 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
413 wakaba 1.136 ($byte_buffer);
414 wakaba 1.133 if (defined $charset_name) {
415     $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
416    
417     ## ISSUE: Unsupported encoding is not ignored according to the spec.
418 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
419     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
420     ($byte_stream);
421     ($char_stream, $e_status) = $charset->get_decode_handle
422     ($buffer, allow_error_reporting => 1,
423     allow_fallback => 1, byte_buffer => \$byte_buffer);
424     if ($char_stream) {
425     $buffer->{buffer} = $byte_buffer;
426 wakaba 1.134 !!!parse-error (type => 'sniffing:chardet', ## TODO: type name
427     value => $charset_name,
428     level => $self->{info_level},
429     line => 1, column => 1);
430 wakaba 1.133 $self->{confident} = 0;
431     last SNIFFING;
432     }
433     }
434    
435     ## Step 7: default
436     ## TODO: Make this configurable.
437     $charset = Message::Charset::Info->get_by_iana_name ('windows-1252');
438     ## NOTE: We choose |windows-1252| here, since |utf-8| should be
439     ## detectable in the step 6.
440 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
441     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
442     ($byte_stream);
443     ($char_stream, $e_status)
444     = $charset->get_decode_handle ($buffer,
445     allow_error_reporting => 1,
446     allow_fallback => 1,
447     byte_buffer => \$byte_buffer);
448     $buffer->{buffer} = $byte_buffer;
449 wakaba 1.134 !!!parse-error (type => 'sniffing:default', ## TODO: type name
450     value => 'windows-1252',
451     level => $self->{info_level},
452     line => 1, column => 1);
453 wakaba 1.63 $self->{confident} = 0;
454 wakaba 1.133 } # SNIFFING
455    
## Record the chosen encoding and report how the decoder was obtained.
456 wakaba 1.134 $self->{input_encoding} = $charset->get_iana_name;
457 wakaba 1.133 if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
458 wakaba 1.134 !!!parse-error (type => 'chardecode:fallback', ## TODO: type name
459 wakaba 1.136 value => $self->{input_encoding},
460 wakaba 1.134 level => $self->{unsupported_level},
461     line => 1, column => 1);
462 wakaba 1.133 } elsif (not ($e_status &
463     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
464 wakaba 1.134 !!!parse-error (type => 'chardecode:no error', ## TODO: type name
465     value => $self->{input_encoding},
466     level => $self->{unsupported_level},
467     line => 1, column => 1);
468 wakaba 1.63 }
469    
## Handler called with (parser, new encoding name, token).  It assigns
## to this sub's own |$charset_name|, |$charset|, |$char_stream| and
## |$e_status|; the |RestartParser| handler further below reads those
## variables when it restarts the parser.
## NOTE(review): |$buffer| is assigned only in steps 6 and 7.  When
## sniffing finished in step 1 or 3 it is still undefined here, and
## |\ $buffer->{buffer}| then looks like it autovivifies an empty hash
## instead of referring to buffered bytes -- confirm this is intended.
470     $self->{change_encoding} = sub {
471     my $self = shift;
472 wakaba 1.134 $charset_name = shift;
473 wakaba 1.114 my $token = shift;
474 wakaba 1.63
475 wakaba 1.134 $charset = Message::Charset::Info->get_by_iana_name ($charset_name);
476 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
477     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
478     byte_buffer => \ $buffer->{buffer});
479 wakaba 1.134
480 wakaba 1.136 if ($char_stream) { # if supported
481 wakaba 1.134 ## "Change the encoding" algorithm:
482 wakaba 1.63
483 wakaba 1.134 ## Step 1
484 wakaba 1.149 if ($charset->{category} &
485     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
486 wakaba 1.134 $charset = Message::Charset::Info->get_by_iana_name ('utf-8');
487 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
488     ($byte_stream,
489     byte_buffer => \ $buffer->{buffer});
490 wakaba 1.134 }
491     $charset_name = $charset->get_iana_name;
492    
493     ## Step 2
494     if (defined $self->{input_encoding} and
495     $self->{input_encoding} eq $charset_name) {
496     !!!parse-error (type => 'charset label:matching', ## TODO: type
497     value => $charset_name,
498     level => $self->{info_level});
499     $self->{confident} = 1;
500     return;
501     }
502 wakaba 1.63
503 wakaba 1.134 !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
504     ':'.$charset_name, level => 'w', token => $token);
505    
506     ## Step 3
507     # if (can) {
508     ## change the encoding on the fly.
509     #$self->{confident} = 1;
510     #return;
511     # }
512    
513     ## Step 4
514     throw Whatpm::HTML::RestartParser ();
515 wakaba 1.63 }
516     }; # $self->{change_encoding}
517    
## Error handler installed on the decode handle: reports the error at
## the parser's current position and, when the handler is given an
## |octets| reference, overwrites its referent with U+FFFD.
518 wakaba 1.136 my $char_onerror = sub {
519     my (undef, $type, %opt) = @_;
520 wakaba 1.137 !!!parse-error (%opt, type => $type,
521     line => $self->{line}, column => $self->{column} + 1);
522 wakaba 1.136 if ($opt{octets}) {
523     ${$opt{octets}} = "\x{FFFD}"; # replacement character
524     }
525     };
526     $char_stream->onerror ($char_onerror);
527    
## Everything after the byte stream is passed on to |parse_char_stream|.
528 wakaba 1.63 my @args = @_; shift @args; # $s
529     my $return;
530     try {
531 wakaba 1.136 $return = $self->parse_char_stream ($char_stream, @args);
532 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
533 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
534    
535     $self->{input_encoding} = $charset->get_iana_name;
536     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
537     !!!parse-error (type => 'chardecode:fallback', ## TODO: type name
538 wakaba 1.136 value => $self->{input_encoding},
539 wakaba 1.134 level => $self->{unsupported_level},
540     line => 1, column => 1);
541     } elsif (not ($e_status &
542     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
543     !!!parse-error (type => 'chardecode:no error', ## TODO: type name
544     value => $self->{input_encoding},
545     level => $self->{unsupported_level},
546     line => 1, column => 1);
547     }
548 wakaba 1.63 $self->{confident} = 1;
549 wakaba 1.136 $char_stream->onerror ($char_onerror);
550     $return = $self->parse_char_stream ($char_stream, @args);
551 wakaba 1.63 };
552     return $return;
553 wakaba 1.138 } # parse_byte_stream
554 wakaba 1.63
555 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
556     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
557     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
558     ## because the core part of our HTML parser expects a string of characters,
559     ## not a string of bytes or code units or anything which might contain a BOM.
560     ## Therefore, any parser interface that accepts a string of bytes,
561     ## such as |parse_byte_string| in this module, must ensure that it does
562     ## strip the BOM and never strips any ZWNBSP.
563    
## Parses a string of characters as HTML.
##
## Arguments, after the invocant:
##   - the character string, or a reference to it;
##   - any further arguments, which are handed to |parse_char_stream|
##     unchanged.
##
## Returns the value returned by |parse_char_stream|.  Dies if the
## in-memory handle for the string cannot be opened.
##
## |parse_string| is an alias of this method.
sub parse_char_string ($$$;$) {
  my $self = shift;
  require utf8;
  my $s = ref $_[0] ? $_[0] : \($_[0]);

  ## An in-memory handle reads the bytes of the scalar it is opened on.
  ## Opening it directly on a character string only works where the
  ## handle happens to expose Perl's internal UTF-8 buffer; elsewhere
  ## the |open| fails for characters above U+00FF and characters in
  ## the range U+0080..U+00FF are decoded incorrectly.  Therefore a
  ## character string is first encoded into UTF-8 octets, on a copy so
  ## that the caller's string is left untouched, and the |:utf8| layer
  ## decodes those octets again while reading.
  my $octets = $$s;
  my $layer = '';
  if (utf8::is_utf8 ($octets)) {
    utf8::encode ($octets);
    $layer = ':utf8';
  }

  open my $input, '<' . $layer, \$octets
      or die "Can't open an in-memory handle for the input string: $!";
  return $self->parse_char_stream ($input, @_[1..$#_]);
} # parse_char_string
*parse_string = \&parse_char_string;
572 wakaba 1.63
## Parses a stream of characters as HTML into a document.
##
## The invocant may be a parser object or a class name; in the latter
## case a new parser is created with |new|.  Arguments after the
## invocant:
##   - $input: an object from which characters are read with |getc|;
##   - the document object to fill; its existing child nodes are
##     removed first;
##   - an optional error handler, called with a list of name/value
##     pairs; when omitted, errors are reported with |warn|.
##
## Returns the document.
573 wakaba 1.135 sub parse_char_stream ($$$;$) {
574 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
575 wakaba 1.135 my $input = $_[0];
576 wakaba 1.1 $self->{document} = $_[1];
577 wakaba 1.63 @{$self->{document}->child_nodes} = ();
578 wakaba 1.1
579 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
580    
581 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
582 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
583     if defined $self->{input_encoding};
584 wakaba 1.63
## NOTE(review): |$i| is not referenced anywhere else in this sub.
585 wakaba 1.1 my $i = 0;
586 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
587     $self->{column_prev} = $self->{column} = 0;
## Handler that advances the input by one character.  It is called
## with the parser as its argument and stores the code point of the
## next character, or -1 at the end of the input, in |{next_char}|.
## The previous values are shifted into the three-element
## |{prev_char}| list, and |{line}| and |{column}| are kept up to date.
588 wakaba 1.76 $self->{set_next_char} = sub {
589 wakaba 1.1 my $self = shift;
590 wakaba 1.13
591 wakaba 1.76 pop @{$self->{prev_char}};
592     unshift @{$self->{prev_char}}, $self->{next_char};
593 wakaba 1.13
## Use the character read ahead by the CR handling below, if any.
594 wakaba 1.139 my $char;
595     if (defined $self->{next_next_char}) {
596     $char = $self->{next_next_char};
597     delete $self->{next_next_char};
598     } else {
599     $char = $input->getc;
600     }
## The assigned value -1 is true, so |return| always runs here at the
## end of the input.
601 wakaba 1.135 $self->{next_char} = -1 and return unless defined $char;
602     $self->{next_char} = ord $char;
603 wakaba 1.112
604     ($self->{line_prev}, $self->{column_prev})
605     = ($self->{line}, $self->{column});
606     $self->{column}++;
607 wakaba 1.1
608 wakaba 1.76 if ($self->{next_char} == 0x000A) { # LF
609 wakaba 1.132 !!!cp ('j1');
610 wakaba 1.112 $self->{line}++;
611     $self->{column} = 0;
612 wakaba 1.76 } elsif ($self->{next_char} == 0x000D) { # CR
613 wakaba 1.132 !!!cp ('j2');
## CR and CR LF are both reported as a single LF.  A character read
## ahead that is not LF is saved for the next call.
614 wakaba 1.135 my $next = $input->getc;
615 wakaba 1.139 if (defined $next and $next ne "\x0A") {
616     $self->{next_next_char} = $next;
617 wakaba 1.135 }
618 wakaba 1.76 $self->{next_char} = 0x000A; # LF # MUST
619 wakaba 1.112 $self->{line}++;
620     $self->{column} = 0;
621 wakaba 1.76 } elsif ($self->{next_char} > 0x10FFFF) {
622 wakaba 1.132 !!!cp ('j3');
623 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
624     } elsif ($self->{next_char} == 0x0000) { # NULL
625 wakaba 1.132 !!!cp ('j4');
626 wakaba 1.8 !!!parse-error (type => 'NULL');
627 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
## The characters matched below are reported as an error but are
## passed on unchanged.
628 wakaba 1.132 } elsif ($self->{next_char} <= 0x0008 or
629     (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
630     (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
631     (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
632     (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
633     {
634     0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
635     0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
636     0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
637     0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
638     0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
639     0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
640     0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
641     0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
642     0x10FFFE => 1, 0x10FFFF => 1,
643     }->{$self->{next_char}}) {
644     !!!cp ('j5');
645     !!!parse-error (type => 'control char', level => $self->{must_level});
646     ## TODO: error type documentation
647 wakaba 1.1 }
648     };
649 wakaba 1.76 $self->{prev_char} = [-1, -1, -1];
650     $self->{next_char} = -1;
651 wakaba 1.1
## The error handler receives the current line and column first, so a
## later |line| or |column| pair supplied by the reporter takes
## precedence in a handler that reads its arguments into a hash.
652 wakaba 1.3 my $onerror = $_[2] || sub {
653     my (%opt) = @_;
654 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
655     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
656     warn "Parse error ($opt{type}) at line $line column $column\n";
657 wakaba 1.3 };
658     $self->{parse_error} = sub {
659 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
660 wakaba 1.1 };
661    
662     $self->_initialize_tokenizer;
663     $self->_initialize_tree_constructor;
664     $self->_construct_tree;
665     $self->_terminate_tree_constructor;
666    
## The |{parse_error}| closure above refers to |$self|; deleting it
## breaks that reference cycle.
667 wakaba 1.112 delete $self->{parse_error}; # remove loop
668    
669 wakaba 1.1 return $self->{document};
670 wakaba 1.135 } # parse_char_stream
671 wakaba 1.1
## Creates a new parser object.
##
## The object is a blessed hash that holds the default error level
## codes and placeholder handlers, which do nothing useful until a
## parse method installs the real ones.
##
## Returns the new object.
sub new ($) {
  my $class = shift;
  my $self = bless {
    must_level => 'm',
    should_level => 's',
    good_level => 'w',
    warn_level => 'w',
    info_level => 'i',
    unsupported_level => 'u',
  }, $class;
  $self->{set_next_char} = sub {
    ## The parser is taken from the argument list, in the same way as
    ## in the handler installed by |parse_char_stream|.  Referring to
    ## the enclosing |$self| instead would store a closure over the
    ## object inside the object itself, i.e. a reference cycle that
    ## prevents the object from ever being freed.
    my $self = shift;
    $self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
699    
## Content model flags: each bit states which kind of markup is
## recognized in character data.
700 wakaba 1.40 sub CM_ENTITY () { 0b001 } # & markup in data
701     sub CM_LIMITED_MARKUP () { 0b010 } # < markup in data (limited)
702     sub CM_FULL_MARKUP () { 0b100 } # < markup in data (any)
703    
## The four content models, expressed as combinations of the flags
## above.
704     sub PLAINTEXT_CONTENT_MODEL () { 0 }
705     sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
706     sub RCDATA_CONTENT_MODEL () { CM_ENTITY | CM_LIMITED_MARKUP }
707     sub PCDATA_CONTENT_MODEL () { CM_ENTITY | CM_FULL_MARKUP }
708    
## Tokenizer states.  The values are plain identifiers, not bit flags;
## the tokenizer is initialized to |DATA_STATE|.
709 wakaba 1.57 sub DATA_STATE () { 0 }
710     sub ENTITY_DATA_STATE () { 1 }
711     sub TAG_OPEN_STATE () { 2 }
712     sub CLOSE_TAG_OPEN_STATE () { 3 }
713     sub TAG_NAME_STATE () { 4 }
714     sub BEFORE_ATTRIBUTE_NAME_STATE () { 5 }
715     sub ATTRIBUTE_NAME_STATE () { 6 }
716     sub AFTER_ATTRIBUTE_NAME_STATE () { 7 }
717     sub BEFORE_ATTRIBUTE_VALUE_STATE () { 8 }
718     sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE () { 9 }
719     sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE () { 10 }
720     sub ATTRIBUTE_VALUE_UNQUOTED_STATE () { 11 }
721     sub ENTITY_IN_ATTRIBUTE_VALUE_STATE () { 12 }
722     sub MARKUP_DECLARATION_OPEN_STATE () { 13 }
723     sub COMMENT_START_STATE () { 14 }
724     sub COMMENT_START_DASH_STATE () { 15 }
725     sub COMMENT_STATE () { 16 }
726     sub COMMENT_END_STATE () { 17 }
727     sub COMMENT_END_DASH_STATE () { 18 }
728     sub BOGUS_COMMENT_STATE () { 19 }
729     sub DOCTYPE_STATE () { 20 }
730     sub BEFORE_DOCTYPE_NAME_STATE () { 21 }
731     sub DOCTYPE_NAME_STATE () { 22 }
732     sub AFTER_DOCTYPE_NAME_STATE () { 23 }
733     sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 24 }
734     sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE () { 25 }
735     sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE () { 26 }
736     sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE () { 27 }
737     sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 28 }
738     sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE () { 29 }
739     sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE () { 30 }
740     sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE () { 31 }
741     sub BOGUS_DOCTYPE_STATE () { 32 }
742 wakaba 1.72 sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE () { 33 }
743 wakaba 1.125 sub SELF_CLOSING_START_TAG_STATE () { 34 }
744 wakaba 1.127 sub CDATA_BLOCK_STATE () { 35 }
745 wakaba 1.57
## Token types, stored in the |type| member of a token.
746 wakaba 1.55 sub DOCTYPE_TOKEN () { 1 }
747     sub COMMENT_TOKEN () { 2 }
748     sub START_TAG_TOKEN () { 3 }
749     sub END_TAG_TOKEN () { 4 }
750     sub END_OF_FILE_TOKEN () { 5 }
751     sub CHARACTER_TOKEN () { 6 }
752    
## Insertion mode group flags.  Each |*_IMS| constant is a single bit
## shared by a group of related insertion modes, so that a whole group
## can be tested with one |&|.
753 wakaba 1.54 sub AFTER_HTML_IMS () { 0b100 }
754     sub HEAD_IMS () { 0b1000 }
755     sub BODY_IMS () { 0b10000 }
756 wakaba 1.56 sub BODY_TABLE_IMS () { 0b100000 }
757 wakaba 1.54 sub TABLE_IMS () { 0b1000000 }
758 wakaba 1.56 sub ROW_IMS () { 0b10000000 }
759 wakaba 1.54 sub BODY_AFTER_IMS () { 0b100000000 }
760     sub FRAME_IMS () { 0b1000000000 }
761 wakaba 1.101 sub SELECT_IMS () { 0b10000000000 }
762 wakaba 1.126 sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 }
763     ## NOTE: "in foreign content" insertion mode is special; it is combined
764     ## with the secondary insertion mode. In this parser, they are stored
765     ## together in the bit-or'ed form.
766 wakaba 1.54
767 wakaba 1.84 ## NOTE: "initial" and "before html" insertion modes have no constants.
768    
769     ## NOTE: "after after body" insertion mode.
770 wakaba 1.54 sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
771 wakaba 1.84
772     ## NOTE: "after after frameset" insertion mode.
773 wakaba 1.54 sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
774 wakaba 1.84
## The individual insertion modes: one or more group flags, with the
## two lowest bits distinguishing the modes that share the same flags.
775 wakaba 1.54 sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
776     sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
777     sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
778     sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 }
779     sub IN_BODY_IM () { BODY_IMS }
780 wakaba 1.56 sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 0b01 }
781     sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }
782     sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 0b01 }
783     sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
784 wakaba 1.54 sub IN_TABLE_IM () { TABLE_IMS }
785     sub AFTER_BODY_IM () { BODY_AFTER_IMS }
786     sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 }
787     sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }
788 wakaba 1.101 sub IN_SELECT_IM () { SELECT_IMS | 0b01 }
789     sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }
790 wakaba 1.54 sub IN_COLUMN_GROUP_IM () { 0b10 }
791    
792 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec
793    
## Resets the tokenizer part of the parser object and reads the first
## input character.
sub _initialize_tokenizer ($) {
  my ($self) = @_;

  ## The tokenizer MUST start in the data state, and the content model
  ## MUST initially be PCDATA.
  $self->{state} = DATA_STATE;
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE token.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);
  delete $self->{self_closing};

  $self->{char} = [];
  ## |$self->{next_char}| is set by the following macro.
  !!!next-input-character;
  $self->{token} = [];
  ## |$self->{escape}| is deliberately left as it is.
} # _initialize_tokenizer
809    
810     ## A token has:
811 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
812     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
813     ## ->{name} (DOCTYPE_TOKEN)
814     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
815     ## ->{public_identifier} (DOCTYPE_TOKEN)
816     ## ->{system_identifier} (DOCTYPE_TOKEN)
817 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
818 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
819 wakaba 1.66 ## ->{name}
820     ## ->{value}
821     ## ->{has_reference} == 1 or 0
822 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
823 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
824     ## |->{self_closing}| is used to save the value of |$self->{self_closing}|
825     ## while the token is pushed back to the stack.
826    
827     ## ISSUE: "When a DOCTYPE token is created, its
828     ## <i>self-closing flag</i> must be unset (its other state is that it
829     ## be set), and its attributes list must be empty.": Wrong subject?
830 wakaba 1.1
831     ## Emitted token MUST immediately be handled by the tree construction state.
832    
833     ## Before each step, UA MAY check to see if either one of the scripts in
834     ## "list of scripts that will execute as soon as possible" or the first
835     ## script in the "list of scripts that will execute asynchronously",
836     ## has completed loading. If one has, then it MUST be executed
837     ## and removed from the list.
838    
839 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
840     ## documents and not to user agents and conformance checkers,
841     ## contains some requirements that are not detected by the
842     ## parsing algorithm:
843     ## - Some requirements on character encoding declarations. ## TODO
844     ## - "Elements MUST NOT contain content that their content model disallows."
845     ## ... Some are parse error, some are not (will be reported by c.c.).
846     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
847     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
848     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
849    
850     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
851     ## be detected by the HTML5 parsing algorithm:
852     ## - Text,
853    
854 wakaba 1.1 sub _get_next_token ($) {
855     my $self = shift;
856 wakaba 1.125
857     if ($self->{self_closing}) {
858     !!!parse-error (type => 'nestc', token => $self->{current_token});
859     ## NOTE: The |self_closing| flag is only set by start tag token.
860     ## In addition, when a start tag token is emitted, it is always set to
861     ## |current_token|.
862     delete $self->{self_closing};
863     }
864    
865 wakaba 1.1 if (@{$self->{token}}) {
866 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
867 wakaba 1.1 return shift @{$self->{token}};
868     }
869    
870     A: {
871 wakaba 1.57 if ($self->{state} == DATA_STATE) {
872 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
873 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
874     not $self->{escape}) {
875 wakaba 1.77 !!!cp (1);
876 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
877 wakaba 1.1 !!!next-input-character;
878     redo A;
879     } else {
880 wakaba 1.77 !!!cp (2);
881 wakaba 1.1 #
882     }
883 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
884 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
885 wakaba 1.13 unless ($self->{escape}) {
886 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
887     $self->{prev_char}->[1] == 0x0021 and # !
888     $self->{prev_char}->[2] == 0x003C) { # <
889 wakaba 1.77 !!!cp (3);
890 wakaba 1.13 $self->{escape} = 1;
891 wakaba 1.77 } else {
892     !!!cp (4);
893 wakaba 1.13 }
894 wakaba 1.77 } else {
895     !!!cp (5);
896 wakaba 1.13 }
897     }
898    
899     #
900 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
901 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
902     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
903 wakaba 1.13 not $self->{escape})) {
904 wakaba 1.77 !!!cp (6);
905 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
906 wakaba 1.1 !!!next-input-character;
907     redo A;
908     } else {
909 wakaba 1.77 !!!cp (7);
910 wakaba 1.1 #
911     }
912 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
913 wakaba 1.13 if ($self->{escape} and
914 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
915 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
916     $self->{prev_char}->[1] == 0x002D) { # -
917 wakaba 1.77 !!!cp (8);
918 wakaba 1.13 delete $self->{escape};
919 wakaba 1.77 } else {
920     !!!cp (9);
921 wakaba 1.13 }
922 wakaba 1.77 } else {
923     !!!cp (10);
924 wakaba 1.13 }
925    
926     #
927 wakaba 1.76 } elsif ($self->{next_char} == -1) {
928 wakaba 1.77 !!!cp (11);
929 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
930     line => $self->{line}, column => $self->{column}});
931 wakaba 1.1 last A; ## TODO: ok?
932 wakaba 1.77 } else {
933     !!!cp (12);
934 wakaba 1.1 }
935     # Anything else
936 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
937 wakaba 1.112 data => chr $self->{next_char},
938 wakaba 1.120 line => $self->{line}, column => $self->{column},
939 wakaba 1.118 };
940 wakaba 1.1 ## Stay in the data state
941     !!!next-input-character;
942    
943     !!!emit ($token);
944    
945     redo A;
946 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
947 wakaba 1.1 ## (cannot happen in CDATA state)
948 wakaba 1.112
949 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
950 wakaba 1.1
951 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
952 wakaba 1.1
953 wakaba 1.57 $self->{state} = DATA_STATE;
954 wakaba 1.1 # next-input-character is already done
955    
956     unless (defined $token) {
957 wakaba 1.77 !!!cp (13);
958 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
959 wakaba 1.120 line => $l, column => $c,
960 wakaba 1.118 });
961 wakaba 1.1 } else {
962 wakaba 1.77 !!!cp (14);
963 wakaba 1.1 !!!emit ($token);
964     }
965    
966     redo A;
967 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
968 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
969 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
970 wakaba 1.77 !!!cp (15);
971 wakaba 1.1 !!!next-input-character;
972 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
973 wakaba 1.1 redo A;
974     } else {
975 wakaba 1.77 !!!cp (16);
976 wakaba 1.1 ## reconsume
977 wakaba 1.57 $self->{state} = DATA_STATE;
978 wakaba 1.1
979 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
980 wakaba 1.120 line => $self->{line_prev},
981     column => $self->{column_prev},
982 wakaba 1.118 });
983 wakaba 1.1
984     redo A;
985     }
986 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
987 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
988 wakaba 1.77 !!!cp (17);
989 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
990 wakaba 1.1 !!!next-input-character;
991     redo A;
992 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
993 wakaba 1.77 !!!cp (18);
994 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
995 wakaba 1.1 !!!next-input-character;
996     redo A;
997 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
998     $self->{next_char} <= 0x005A) { # A..Z
999 wakaba 1.77 !!!cp (19);
1000 wakaba 1.1 $self->{current_token}
1001 wakaba 1.55 = {type => START_TAG_TOKEN,
1002 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
1003     line => $self->{line_prev},
1004     column => $self->{column_prev}};
1005 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1006 wakaba 1.1 !!!next-input-character;
1007     redo A;
1008 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1009     $self->{next_char} <= 0x007A) { # a..z
1010 wakaba 1.77 !!!cp (20);
1011 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
1012 wakaba 1.112 tag_name => chr ($self->{next_char}),
1013     line => $self->{line_prev},
1014     column => $self->{column_prev}};
1015 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1016 wakaba 1.1 !!!next-input-character;
1017     redo A;
1018 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1019 wakaba 1.77 !!!cp (21);
1020 wakaba 1.115 !!!parse-error (type => 'empty start tag',
1021     line => $self->{line_prev},
1022     column => $self->{column_prev});
1023 wakaba 1.57 $self->{state} = DATA_STATE;
1024 wakaba 1.1 !!!next-input-character;
1025    
1026 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
1027 wakaba 1.120 line => $self->{line_prev},
1028     column => $self->{column_prev},
1029 wakaba 1.118 });
1030 wakaba 1.1
1031     redo A;
1032 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
1033 wakaba 1.77 !!!cp (22);
1034 wakaba 1.115 !!!parse-error (type => 'pio',
1035     line => $self->{line_prev},
1036     column => $self->{column_prev});
1037 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1038 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1039 wakaba 1.120 line => $self->{line_prev},
1040     column => $self->{column_prev},
1041 wakaba 1.118 };
1042 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1043 wakaba 1.1 redo A;
1044     } else {
1045 wakaba 1.77 !!!cp (23);
1046 wakaba 1.136 !!!parse-error (type => 'bare stago',
1047     line => $self->{line_prev},
1048     column => $self->{column_prev});
1049 wakaba 1.57 $self->{state} = DATA_STATE;
1050 wakaba 1.1 ## reconsume
1051    
1052 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1053 wakaba 1.120 line => $self->{line_prev},
1054     column => $self->{column_prev},
1055 wakaba 1.118 });
1056 wakaba 1.1
1057     redo A;
1058     }
1059     } else {
1060 wakaba 1.40 die "$0: $self->{content_model} in tag open";
1061 wakaba 1.1 }
1062 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1063 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1064 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1065 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
1066 wakaba 1.112
1067 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
1068 wakaba 1.23 my @next_char;
1069     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
1070 wakaba 1.76 push @next_char, $self->{next_char};
1071 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
1072     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
1073 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
1074 wakaba 1.77 !!!cp (24);
1075 wakaba 1.23 !!!next-input-character;
1076     next TAGNAME;
1077     } else {
1078 wakaba 1.77 !!!cp (25);
1079 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
1080 wakaba 1.23 !!!back-next-input-character (@next_char);
1081 wakaba 1.57 $self->{state} = DATA_STATE;
1082 wakaba 1.23
1083 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1084 wakaba 1.120 line => $l, column => $c,
1085 wakaba 1.118 });
1086 wakaba 1.23
1087     redo A;
1088     }
1089     }
1090 wakaba 1.76 push @next_char, $self->{next_char};
1091 wakaba 1.23
1092 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
1093     $self->{next_char} == 0x000A or # LF
1094     $self->{next_char} == 0x000B or # VT
1095     $self->{next_char} == 0x000C or # FF
1096     $self->{next_char} == 0x0020 or # SP
1097     $self->{next_char} == 0x003E or # >
1098     $self->{next_char} == 0x002F or # /
1099     $self->{next_char} == -1) {
1100 wakaba 1.77 !!!cp (26);
1101 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
1102 wakaba 1.1 !!!back-next-input-character (@next_char);
1103 wakaba 1.57 $self->{state} = DATA_STATE;
1104 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1105 wakaba 1.120 line => $l, column => $c,
1106 wakaba 1.118 });
1107 wakaba 1.1 redo A;
1108 wakaba 1.23 } else {
1109 wakaba 1.77 !!!cp (27);
1110 wakaba 1.76 $self->{next_char} = shift @next_char;
1111 wakaba 1.23 !!!back-next-input-character (@next_char);
1112     # and consume...
1113 wakaba 1.1 }
1114 wakaba 1.23 } else {
1115     ## No start tag token has ever been emitted
1116 wakaba 1.77 !!!cp (28);
1117 wakaba 1.23 # next-input-character is already done
1118 wakaba 1.57 $self->{state} = DATA_STATE;
1119 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1120 wakaba 1.120 line => $l, column => $c,
1121 wakaba 1.118 });
1122 wakaba 1.1 redo A;
1123     }
1124     }
1125    
1126 wakaba 1.76 if (0x0041 <= $self->{next_char} and
1127     $self->{next_char} <= 0x005A) { # A..Z
1128 wakaba 1.77 !!!cp (29);
1129 wakaba 1.112 $self->{current_token}
1130     = {type => END_TAG_TOKEN,
1131     tag_name => chr ($self->{next_char} + 0x0020),
1132     line => $l, column => $c};
1133 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1134 wakaba 1.1 !!!next-input-character;
1135     redo A;
1136 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1137     $self->{next_char} <= 0x007A) { # a..z
1138 wakaba 1.77 !!!cp (30);
1139 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
1140 wakaba 1.112 tag_name => chr ($self->{next_char}),
1141     line => $l, column => $c};
1142 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1143 wakaba 1.1 !!!next-input-character;
1144     redo A;
1145 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1146 wakaba 1.77 !!!cp (31);
1147 wakaba 1.115 !!!parse-error (type => 'empty end tag',
1148     line => $self->{line_prev}, ## "<" in "</>"
1149     column => $self->{column_prev} - 1);
1150 wakaba 1.57 $self->{state} = DATA_STATE;
1151 wakaba 1.1 !!!next-input-character;
1152     redo A;
1153 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1154 wakaba 1.77 !!!cp (32);
1155 wakaba 1.3 !!!parse-error (type => 'bare etago');
1156 wakaba 1.57 $self->{state} = DATA_STATE;
1157 wakaba 1.1 # reconsume
1158    
1159 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1160 wakaba 1.120 line => $l, column => $c,
1161 wakaba 1.118 });
1162 wakaba 1.1
1163     redo A;
1164     } else {
1165 wakaba 1.77 !!!cp (33);
1166 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
1167 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1168 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1169 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
1170     column => $self->{column_prev} - 1,
1171 wakaba 1.118 };
1172 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1173 wakaba 1.1 redo A;
1174     }
1175 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
1176 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1177     $self->{next_char} == 0x000A or # LF
1178     $self->{next_char} == 0x000B or # VT
1179     $self->{next_char} == 0x000C or # FF
1180     $self->{next_char} == 0x0020) { # SP
1181 wakaba 1.77 !!!cp (34);
1182 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1183 wakaba 1.1 !!!next-input-character;
1184     redo A;
1185 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1186 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1187 wakaba 1.77 !!!cp (35);
1188 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1189 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1190 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1191 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1192     # ## NOTE: This should never be reached.
1193     # !!! cp (36);
1194     # !!! parse-error (type => 'end tag attribute');
1195     #} else {
1196 wakaba 1.77 !!!cp (37);
1197 wakaba 1.78 #}
1198 wakaba 1.1 } else {
1199     die "$0: $self->{current_token}->{type}: Unknown token type";
1200     }
1201 wakaba 1.57 $self->{state} = DATA_STATE;
1202 wakaba 1.1 !!!next-input-character;
1203    
1204     !!!emit ($self->{current_token}); # start tag or end tag
1205    
1206     redo A;
1207 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1208     $self->{next_char} <= 0x005A) { # A..Z
1209 wakaba 1.77 !!!cp (38);
1210 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1211 wakaba 1.1 # start tag or end tag
1212     ## Stay in this state
1213     !!!next-input-character;
1214     redo A;
1215 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1216 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1217 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1218 wakaba 1.77 !!!cp (39);
1219 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1220 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1221 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1222 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1223     # ## NOTE: This state should never be reached.
1224     # !!! cp (40);
1225     # !!! parse-error (type => 'end tag attribute');
1226     #} else {
1227 wakaba 1.77 !!!cp (41);
1228 wakaba 1.78 #}
1229 wakaba 1.1 } else {
1230     die "$0: $self->{current_token}->{type}: Unknown token type";
1231     }
1232 wakaba 1.57 $self->{state} = DATA_STATE;
1233 wakaba 1.1 # reconsume
1234    
1235     !!!emit ($self->{current_token}); # start tag or end tag
1236    
1237     redo A;
1238 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1239 wakaba 1.125 !!!cp (42);
1240     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1241 wakaba 1.1 !!!next-input-character;
1242     redo A;
1243     } else {
1244 wakaba 1.77 !!!cp (44);
1245 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1246 wakaba 1.1 # start tag or end tag
1247     ## Stay in the state
1248     !!!next-input-character;
1249     redo A;
1250     }
1251 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1252 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1253     $self->{next_char} == 0x000A or # LF
1254     $self->{next_char} == 0x000B or # VT
1255     $self->{next_char} == 0x000C or # FF
1256     $self->{next_char} == 0x0020) { # SP
1257 wakaba 1.77 !!!cp (45);
1258 wakaba 1.1 ## Stay in the state
1259     !!!next-input-character;
1260     redo A;
1261 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1262 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1263 wakaba 1.77 !!!cp (46);
1264 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1265 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1266 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1267 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1268 wakaba 1.77 !!!cp (47);
1269 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1270 wakaba 1.77 } else {
1271     !!!cp (48);
1272 wakaba 1.1 }
1273     } else {
1274     die "$0: $self->{current_token}->{type}: Unknown token type";
1275     }
1276 wakaba 1.57 $self->{state} = DATA_STATE;
1277 wakaba 1.1 !!!next-input-character;
1278    
1279     !!!emit ($self->{current_token}); # start tag or end tag
1280    
1281     redo A;
1282 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1283     $self->{next_char} <= 0x005A) { # A..Z
1284 wakaba 1.77 !!!cp (49);
1285 wakaba 1.119 $self->{current_attribute}
1286     = {name => chr ($self->{next_char} + 0x0020),
1287     value => '',
1288     line => $self->{line}, column => $self->{column}};
1289 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1290 wakaba 1.1 !!!next-input-character;
1291     redo A;
1292 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1293 wakaba 1.125 !!!cp (50);
1294     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1295 wakaba 1.1 !!!next-input-character;
1296     redo A;
1297 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1298 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1299 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1300 wakaba 1.77 !!!cp (52);
1301 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1302 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1303 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1304 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1305 wakaba 1.77 !!!cp (53);
1306 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1307 wakaba 1.77 } else {
1308     !!!cp (54);
1309 wakaba 1.1 }
1310     } else {
1311     die "$0: $self->{current_token}->{type}: Unknown token type";
1312     }
1313 wakaba 1.57 $self->{state} = DATA_STATE;
1314 wakaba 1.1 # reconsume
1315    
1316     !!!emit ($self->{current_token}); # start tag or end tag
1317    
1318     redo A;
1319     } else {
1320 wakaba 1.72 if ({
1321     0x0022 => 1, # "
1322     0x0027 => 1, # '
1323     0x003D => 1, # =
1324 wakaba 1.76 }->{$self->{next_char}}) {
1325 wakaba 1.77 !!!cp (55);
1326 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1327 wakaba 1.77 } else {
1328     !!!cp (56);
1329 wakaba 1.72 }
1330 wakaba 1.119 $self->{current_attribute}
1331     = {name => chr ($self->{next_char}),
1332     value => '',
1333     line => $self->{line}, column => $self->{column}};
1334 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1335 wakaba 1.1 !!!next-input-character;
1336     redo A;
1337     }
1338 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1339 wakaba 1.1 my $before_leave = sub {
1340     if (exists $self->{current_token}->{attributes} # start tag or end tag
1341     ->{$self->{current_attribute}->{name}}) { # MUST
1342 wakaba 1.77 !!!cp (57);
1343 wakaba 1.120 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1344 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1345     } else {
1346 wakaba 1.77 !!!cp (58);
1347 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1348     = $self->{current_attribute};
1349     }
1350     }; # $before_leave
1351    
1352 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1353     $self->{next_char} == 0x000A or # LF
1354     $self->{next_char} == 0x000B or # VT
1355     $self->{next_char} == 0x000C or # FF
1356     $self->{next_char} == 0x0020) { # SP
1357 wakaba 1.77 !!!cp (59);
1358 wakaba 1.1 $before_leave->();
1359 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1360 wakaba 1.1 !!!next-input-character;
1361     redo A;
1362 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1363 wakaba 1.77 !!!cp (60);
1364 wakaba 1.1 $before_leave->();
1365 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1366 wakaba 1.1 !!!next-input-character;
1367     redo A;
1368 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1369 wakaba 1.1 $before_leave->();
1370 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1371 wakaba 1.77 !!!cp (61);
1372 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1373 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1374 wakaba 1.77 !!!cp (62);
1375 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1376 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1377 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1378 wakaba 1.1 }
1379     } else {
1380     die "$0: $self->{current_token}->{type}: Unknown token type";
1381     }
1382 wakaba 1.57 $self->{state} = DATA_STATE;
1383 wakaba 1.1 !!!next-input-character;
1384    
1385     !!!emit ($self->{current_token}); # start tag or end tag
1386    
1387     redo A;
1388 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1389     $self->{next_char} <= 0x005A) { # A..Z
1390 wakaba 1.77 !!!cp (63);
1391 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1392 wakaba 1.1 ## Stay in the state
1393     !!!next-input-character;
1394     redo A;
1395 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1396 wakaba 1.125 !!!cp (64);
1397 wakaba 1.1 $before_leave->();
1398 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1399 wakaba 1.1 !!!next-input-character;
1400     redo A;
1401 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1402 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1403 wakaba 1.1 $before_leave->();
1404 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1405 wakaba 1.77 !!!cp (66);
1406 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1407 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1408 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1409 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1410 wakaba 1.77 !!!cp (67);
1411 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1412 wakaba 1.77 } else {
1413 wakaba 1.78 ## NOTE: This state should never be reached.
1414 wakaba 1.77 !!!cp (68);
1415 wakaba 1.1 }
1416     } else {
1417     die "$0: $self->{current_token}->{type}: Unknown token type";
1418     }
1419 wakaba 1.57 $self->{state} = DATA_STATE;
1420 wakaba 1.1 # reconsume
1421    
1422     !!!emit ($self->{current_token}); # start tag or end tag
1423    
1424     redo A;
1425     } else {
1426 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1427     $self->{next_char} == 0x0027) { # '
1428 wakaba 1.77 !!!cp (69);
1429 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1430 wakaba 1.77 } else {
1431     !!!cp (70);
1432 wakaba 1.72 }
1433 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1434 wakaba 1.1 ## Stay in the state
1435     !!!next-input-character;
1436     redo A;
1437     }
1438 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1439 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1440     $self->{next_char} == 0x000A or # LF
1441     $self->{next_char} == 0x000B or # VT
1442     $self->{next_char} == 0x000C or # FF
1443     $self->{next_char} == 0x0020) { # SP
1444 wakaba 1.77 !!!cp (71);
1445 wakaba 1.1 ## Stay in the state
1446     !!!next-input-character;
1447     redo A;
1448 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1449 wakaba 1.77 !!!cp (72);
1450 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1451 wakaba 1.1 !!!next-input-character;
1452     redo A;
1453 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1454 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1455 wakaba 1.77 !!!cp (73);
1456 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1457 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1458 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1459 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1460 wakaba 1.77 !!!cp (74);
1461 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1462 wakaba 1.77 } else {
1463 wakaba 1.78 ## NOTE: This state should never be reached.
1464 wakaba 1.77 !!!cp (75);
1465 wakaba 1.1 }
1466     } else {
1467     die "$0: $self->{current_token}->{type}: Unknown token type";
1468     }
1469 wakaba 1.57 $self->{state} = DATA_STATE;
1470 wakaba 1.1 !!!next-input-character;
1471    
1472     !!!emit ($self->{current_token}); # start tag or end tag
1473    
1474     redo A;
1475 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1476     $self->{next_char} <= 0x005A) { # A..Z
1477 wakaba 1.77 !!!cp (76);
1478 wakaba 1.119 $self->{current_attribute}
1479     = {name => chr ($self->{next_char} + 0x0020),
1480     value => '',
1481     line => $self->{line}, column => $self->{column}};
1482 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1483 wakaba 1.1 !!!next-input-character;
1484     redo A;
1485 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1486 wakaba 1.125 !!!cp (77);
1487     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1488 wakaba 1.1 !!!next-input-character;
1489     redo A;
1490 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1491 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1492 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1493 wakaba 1.77 !!!cp (79);
1494 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1495 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1496 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1497 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1498 wakaba 1.77 !!!cp (80);
1499 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1500 wakaba 1.77 } else {
1501 wakaba 1.78 ## NOTE: This state should never be reached.
1502 wakaba 1.77 !!!cp (81);
1503 wakaba 1.1 }
1504     } else {
1505     die "$0: $self->{current_token}->{type}: Unknown token type";
1506     }
1507 wakaba 1.57 $self->{state} = DATA_STATE;
1508 wakaba 1.1 # reconsume
1509    
1510     !!!emit ($self->{current_token}); # start tag or end tag
1511    
1512     redo A;
1513     } else {
1514 wakaba 1.77 !!!cp (82);
1515 wakaba 1.119 $self->{current_attribute}
1516     = {name => chr ($self->{next_char}),
1517     value => '',
1518     line => $self->{line}, column => $self->{column}};
1519 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1520 wakaba 1.1 !!!next-input-character;
1521     redo A;
1522     }
1523 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1524 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1525     $self->{next_char} == 0x000A or # LF
1526     $self->{next_char} == 0x000B or # VT
1527     $self->{next_char} == 0x000C or # FF
1528     $self->{next_char} == 0x0020) { # SP
1529 wakaba 1.77 !!!cp (83);
1530 wakaba 1.1 ## Stay in the state
1531     !!!next-input-character;
1532     redo A;
1533 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1534 wakaba 1.77 !!!cp (84);
1535 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1536 wakaba 1.1 !!!next-input-character;
1537     redo A;
1538 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1539 wakaba 1.77 !!!cp (85);
1540 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1541 wakaba 1.1 ## reconsume
1542     redo A;
1543 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1544 wakaba 1.77 !!!cp (86);
1545 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1546 wakaba 1.1 !!!next-input-character;
1547     redo A;
1548 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1549 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1550 wakaba 1.77 !!!cp (87);
1551 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1552 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1553 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1554 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1555 wakaba 1.77 !!!cp (88);
1556 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1557 wakaba 1.77 } else {
1558 wakaba 1.78 ## NOTE: This state should never be reached.
1559 wakaba 1.77 !!!cp (89);
1560 wakaba 1.1 }
1561     } else {
1562     die "$0: $self->{current_token}->{type}: Unknown token type";
1563     }
1564 wakaba 1.57 $self->{state} = DATA_STATE;
1565 wakaba 1.1 !!!next-input-character;
1566    
1567     !!!emit ($self->{current_token}); # start tag or end tag
1568    
1569     redo A;
1570 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1571 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1572 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1573 wakaba 1.77 !!!cp (90);
1574 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1575 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1576 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1577 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1578 wakaba 1.77 !!!cp (91);
1579 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1580 wakaba 1.77 } else {
1581 wakaba 1.78 ## NOTE: This state should never be reached.
1582 wakaba 1.77 !!!cp (92);
1583 wakaba 1.1 }
1584     } else {
1585     die "$0: $self->{current_token}->{type}: Unknown token type";
1586     }
1587 wakaba 1.57 $self->{state} = DATA_STATE;
1588 wakaba 1.1 ## reconsume
1589    
1590     !!!emit ($self->{current_token}); # start tag or end tag
1591    
1592     redo A;
1593     } else {
1594 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1595 wakaba 1.77 !!!cp (93);
1596 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1597 wakaba 1.77 } else {
1598     !!!cp (94);
1599 wakaba 1.72 }
1600 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1601 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1602 wakaba 1.1 !!!next-input-character;
1603     redo A;
1604     }
1605 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1606 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1607 wakaba 1.77 !!!cp (95);
1608 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1609 wakaba 1.1 !!!next-input-character;
1610     redo A;
1611 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1612 wakaba 1.77 !!!cp (96);
1613 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1614     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1615 wakaba 1.1 !!!next-input-character;
1616     redo A;
1617 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1618 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1619 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1620 wakaba 1.77 !!!cp (97);
1621 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1622 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1623 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1624 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1625 wakaba 1.77 !!!cp (98);
1626 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1627 wakaba 1.77 } else {
1628 wakaba 1.78 ## NOTE: This state should never be reached.
1629 wakaba 1.77 !!!cp (99);
1630 wakaba 1.1 }
1631     } else {
1632     die "$0: $self->{current_token}->{type}: Unknown token type";
1633     }
1634 wakaba 1.57 $self->{state} = DATA_STATE;
1635 wakaba 1.1 ## reconsume
1636    
1637     !!!emit ($self->{current_token}); # start tag or end tag
1638    
1639     redo A;
1640     } else {
1641 wakaba 1.77 !!!cp (100);
1642 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1643 wakaba 1.1 ## Stay in the state
1644     !!!next-input-character;
1645     redo A;
1646     }
1647 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1648 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1649 wakaba 1.77 !!!cp (101);
1650 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1651 wakaba 1.1 !!!next-input-character;
1652     redo A;
1653 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1654 wakaba 1.77 !!!cp (102);
1655 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1656     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1657 wakaba 1.1 !!!next-input-character;
1658     redo A;
1659 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1660 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1661 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1662 wakaba 1.77 !!!cp (103);
1663 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1664 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1665 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1666 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1667 wakaba 1.77 !!!cp (104);
1668 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1669 wakaba 1.77 } else {
1670 wakaba 1.78 ## NOTE: This state should never be reached.
1671 wakaba 1.77 !!!cp (105);
1672 wakaba 1.1 }
1673     } else {
1674     die "$0: $self->{current_token}->{type}: Unknown token type";
1675     }
1676 wakaba 1.57 $self->{state} = DATA_STATE;
1677 wakaba 1.1 ## reconsume
1678    
1679     !!!emit ($self->{current_token}); # start tag or end tag
1680    
1681     redo A;
1682     } else {
1683 wakaba 1.77 !!!cp (106);
1684 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1685 wakaba 1.1 ## Stay in the state
1686     !!!next-input-character;
1687     redo A;
1688     }
1689 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1690 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1691     $self->{next_char} == 0x000A or # LF
1692     $self->{next_char} == 0x000B or # VT
1693     $self->{next_char} == 0x000C or # FF
1694     $self->{next_char} == 0x0020) { # SP
1695 wakaba 1.77 !!!cp (107);
1696 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1697 wakaba 1.1 !!!next-input-character;
1698     redo A;
1699 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1700 wakaba 1.77 !!!cp (108);
1701 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1702     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1703 wakaba 1.1 !!!next-input-character;
1704     redo A;
1705 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1706 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1707 wakaba 1.77 !!!cp (109);
1708 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1709 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1710 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1711 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1712 wakaba 1.77 !!!cp (110);
1713 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1714 wakaba 1.77 } else {
1715 wakaba 1.78 ## NOTE: This state should never be reached.
1716 wakaba 1.77 !!!cp (111);
1717 wakaba 1.1 }
1718     } else {
1719     die "$0: $self->{current_token}->{type}: Unknown token type";
1720     }
1721 wakaba 1.57 $self->{state} = DATA_STATE;
1722 wakaba 1.1 !!!next-input-character;
1723    
1724     !!!emit ($self->{current_token}); # start tag or end tag
1725    
1726     redo A;
1727 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1728 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1729 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1730 wakaba 1.77 !!!cp (112);
1731 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1732 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1733 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1734 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1735 wakaba 1.77 !!!cp (113);
1736 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1737 wakaba 1.77 } else {
1738 wakaba 1.78 ## NOTE: This state should never be reached.
1739 wakaba 1.77 !!!cp (114);
1740 wakaba 1.1 }
1741     } else {
1742     die "$0: $self->{current_token}->{type}: Unknown token type";
1743     }
1744 wakaba 1.57 $self->{state} = DATA_STATE;
1745 wakaba 1.1 ## reconsume
1746    
1747     !!!emit ($self->{current_token}); # start tag or end tag
1748    
1749     redo A;
1750     } else {
1751 wakaba 1.72 if ({
1752     0x0022 => 1, # "
1753     0x0027 => 1, # '
1754     0x003D => 1, # =
1755 wakaba 1.76 }->{$self->{next_char}}) {
1756 wakaba 1.77 !!!cp (115);
1757 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1758 wakaba 1.77 } else {
1759     !!!cp (116);
1760 wakaba 1.72 }
1761 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1762 wakaba 1.1 ## Stay in the state
1763     !!!next-input-character;
1764     redo A;
1765     }
1766 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1767 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1768     (1,
1769     $self->{last_attribute_value_state}
1770     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1771     $self->{last_attribute_value_state}
1772     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1773     -1);
1774 wakaba 1.1
1775     unless (defined $token) {
1776 wakaba 1.77 !!!cp (117);
1777 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1778     } else {
1779 wakaba 1.77 !!!cp (118);
1780 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1781 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1782 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1783     }
1784    
1785     $self->{state} = $self->{last_attribute_value_state};
1786     # next-input-character is already done
1787     redo A;
1788 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1789 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1790     $self->{next_char} == 0x000A or # LF
1791     $self->{next_char} == 0x000B or # VT
1792     $self->{next_char} == 0x000C or # FF
1793     $self->{next_char} == 0x0020) { # SP
1794 wakaba 1.77 !!!cp (118);
1795 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1796     !!!next-input-character;
1797     redo A;
1798 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1799 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1800 wakaba 1.77 !!!cp (119);
1801 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1802     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1803     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1804     if ($self->{current_token}->{attributes}) {
1805 wakaba 1.77 !!!cp (120);
1806 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1807 wakaba 1.77 } else {
1808 wakaba 1.78 ## NOTE: This state should never be reached.
1809 wakaba 1.77 !!!cp (121);
1810 wakaba 1.72 }
1811     } else {
1812     die "$0: $self->{current_token}->{type}: Unknown token type";
1813     }
1814     $self->{state} = DATA_STATE;
1815     !!!next-input-character;
1816    
1817     !!!emit ($self->{current_token}); # start tag or end tag
1818    
1819     redo A;
1820 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1821 wakaba 1.125 !!!cp (122);
1822     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1823 wakaba 1.72 !!!next-input-character;
1824 wakaba 1.125 redo A;
1825 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1826     !!!parse-error (type => 'unclosed tag');
1827     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1828     !!!cp (122.3);
1829     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1830     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1831     if ($self->{current_token}->{attributes}) {
1832     !!!cp (122.1);
1833     !!!parse-error (type => 'end tag attribute');
1834     } else {
1835     ## NOTE: This state should never be reached.
1836     !!!cp (122.2);
1837     }
1838     } else {
1839     die "$0: $self->{current_token}->{type}: Unknown token type";
1840     }
1841     $self->{state} = DATA_STATE;
1842     ## Reconsume.
1843     !!!emit ($self->{current_token}); # start tag or end tag
1844     redo A;
1845 wakaba 1.125 } else {
1846     !!!cp ('124.1');
1847     !!!parse-error (type => 'no space between attributes');
1848     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1849     ## reconsume
1850     redo A;
1851     }
1852     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1853     if ($self->{next_char} == 0x003E) { # >
1854     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1855     !!!cp ('124.2');
1856     !!!parse-error (type => 'nestc', token => $self->{current_token});
1857     ## TODO: Different type than slash in start tag
1858     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1859     if ($self->{current_token}->{attributes}) {
1860     !!!cp ('124.4');
1861     !!!parse-error (type => 'end tag attribute');
1862     } else {
1863     !!!cp ('124.5');
1864     }
1865     ## TODO: Test |<title></title/>|
1866 wakaba 1.72 } else {
1867 wakaba 1.125 !!!cp ('124.3');
1868     $self->{self_closing} = 1;
1869 wakaba 1.72 }
1870 wakaba 1.125
1871     $self->{state} = DATA_STATE;
1872     !!!next-input-character;
1873    
1874     !!!emit ($self->{current_token}); # start tag or end tag
1875    
1876 wakaba 1.72 redo A;
1877 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1878     !!!parse-error (type => 'unclosed tag');
1879     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1880     !!!cp (124.7);
1881     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1882     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1883     if ($self->{current_token}->{attributes}) {
1884     !!!cp (124.5);
1885     !!!parse-error (type => 'end tag attribute');
1886     } else {
1887     ## NOTE: This state should never be reached.
1888     !!!cp (124.6);
1889     }
1890     } else {
1891     die "$0: $self->{current_token}->{type}: Unknown token type";
1892     }
1893     $self->{state} = DATA_STATE;
1894     ## Reconsume.
1895     !!!emit ($self->{current_token}); # start tag or end tag
1896     redo A;
1897 wakaba 1.72 } else {
1898 wakaba 1.125 !!!cp ('124.4');
1899     !!!parse-error (type => 'nestc');
1900     ## TODO: This error type is wrong.
1901 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1902 wakaba 1.125 ## Reconsume.
1903 wakaba 1.72 redo A;
1904     }
1905 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1906 wakaba 1.1 ## (only happen if PCDATA state)
1907    
1908 wakaba 1.112 ## NOTE: Set by the previous state
1909     #my $token = {type => COMMENT_TOKEN, data => ''};
1910 wakaba 1.1
1911     BC: {
1912 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1913 wakaba 1.77 !!!cp (124);
1914 wakaba 1.57 $self->{state} = DATA_STATE;
1915 wakaba 1.1 !!!next-input-character;
1916    
1917 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1918 wakaba 1.1
1919     redo A;
1920 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1921 wakaba 1.77 !!!cp (125);
1922 wakaba 1.57 $self->{state} = DATA_STATE;
1923 wakaba 1.1 ## reconsume
1924    
1925 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1926 wakaba 1.1
1927     redo A;
1928     } else {
1929 wakaba 1.77 !!!cp (126);
1930 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1931 wakaba 1.1 !!!next-input-character;
1932     redo BC;
1933     }
1934     } # BC
1935 wakaba 1.77
1936     die "$0: _get_next_token: unexpected case [BC]";
1937 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1938 wakaba 1.1 ## (only happen if PCDATA state)
1939    
1940 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1941 wakaba 1.112
1942 wakaba 1.1 my @next_char;
1943 wakaba 1.76 push @next_char, $self->{next_char};
1944 wakaba 1.1
1945 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1946 wakaba 1.1 !!!next-input-character;
1947 wakaba 1.76 push @next_char, $self->{next_char};
1948     if ($self->{next_char} == 0x002D) { # -
1949 wakaba 1.77 !!!cp (127);
1950 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1951 wakaba 1.120 line => $l, column => $c,
1952 wakaba 1.118 };
1953 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1954 wakaba 1.1 !!!next-input-character;
1955     redo A;
1956 wakaba 1.77 } else {
1957     !!!cp (128);
1958 wakaba 1.1 }
1959 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1960     $self->{next_char} == 0x0064) { # d
1961 wakaba 1.1 !!!next-input-character;
1962 wakaba 1.76 push @next_char, $self->{next_char};
1963     if ($self->{next_char} == 0x004F or # O
1964     $self->{next_char} == 0x006F) { # o
1965 wakaba 1.1 !!!next-input-character;
1966 wakaba 1.76 push @next_char, $self->{next_char};
1967     if ($self->{next_char} == 0x0043 or # C
1968     $self->{next_char} == 0x0063) { # c
1969 wakaba 1.1 !!!next-input-character;
1970 wakaba 1.76 push @next_char, $self->{next_char};
1971     if ($self->{next_char} == 0x0054 or # T
1972     $self->{next_char} == 0x0074) { # t
1973 wakaba 1.1 !!!next-input-character;
1974 wakaba 1.76 push @next_char, $self->{next_char};
1975     if ($self->{next_char} == 0x0059 or # Y
1976     $self->{next_char} == 0x0079) { # y
1977 wakaba 1.1 !!!next-input-character;
1978 wakaba 1.76 push @next_char, $self->{next_char};
1979     if ($self->{next_char} == 0x0050 or # P
1980     $self->{next_char} == 0x0070) { # p
1981 wakaba 1.1 !!!next-input-character;
1982 wakaba 1.76 push @next_char, $self->{next_char};
1983     if ($self->{next_char} == 0x0045 or # E
1984     $self->{next_char} == 0x0065) { # e
1985 wakaba 1.77 !!!cp (129);
1986     ## TODO: What a stupid code this is!
1987 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1988 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1989     quirks => 1,
1990 wakaba 1.120 line => $l, column => $c,
1991 wakaba 1.118 };
1992 wakaba 1.1 !!!next-input-character;
1993     redo A;
1994 wakaba 1.77 } else {
1995     !!!cp (130);
1996 wakaba 1.1 }
1997 wakaba 1.77 } else {
1998     !!!cp (131);
1999 wakaba 1.1 }
2000 wakaba 1.77 } else {
2001     !!!cp (132);
2002 wakaba 1.1 }
2003 wakaba 1.77 } else {
2004     !!!cp (133);
2005 wakaba 1.1 }
2006 wakaba 1.77 } else {
2007     !!!cp (134);
2008 wakaba 1.1 }
2009 wakaba 1.77 } else {
2010     !!!cp (135);
2011 wakaba 1.1 }
2012 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2013     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
2014     $self->{next_char} == 0x005B) { # [
2015     !!!next-input-character;
2016     push @next_char, $self->{next_char};
2017     if ($self->{next_char} == 0x0043) { # C
2018     !!!next-input-character;
2019     push @next_char, $self->{next_char};
2020     if ($self->{next_char} == 0x0044) { # D
2021     !!!next-input-character;
2022     push @next_char, $self->{next_char};
2023     if ($self->{next_char} == 0x0041) { # A
2024     !!!next-input-character;
2025     push @next_char, $self->{next_char};
2026     if ($self->{next_char} == 0x0054) { # T
2027     !!!next-input-character;
2028     push @next_char, $self->{next_char};
2029     if ($self->{next_char} == 0x0041) { # A
2030     !!!next-input-character;
2031     push @next_char, $self->{next_char};
2032     if ($self->{next_char} == 0x005B) { # [
2033     !!!cp (135.1);
2034     $self->{state} = CDATA_BLOCK_STATE;
2035     !!!next-input-character;
2036     redo A;
2037     } else {
2038     !!!cp (135.2);
2039     }
2040     } else {
2041     !!!cp (135.3);
2042     }
2043     } else {
2044     !!!cp (135.4);
2045     }
2046     } else {
2047     !!!cp (135.5);
2048     }
2049     } else {
2050     !!!cp (135.6);
2051     }
2052     } else {
2053     !!!cp (135.7);
2054     }
2055 wakaba 1.77 } else {
2056     !!!cp (136);
2057 wakaba 1.1 }
2058    
2059 wakaba 1.30 !!!parse-error (type => 'bogus comment');
2060 wakaba 1.76 $self->{next_char} = shift @next_char;
2061 wakaba 1.1 !!!back-next-input-character (@next_char);
2062 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
2063 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2064 wakaba 1.120 line => $l, column => $c,
2065 wakaba 1.118 };
2066 wakaba 1.1 redo A;
2067    
2068     ## ISSUE: typos in spec: chacacters, is is a parse error
2069     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
2070 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
2071 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2072 wakaba 1.77 !!!cp (137);
2073 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
2074 wakaba 1.23 !!!next-input-character;
2075     redo A;
2076 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2077 wakaba 1.77 !!!cp (138);
2078 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2079 wakaba 1.57 $self->{state} = DATA_STATE;
2080 wakaba 1.23 !!!next-input-character;
2081    
2082     !!!emit ($self->{current_token}); # comment
2083    
2084     redo A;
2085 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2086 wakaba 1.77 !!!cp (139);
2087 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2088 wakaba 1.57 $self->{state} = DATA_STATE;
2089 wakaba 1.23 ## reconsume
2090    
2091     !!!emit ($self->{current_token}); # comment
2092    
2093     redo A;
2094     } else {
2095 wakaba 1.77 !!!cp (140);
2096 wakaba 1.23 $self->{current_token}->{data} # comment
2097 wakaba 1.76 .= chr ($self->{next_char});
2098 wakaba 1.57 $self->{state} = COMMENT_STATE;
2099 wakaba 1.23 !!!next-input-character;
2100     redo A;
2101     }
2102 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
2103 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2104 wakaba 1.77 !!!cp (141);
2105 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2106 wakaba 1.23 !!!next-input-character;
2107     redo A;
2108 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2109 wakaba 1.77 !!!cp (142);
2110 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2111 wakaba 1.57 $self->{state} = DATA_STATE;
2112 wakaba 1.23 !!!next-input-character;
2113    
2114     !!!emit ($self->{current_token}); # comment
2115    
2116     redo A;
2117 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2118 wakaba 1.77 !!!cp (143);
2119 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2120 wakaba 1.57 $self->{state} = DATA_STATE;
2121 wakaba 1.23 ## reconsume
2122    
2123     !!!emit ($self->{current_token}); # comment
2124    
2125     redo A;
2126     } else {
2127 wakaba 1.77 !!!cp (144);
2128 wakaba 1.23 $self->{current_token}->{data} # comment
2129 wakaba 1.76 .= '-' . chr ($self->{next_char});
2130 wakaba 1.57 $self->{state} = COMMENT_STATE;
2131 wakaba 1.23 !!!next-input-character;
2132     redo A;
2133     }
2134 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
2135 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2136 wakaba 1.77 !!!cp (145);
2137 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
2138 wakaba 1.1 !!!next-input-character;
2139     redo A;
2140 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2141 wakaba 1.77 !!!cp (146);
2142 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2143 wakaba 1.57 $self->{state} = DATA_STATE;
2144 wakaba 1.1 ## reconsume
2145    
2146     !!!emit ($self->{current_token}); # comment
2147    
2148     redo A;
2149     } else {
2150 wakaba 1.77 !!!cp (147);
2151 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2152 wakaba 1.1 ## Stay in the state
2153     !!!next-input-character;
2154     redo A;
2155     }
2156 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2157 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2158 wakaba 1.77 !!!cp (148);
2159 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2160 wakaba 1.1 !!!next-input-character;
2161     redo A;
2162 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2163 wakaba 1.77 !!!cp (149);
2164 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2165 wakaba 1.57 $self->{state} = DATA_STATE;
2166 wakaba 1.1 ## reconsume
2167    
2168     !!!emit ($self->{current_token}); # comment
2169    
2170     redo A;
2171     } else {
2172 wakaba 1.77 !!!cp (150);
2173 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2174 wakaba 1.57 $self->{state} = COMMENT_STATE;
2175 wakaba 1.1 !!!next-input-character;
2176     redo A;
2177     }
2178 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
2179 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2180 wakaba 1.77 !!!cp (151);
2181 wakaba 1.57 $self->{state} = DATA_STATE;
2182 wakaba 1.1 !!!next-input-character;
2183    
2184     !!!emit ($self->{current_token}); # comment
2185    
2186     redo A;
2187 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
2188 wakaba 1.77 !!!cp (152);
2189 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2190     line => $self->{line_prev},
2191     column => $self->{column_prev});
2192 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
2193     ## Stay in the state
2194     !!!next-input-character;
2195     redo A;
2196 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2197 wakaba 1.77 !!!cp (153);
2198 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2199 wakaba 1.57 $self->{state} = DATA_STATE;
2200 wakaba 1.1 ## reconsume
2201    
2202     !!!emit ($self->{current_token}); # comment
2203    
2204     redo A;
2205     } else {
2206 wakaba 1.77 !!!cp (154);
2207 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2208     line => $self->{line_prev},
2209     column => $self->{column_prev});
2210 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2211 wakaba 1.57 $self->{state} = COMMENT_STATE;
2212 wakaba 1.1 !!!next-input-character;
2213     redo A;
2214     }
2215 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
2216 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2217     $self->{next_char} == 0x000A or # LF
2218     $self->{next_char} == 0x000B or # VT
2219     $self->{next_char} == 0x000C or # FF
2220     $self->{next_char} == 0x0020) { # SP
2221 wakaba 1.77 !!!cp (155);
2222 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2223 wakaba 1.1 !!!next-input-character;
2224     redo A;
2225     } else {
2226 wakaba 1.77 !!!cp (156);
2227 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
2228 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2229 wakaba 1.1 ## reconsume
2230     redo A;
2231     }
2232 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2233 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2234     $self->{next_char} == 0x000A or # LF
2235     $self->{next_char} == 0x000B or # VT
2236     $self->{next_char} == 0x000C or # FF
2237     $self->{next_char} == 0x0020) { # SP
2238 wakaba 1.77 !!!cp (157);
2239 wakaba 1.1 ## Stay in the state
2240     !!!next-input-character;
2241     redo A;
2242 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2243 wakaba 1.77 !!!cp (158);
2244 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2245 wakaba 1.57 $self->{state} = DATA_STATE;
2246 wakaba 1.1 !!!next-input-character;
2247    
2248 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2249 wakaba 1.1
2250     redo A;
2251 wakaba 1.77 } elsif ($self->{next_char} == -1) {
2252     !!!cp (159);
2253 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2254 wakaba 1.57 $self->{state} = DATA_STATE;
2255 wakaba 1.1 ## reconsume
2256    
2257 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2258 wakaba 1.1
2259     redo A;
2260     } else {
2261 wakaba 1.77 !!!cp (160);
2262 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
2263     delete $self->{current_token}->{quirks};
2264 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
2265 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
2266 wakaba 1.1 !!!next-input-character;
2267     redo A;
2268     }
2269 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2270 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
2271 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2272     $self->{next_char} == 0x000A or # LF
2273     $self->{next_char} == 0x000B or # VT
2274     $self->{next_char} == 0x000C or # FF
2275     $self->{next_char} == 0x0020) { # SP
2276 wakaba 1.77 !!!cp (161);
2277 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2278 wakaba 1.1 !!!next-input-character;
2279     redo A;
2280 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2281 wakaba 1.77 !!!cp (162);
2282 wakaba 1.57 $self->{state} = DATA_STATE;
2283 wakaba 1.1 !!!next-input-character;
2284    
2285     !!!emit ($self->{current_token}); # DOCTYPE
2286    
2287     redo A;
2288 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2289 wakaba 1.77 !!!cp (163);
2290 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2291 wakaba 1.57 $self->{state} = DATA_STATE;
2292 wakaba 1.1 ## reconsume
2293    
2294 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2295 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2296 wakaba 1.1
2297     redo A;
2298     } else {
2299 wakaba 1.77 !!!cp (164);
2300 wakaba 1.1 $self->{current_token}->{name}
2301 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
2302 wakaba 1.1 ## Stay in the state
2303     !!!next-input-character;
2304     redo A;
2305     }
2306 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2307 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2308     $self->{next_char} == 0x000A or # LF
2309     $self->{next_char} == 0x000B or # VT
2310     $self->{next_char} == 0x000C or # FF
2311     $self->{next_char} == 0x0020) { # SP
2312 wakaba 1.77 !!!cp (165);
2313 wakaba 1.1 ## Stay in the state
2314     !!!next-input-character;
2315     redo A;
2316 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2317 wakaba 1.77 !!!cp (166);
2318 wakaba 1.57 $self->{state} = DATA_STATE;
2319 wakaba 1.1 !!!next-input-character;
2320    
2321     !!!emit ($self->{current_token}); # DOCTYPE
2322    
2323     redo A;
2324 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2325 wakaba 1.77 !!!cp (167);
2326 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2327 wakaba 1.57 $self->{state} = DATA_STATE;
2328 wakaba 1.1 ## reconsume
2329    
2330 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2331 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2332    
2333     redo A;
2334 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2335     $self->{next_char} == 0x0070) { # p
2336 wakaba 1.18 !!!next-input-character;
2337 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
2338     $self->{next_char} == 0x0075) { # u
2339 wakaba 1.18 !!!next-input-character;
2340 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
2341     $self->{next_char} == 0x0062) { # b
2342 wakaba 1.18 !!!next-input-character;
2343 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
2344     $self->{next_char} == 0x006C) { # l
2345 wakaba 1.18 !!!next-input-character;
2346 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
2347     $self->{next_char} == 0x0069) { # i
2348 wakaba 1.18 !!!next-input-character;
2349 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
2350     $self->{next_char} == 0x0063) { # c
2351 wakaba 1.77 !!!cp (168);
2352 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2353 wakaba 1.18 !!!next-input-character;
2354     redo A;
2355 wakaba 1.77 } else {
2356     !!!cp (169);
2357 wakaba 1.18 }
2358 wakaba 1.77 } else {
2359     !!!cp (170);
2360 wakaba 1.18 }
2361 wakaba 1.77 } else {
2362     !!!cp (171);
2363 wakaba 1.18 }
2364 wakaba 1.77 } else {
2365     !!!cp (172);
2366 wakaba 1.18 }
2367 wakaba 1.77 } else {
2368     !!!cp (173);
2369 wakaba 1.18 }
2370    
2371     #
2372 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2373     $self->{next_char} == 0x0073) { # s
2374 wakaba 1.18 !!!next-input-character;
2375 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
2376     $self->{next_char} == 0x0079) { # y
2377 wakaba 1.18 !!!next-input-character;
2378 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
2379     $self->{next_char} == 0x0073) { # s
2380 wakaba 1.18 !!!next-input-character;
2381 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
2382     $self->{next_char} == 0x0074) { # t
2383 wakaba 1.18 !!!next-input-character;
2384 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
2385     $self->{next_char} == 0x0065) { # e
2386 wakaba 1.18 !!!next-input-character;
2387 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
2388     $self->{next_char} == 0x006D) { # m
2389 wakaba 1.77 !!!cp (174);
2390 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2391 wakaba 1.18 !!!next-input-character;
2392     redo A;
2393 wakaba 1.77 } else {
2394     !!!cp (175);
2395 wakaba 1.18 }
2396 wakaba 1.77 } else {
2397     !!!cp (176);
2398 wakaba 1.18 }
2399 wakaba 1.77 } else {
2400     !!!cp (177);
2401 wakaba 1.18 }
2402 wakaba 1.77 } else {
2403     !!!cp (178);
2404 wakaba 1.18 }
2405 wakaba 1.77 } else {
2406     !!!cp (179);
2407 wakaba 1.18 }
2408    
2409     #
2410     } else {
2411 wakaba 1.77 !!!cp (180);
2412 wakaba 1.18 !!!next-input-character;
2413     #
2414     }
2415    
2416     !!!parse-error (type => 'string after DOCTYPE name');
2417 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2418 wakaba 1.73
2419 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2420 wakaba 1.18 # next-input-character is already done
2421     redo A;
2422 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2423 wakaba 1.18 if ({
2424     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2425     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2426 wakaba 1.76 }->{$self->{next_char}}) {
2427 wakaba 1.77 !!!cp (181);
2428 wakaba 1.18 ## Stay in the state
2429     !!!next-input-character;
2430     redo A;
2431 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2432 wakaba 1.77 !!!cp (182);
2433 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2434 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2435 wakaba 1.18 !!!next-input-character;
2436     redo A;
2437 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2438 wakaba 1.77 !!!cp (183);
2439 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2440 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2441 wakaba 1.18 !!!next-input-character;
2442     redo A;
2443 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2444 wakaba 1.77 !!!cp (184);
2445 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2446    
2447 wakaba 1.57 $self->{state} = DATA_STATE;
2448 wakaba 1.18 !!!next-input-character;
2449    
2450 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2451 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2452    
2453     redo A;
2454 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2455 wakaba 1.77 !!!cp (185);
2456 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2457    
2458 wakaba 1.57 $self->{state} = DATA_STATE;
2459 wakaba 1.18 ## reconsume
2460    
2461 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2462 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2463    
2464     redo A;
2465     } else {
2466 wakaba 1.77 !!!cp (186);
2467 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2468 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2469 wakaba 1.73
2470 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2471 wakaba 1.18 !!!next-input-character;
2472     redo A;
2473     }
2474 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2475 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2476 wakaba 1.77 !!!cp (187);
2477 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2478 wakaba 1.18 !!!next-input-character;
2479     redo A;
2480 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2481 wakaba 1.77 !!!cp (188);
2482 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2483    
2484     $self->{state} = DATA_STATE;
2485     !!!next-input-character;
2486    
2487 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2488 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2489    
2490     redo A;
2491 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2492 wakaba 1.77 !!!cp (189);
2493 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2494    
2495 wakaba 1.57 $self->{state} = DATA_STATE;
2496 wakaba 1.18 ## reconsume
2497    
2498 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2499 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2500    
2501     redo A;
2502     } else {
2503 wakaba 1.77 !!!cp (190);
2504 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2505 wakaba 1.76 .= chr $self->{next_char};
2506 wakaba 1.18 ## Stay in the state
2507     !!!next-input-character;
2508     redo A;
2509     }
2510 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2511 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2512 wakaba 1.77 !!!cp (191);
2513 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2514 wakaba 1.18 !!!next-input-character;
2515     redo A;
2516 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2517 wakaba 1.77 !!!cp (192);
2518 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2519    
2520     $self->{state} = DATA_STATE;
2521     !!!next-input-character;
2522    
2523 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2524 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2525    
2526     redo A;
2527 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2528 wakaba 1.77 !!!cp (193);
2529 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2530    
2531 wakaba 1.57 $self->{state} = DATA_STATE;
2532 wakaba 1.18 ## reconsume
2533    
2534 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2535 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2536    
2537     redo A;
2538     } else {
2539 wakaba 1.77 !!!cp (194);
2540 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2541 wakaba 1.76 .= chr $self->{next_char};
2542 wakaba 1.18 ## Stay in the state
2543     !!!next-input-character;
2544     redo A;
2545     }
2546 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2547 wakaba 1.18 if ({
2548     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2549     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2550 wakaba 1.76 }->{$self->{next_char}}) {
2551 wakaba 1.77 !!!cp (195);
2552 wakaba 1.18 ## Stay in the state
2553     !!!next-input-character;
2554     redo A;
2555 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2556 wakaba 1.77 !!!cp (196);
2557 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2558 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2559 wakaba 1.18 !!!next-input-character;
2560     redo A;
2561 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2562 wakaba 1.77 !!!cp (197);
2563 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2564 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2565 wakaba 1.18 !!!next-input-character;
2566     redo A;
2567 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2568 wakaba 1.77 !!!cp (198);
2569 wakaba 1.57 $self->{state} = DATA_STATE;
2570 wakaba 1.18 !!!next-input-character;
2571    
2572     !!!emit ($self->{current_token}); # DOCTYPE
2573    
2574     redo A;
2575 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2576 wakaba 1.77 !!!cp (199);
2577 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2578    
2579 wakaba 1.57 $self->{state} = DATA_STATE;
2580 wakaba 1.26 ## reconsume
2581 wakaba 1.18
2582 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2583 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2584    
2585     redo A;
2586     } else {
2587 wakaba 1.77 !!!cp (200);
2588 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2589 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2590 wakaba 1.73
2591 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2592 wakaba 1.18 !!!next-input-character;
2593     redo A;
2594     }
2595 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2596 wakaba 1.18 if ({
2597     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2598     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2599 wakaba 1.76 }->{$self->{next_char}}) {
2600 wakaba 1.77 !!!cp (201);
2601 wakaba 1.18 ## Stay in the state
2602     !!!next-input-character;
2603     redo A;
2604 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2605 wakaba 1.77 !!!cp (202);
2606 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2607 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2608 wakaba 1.18 !!!next-input-character;
2609     redo A;
2610 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2611 wakaba 1.77 !!!cp (203);
2612 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2613 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2614 wakaba 1.18 !!!next-input-character;
2615     redo A;
2616 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2617 wakaba 1.77 !!!cp (204);
2618 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2619 wakaba 1.57 $self->{state} = DATA_STATE;
2620 wakaba 1.18 !!!next-input-character;
2621    
2622 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2623 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2624    
2625     redo A;
2626 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2627 wakaba 1.77 !!!cp (205);
2628 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2629    
2630 wakaba 1.57 $self->{state} = DATA_STATE;
2631 wakaba 1.26 ## reconsume
2632 wakaba 1.18
2633 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2634 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2635    
2636     redo A;
2637     } else {
2638 wakaba 1.77 !!!cp (206);
2639 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2640 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2641 wakaba 1.73
2642 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2643 wakaba 1.18 !!!next-input-character;
2644     redo A;
2645     }
2646 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2647 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2648 wakaba 1.77 !!!cp (207);
2649 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2650 wakaba 1.18 !!!next-input-character;
2651     redo A;
2652 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2653 wakaba 1.77 !!!cp (208);
2654 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2655    
2656     $self->{state} = DATA_STATE;
2657     !!!next-input-character;
2658    
2659 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2660 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2661    
2662     redo A;
2663 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2664 wakaba 1.77 !!!cp (209);
2665 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2666    
2667 wakaba 1.57 $self->{state} = DATA_STATE;
2668 wakaba 1.18 ## reconsume
2669    
2670 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2671 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2672    
2673     redo A;
2674     } else {
2675 wakaba 1.77 !!!cp (210);
2676 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2677 wakaba 1.76 .= chr $self->{next_char};
2678 wakaba 1.18 ## Stay in the state
2679     !!!next-input-character;
2680     redo A;
2681     }
2682 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2683 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2684 wakaba 1.77 !!!cp (211);
2685 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2686 wakaba 1.18 !!!next-input-character;
2687     redo A;
2688 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2689 wakaba 1.77 !!!cp (212);
2690 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2691    
2692     $self->{state} = DATA_STATE;
2693     !!!next-input-character;
2694    
2695 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2696 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2697    
2698     redo A;
2699 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2700 wakaba 1.77 !!!cp (213);
2701 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2702    
2703 wakaba 1.57 $self->{state} = DATA_STATE;
2704 wakaba 1.18 ## reconsume
2705    
2706 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2707 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2708    
2709     redo A;
2710     } else {
2711 wakaba 1.77 !!!cp (214);
2712 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2713 wakaba 1.76 .= chr $self->{next_char};
2714 wakaba 1.18 ## Stay in the state
2715     !!!next-input-character;
2716     redo A;
2717     }
2718 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2719 wakaba 1.18 if ({
2720     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2721     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2722 wakaba 1.76 }->{$self->{next_char}}) {
2723 wakaba 1.77 !!!cp (215);
2724 wakaba 1.18 ## Stay in the state
2725     !!!next-input-character;
2726     redo A;
2727 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2728 wakaba 1.77 !!!cp (216);
2729 wakaba 1.57 $self->{state} = DATA_STATE;
2730 wakaba 1.18 !!!next-input-character;
2731    
2732     !!!emit ($self->{current_token}); # DOCTYPE
2733    
2734     redo A;
2735 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2736 wakaba 1.77 !!!cp (217);
2737 wakaba 1.18
2738 wakaba 1.57 $self->{state} = DATA_STATE;
2739 wakaba 1.26 ## reconsume
2740 wakaba 1.18
2741 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2742 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2743    
2744     redo A;
2745     } else {
2746 wakaba 1.77 !!!cp (218);
2747 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2748 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2749 wakaba 1.73
2750 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2751 wakaba 1.1 !!!next-input-character;
2752     redo A;
2753     }
2754 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2755 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2756 wakaba 1.77 !!!cp (219);
2757 wakaba 1.57 $self->{state} = DATA_STATE;
2758 wakaba 1.1 !!!next-input-character;
2759    
2760     !!!emit ($self->{current_token}); # DOCTYPE
2761    
2762     redo A;
2763 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2764 wakaba 1.77 !!!cp (220);
2765 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2766 wakaba 1.57 $self->{state} = DATA_STATE;
2767 wakaba 1.1 ## reconsume
2768    
2769     !!!emit ($self->{current_token}); # DOCTYPE
2770    
2771     redo A;
2772     } else {
2773 wakaba 1.77 !!!cp (221);
2774 wakaba 1.1 ## Stay in the state
2775     !!!next-input-character;
2776     redo A;
2777     }
2778 wakaba 1.127 } elsif ($self->{state} == CDATA_BLOCK_STATE) {
2779     my $s = '';
2780    
2781     my ($l, $c) = ($self->{line}, $self->{column});
2782    
2783     CS: while ($self->{next_char} != -1) {
2784     if ($self->{next_char} == 0x005D) { # ]
2785     !!!next-input-character;
2786     if ($self->{next_char} == 0x005D) { # ]
2787     !!!next-input-character;
2788     MDC: {
2789     if ($self->{next_char} == 0x003E) { # >
2790     !!!cp (221.1);
2791     !!!next-input-character;
2792     last CS;
2793     } elsif ($self->{next_char} == 0x005D) { # ]
2794     !!!cp (221.2);
2795     $s .= ']';
2796     !!!next-input-character;
2797     redo MDC;
2798     } else {
2799     !!!cp (221.3);
2800     $s .= ']]';
2801     #
2802     }
2803     } # MDC
2804     } else {
2805     !!!cp (221.4);
2806     $s .= ']';
2807     #
2808     }
2809     } else {
2810     !!!cp (221.5);
2811     #
2812     }
2813     $s .= chr $self->{next_char};
2814     !!!next-input-character;
2815     } # CS
2816    
2817     $self->{state} = DATA_STATE;
2818     ## next-input-character done or EOF, which is reconsumed.
2819    
2820     if (length $s) {
2821     !!!cp (221.6);
2822     !!!emit ({type => CHARACTER_TOKEN, data => $s,
2823     line => $l, column => $c});
2824     } else {
2825     !!!cp (221.7);
2826     }
2827    
2828     redo A;
2829    
2830     ## ISSUE: "text tokens" in spec.
2831     ## TODO: Streaming support
2832 wakaba 1.1 } else {
2833     die "$0: $self->{state}: Unknown state";
2834     }
2835     } # A
2836    
2837     die "$0: _get_next_token: unexpected case";
2838     } # _get_next_token
2839    
## _tokenize_attempt_to_consume_an_entity ($in_attr, $additional)
##
## Tries to read one character reference starting at the current input
## character |$self->{next_char}|, reporting problems with
## |!!!parse-error|.
## NOTE(review): The caller has presumably consumed the "&" already;
## literal results below are rebuilt with a leading "&" - confirm.
##
##   $in_attr    - If true, a named reference that lacks ";" and is
##                 followed by further consumed letters, digits or ";"
##                 is returned as literal text instead of being expanded.
##   $additional - An extra code point that, like white space, "<", "&"
##                 and EOF (-1), means that nothing is consumed.
##
## Returns a CHARACTER_TOKEN hash reference (with |has_reference| set
## when a reference has actually been expanded), or |undef| when no
## token is produced.
##
## NOTE: Lines beginning with |!!!| are not plain Perl.
## NOTE(review): Presumably they are macros expanded when this |.src|
## file is built - confirm against the build script.
2840 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2841     my ($self, $in_attr, $additional) = @_;
2842 wakaba 1.20
    ## Position used for every error and token reported below.
2843 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2844    
    ## A reference is not attempted when the next character is one of
    ## these code points or |$additional|.
2845 wakaba 1.20 if ({
2846     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2847     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2848 wakaba 1.72 $additional => 1,
2849 wakaba 1.76 }->{$self->{next_char}}) {
2850 wakaba 1.78 !!!cp (1001);
2851 wakaba 1.20 ## Don't consume
2852     ## No error
2853     return undef;
2854 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
    ## Numeric character reference: "#" followed by "x"/"X" and
    ## hexadecimal digits, or by decimal digits.
2855 wakaba 1.1 !!!next-input-character;
2856 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2857     $self->{next_char} == 0x0058) { # X
    ## |$code| stays undefined until the first hexadecimal digit has
    ## been seen; that is how the "no digit" case is detected below.
2858 wakaba 1.26 my $code;
    ## Each pass of X saves the character just examined in |$x_char|
    ## ("x" or "X" on the first pass) and then reads the next one.
    ## |redo X| repeats this for every hexadecimal digit.
2859 wakaba 1.1 X: {
2860 wakaba 1.76 my $x_char = $self->{next_char};
2861 wakaba 1.1 !!!next-input-character;
2862 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2863     $self->{next_char} <= 0x0039) { # 0..9
2864 wakaba 1.78 !!!cp (1002);
2865 wakaba 1.26 $code ||= 0;
2866     $code *= 0x10;
2867 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2868 wakaba 1.1 redo X;
2869 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2870     $self->{next_char} <= 0x0066) { # a..f
2871 wakaba 1.78 !!!cp (1003);
2872 wakaba 1.26 $code ||= 0;
2873     $code *= 0x10;
    ## "a" (0x61) - 0x60 + 9 == 10
2874 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2875 wakaba 1.1 redo X;
2876 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2877     $self->{next_char} <= 0x0046) { # A..F
2878 wakaba 1.78 !!!cp (1004);
2879 wakaba 1.26 $code ||= 0;
2880     $code *= 0x10;
    ## "A" (0x41) - 0x40 + 9 == 10
2881 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2882 wakaba 1.1 redo X;
2883 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2884 wakaba 1.78 !!!cp (1005);
2885 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
    ## NOTE(review): Presumably this un-reads the "x"/"X" and the
    ## character after it; "#" then becomes the current character
    ## again and no token is returned.
2886 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2887     $self->{next_char} = 0x0023; # #
2888 wakaba 1.1 return undef;
2889 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2890 wakaba 1.78 !!!cp (1006);
2891 wakaba 1.1 !!!next-input-character;
2892     } else {
    ## Digits were read but the terminating ";" is missing.
2893 wakaba 1.78 !!!cp (1007);
2894 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2895 wakaba 1.1 }
2896    
    ## Code points that are not returned as they are: zero and
    ## surrogates become U+FFFD, values above U+10FFFF become U+FFFD,
    ## CR becomes LF, and 0x80..0x9F are looked up in
    ## |$c1_entity_char| (defined outside this subroutine).
2897 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2898 wakaba 1.78 !!!cp (1008);
2899 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2900 wakaba 1.26 $code = 0xFFFD;
2901     } elsif ($code > 0x10FFFF) {
2902 wakaba 1.78 !!!cp (1009);
2903 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2904 wakaba 1.26 $code = 0xFFFD;
2905     } elsif ($code == 0x000D) {
2906 wakaba 1.78 !!!cp (1010);
2907 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2908 wakaba 1.26 $code = 0x000A;
2909     } elsif (0x80 <= $code and $code <= 0x9F) {
2910 wakaba 1.78 !!!cp (1011);
2911 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2912 wakaba 1.26 $code = $c1_entity_char->{$code};
2913 wakaba 1.1 }
2914    
2915 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2916 wakaba 1.118 has_reference => 1,
2917 wakaba 1.120 line => $l, column => $c,
2918 wakaba 1.118 };
2919 wakaba 1.1 } # X
2920 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2921     $self->{next_char} <= 0x0039) { # 0..9
    ## Decimal form: accumulate every consecutive digit.
2922     my $code = $self->{next_char} - 0x0030;
2923 wakaba 1.1 !!!next-input-character;
2924    
2925 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2926     $self->{next_char} <= 0x0039) { # 0..9
2927 wakaba 1.78 !!!cp (1012);
2928 wakaba 1.1 $code *= 10;
2929 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2930 wakaba 1.1
2931     !!!next-input-character;
2932     }
2933    
2934 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2935 wakaba 1.78 !!!cp (1013);
2936 wakaba 1.1 !!!next-input-character;
2937     } else {
2938 wakaba 1.78 !!!cp (1014);
2939 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2940 wakaba 1.1 }
2941    
    ## Same replacements as in the hexadecimal branch above.
2942 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2943 wakaba 1.78 !!!cp (1015);
2944 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2945 wakaba 1.26 $code = 0xFFFD;
2946     } elsif ($code > 0x10FFFF) {
2947 wakaba 1.78 !!!cp (1016);
2948 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2949 wakaba 1.26 $code = 0xFFFD;
2950     } elsif ($code == 0x000D) {
2951 wakaba 1.78 !!!cp (1017);
2952 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2953 wakaba 1.26 $code = 0x000A;
2954 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2955 wakaba 1.78 !!!cp (1018);
2956 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2957 wakaba 1.4 $code = $c1_entity_char->{$code};
2958 wakaba 1.1 }
2959    
2960 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2961 wakaba 1.120 line => $l, column => $c,
2962 wakaba 1.118 };
2963 wakaba 1.1 } else {
    ## "#" is followed by neither "x"/"X" nor a decimal digit.
2964 wakaba 1.78 !!!cp (1019);
2965 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
2966 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2967     $self->{next_char} = 0x0023; # #
2968 wakaba 1.1 return undef;
2969     }
2970 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2971     $self->{next_char} <= 0x005A) or
2972     (0x0061 <= $self->{next_char} and
2973     $self->{next_char} <= 0x007A)) {
    ## Named character reference: the name starts with an ASCII letter.
2974     my $entity_name = chr $self->{next_char};
2975 wakaba 1.1 !!!next-input-character;
2976    
    ## |$value| is the text to return: the characters read so far, until
    ## a name matches, at which point it is replaced by that entity's
    ## value; characters read after a match are appended to it.
    ## |$match| is 0 while nothing has matched, 1 after a match that
    ## ends with ";", -1 after a match without ";", and is doubled for
    ## each further non-matching character that is consumed.
2977     my $value = $entity_name;
2978 wakaba 1.37 my $match = 0;
2979 wakaba 1.16 require Whatpm::_NamedEntityList;
2980     our $EntityChar;
2981 wakaba 1.1
2982 wakaba 1.128 while (length $entity_name < 30 and
2983 wakaba 1.1 ## NOTE: Some number greater than the maximum length of entity name
2984 wakaba 1.76 ((0x0041 <= $self->{next_char} and # a
2985     $self->{next_char} <= 0x005A) or # x
2986     (0x0061 <= $self->{next_char} and # a
2987     $self->{next_char} <= 0x007A) or # z
2988     (0x0030 <= $self->{next_char} and # 0
2989     $self->{next_char} <= 0x0039) or # 9
2990     $self->{next_char} == 0x003B)) { # ;
2991     $entity_name .= chr $self->{next_char};
2992 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2993 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
    ## A name terminated by ";" ends the search.
2994 wakaba 1.78 !!!cp (1020);
2995 wakaba 1.26 $value = $EntityChar->{$entity_name};
2996 wakaba 1.16 $match = 1;
2997     !!!next-input-character;
2998     last;
2999 wakaba 1.37 } else {
    ## A name without ";" matched; keep reading, since a longer
    ## name may still match.
3000 wakaba 1.78 !!!cp (1021);
3001 wakaba 1.26 $value = $EntityChar->{$entity_name};
3002     $match = -1;
3003 wakaba 1.37 !!!next-input-character;
3004 wakaba 1.16 }
3005 wakaba 1.1 } else {
3006 wakaba 1.78 !!!cp (1022);
3007 wakaba 1.76 $value .= chr $self->{next_char};
3008 wakaba 1.37 $match *= 2;
3009     !!!next-input-character;
3010 wakaba 1.1 }
3011     }
3012    
3013 wakaba 1.16 if ($match > 0) {
3014 wakaba 1.78 !!!cp (1023);
3015 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
3016 wakaba 1.120 line => $l, column => $c,
3017 wakaba 1.118 };
3018 wakaba 1.16 } elsif ($match < 0) {
3019 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
    ## |$match < -1|: characters were consumed after the last
    ## ";"-less match.  In an attribute value the whole text is then
    ## returned literally, without |has_reference|.
3020 wakaba 1.37 if ($in_attr and $match < -1) {
3021 wakaba 1.78 !!!cp (1024);
3022 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
3023 wakaba 1.120 line => $l, column => $c,
3024 wakaba 1.118 };
3025 wakaba 1.37 } else {
3026 wakaba 1.78 !!!cp (1025);
3027 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
3028 wakaba 1.120 line => $l, column => $c,
3029 wakaba 1.118 };
3030 wakaba 1.37 }
3031 wakaba 1.1 } else {
    ## No name matched: the characters read are returned as literal
    ## text after "&".
3032 wakaba 1.78 !!!cp (1026);
3033 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
3034 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
3035 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
3036 wakaba 1.120 line => $l, column => $c,
3037 wakaba 1.118 };
3038 wakaba 1.1 }
3039     } else {
3040 wakaba 1.78 !!!cp (1027);
3041 wakaba 1.1 ## no characters are consumed
3042 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
3043 wakaba 1.1 return undef;
3044     }
3045     } # _tokenize_attempt_to_consume_an_entity
3046    
## Prepares |$self->{document}| for the tree construction stage: strict
## error checking is switched off and the document is flagged as HTML.
##
## NOTE: |$self->{document}| MUST be set before this method is called.
##
## Returns whatever the final |manakai_is_html| call returns.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  $doc->strict_error_checking (0);

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  return $doc->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
3055    
## Finishes the tree construction stage by switching strict error
## checking on |$self->{document}| back on.
##
## Returns whatever |strict_error_checking| returns.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;

  ## TODO: Turn mutation events on

  return $self->{document}->strict_error_checking (1);
} # _terminate_tree_constructor
3061    
3062     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
3063    
3064 wakaba 1.3 { # tree construction stage
3065     my $token;
3066    
## Drives the tree construction stage: fetches the first token, clears
## the per-parse fields and then runs the "initial", "before html" and
## remaining insertion modes in that order.
##
## Returns whatever |_tree_construction_main| returns.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: The point at which an interactive UA makes
  ## |$self->{document}| available to the user, and the point at which
  ## it begins accepting user input, are both undefined.

  ## NOTE: "Append a character" means: gather that character together
  ## with every consecutive character that follows it and insert a
  ## single Text node whose data is the concatenation of them all. # MUST

  ## The first token is fetched before any field is reset below.
  !!!next-token;

  $self->{$_} = undef for qw(form_element head_element);
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  return $self->_tree_construction_main;
} # _construct_tree
3095    
3096     sub _tree_construction_initial ($) {
3097     my $self = shift;
3098 wakaba 1.84
3099     ## NOTE: "initial" insertion mode
3100    
3101 wakaba 1.18 INITIAL: {
3102 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3103 wakaba 1.18 ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
3104     ## error, switch to a conformance checking mode for another
3105     ## language.
3106     my $doctype_name = $token->{name};
3107     $doctype_name = '' unless defined $doctype_name;
3108     $doctype_name =~ tr/a-z/A-Z/;
3109     if (not defined $token->{name} or # <!DOCTYPE>
3110     defined $token->{public_identifier} or
3111     defined $token->{system_identifier}) {
3112 wakaba 1.79 !!!cp ('t1');
3113 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
3114 wakaba 1.18 } elsif ($doctype_name ne 'HTML') {
3115 wakaba 1.79 !!!cp ('t2');
3116 wakaba 1.18 ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
3117 wakaba 1.113 !!!parse-error (type => 'not HTML5', token => $token);
3118 wakaba 1.79 } else {
3119     !!!cp ('t3');
3120 wakaba 1.18 }
3121    
3122     my $doctype = $self->{document}->create_document_type_definition
3123     ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
3124 wakaba 1.122 ## NOTE: Default value for both |public_id| and |system_id| attributes
3125     ## are empty strings, so that we don't set any value in missing cases.
3126 wakaba 1.18 $doctype->public_id ($token->{public_identifier})
3127     if defined $token->{public_identifier};
3128     $doctype->system_id ($token->{system_identifier})
3129     if defined $token->{system_identifier};
3130     ## NOTE: Other DocumentType attributes are null or empty lists.
3131     ## ISSUE: internalSubset = null??
3132     $self->{document}->append_child ($doctype);
3133    
3134 wakaba 1.75 if ($token->{quirks} or $doctype_name ne 'HTML') {
3135 wakaba 1.79 !!!cp ('t4');
3136 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3137     } elsif (defined $token->{public_identifier}) {
3138     my $pubid = $token->{public_identifier};
3139     $pubid =~ tr/a-z/A-z/;
3140 wakaba 1.143 my $prefix = [
3141     "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
3142     "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
3143     "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
3144     "-//IETF//DTD HTML 2.0 LEVEL 1//",
3145     "-//IETF//DTD HTML 2.0 LEVEL 2//",
3146     "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
3147     "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
3148     "-//IETF//DTD HTML 2.0 STRICT//",
3149     "-//IETF//DTD HTML 2.0//",
3150     "-//IETF//DTD HTML 2.1E//",
3151     "-//IETF//DTD HTML 3.0//",
3152     "-//IETF//DTD HTML 3.2 FINAL//",
3153     "-//IETF//DTD HTML 3.2//",
3154     "-//IETF//DTD HTML 3//",
3155     "-//IETF//DTD HTML LEVEL 0//",
3156     "-//IETF//DTD HTML LEVEL 1//",
3157     "-//IETF//DTD HTML LEVEL 2//",
3158     "-//IETF//DTD HTML LEVEL 3//",
3159     "-//IETF//DTD HTML STRICT LEVEL 0//",
3160     "-//IETF//DTD HTML STRICT LEVEL 1//",
3161     "-//IETF//DTD HTML STRICT LEVEL 2//",
3162     "-//IETF//DTD HTML STRICT LEVEL 3//",
3163     "-//IETF//DTD HTML STRICT//",
3164     "-//IETF//DTD HTML//",
3165     "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
3166     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
3167     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
3168     "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
3169     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
3170     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
3171     "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
3172     "-//NETSCAPE COMM. CORP.//DTD HTML//",
3173     "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
3174     "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
3175     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
3176     "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
3177     "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
3178     "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
3179     "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
3180     "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
3181     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
3182     "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
3183     "-//W3C//DTD HTML 3 1995-03-24//",
3184     "-//W3C//DTD HTML 3.2 DRAFT//",
3185     "-//W3C//DTD HTML 3.2 FINAL//",
3186     "-//W3C//DTD HTML 3.2//",
3187     "-//W3C//DTD HTML 3.2S DRAFT//",
3188     "-//W3C//DTD HTML 4.0 FRAMESET//",
3189     "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
3190     "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
3191     "-//W3C//DTD HTML EXPERIMENTAL 970421//",
3192     "-//W3C//DTD W3 HTML//",
3193     "-//W3O//DTD W3 HTML 3.0//",
3194     "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
3195     "-//WEBTECHS//DTD MOZILLA HTML//",
3196     ]; # $prefix
3197     my $match;
3198     for (@$prefix) {
3199     if (substr ($prefix, 0, length $_) eq $_) {
3200     $match = 1;
3201     last;
3202     }
3203     }
3204     if ($match or
3205     $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
3206     $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
3207     $pubid eq "HTML") {
3208 wakaba 1.79 !!!cp ('t5');
3209 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3210 wakaba 1.143 } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
3211     $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
3212 wakaba 1.18 if (defined $token->{system_identifier}) {
3213 wakaba 1.79 !!!cp ('t6');
3214 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3215     } else {
3216 wakaba 1.79 !!!cp ('t7');
3217 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
3218 wakaba 1.3 }
3219 wakaba 1.143 } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
3220     $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
3221 wakaba 1.79 !!!cp ('t8');
3222 wakaba 1.18 $self->{document}->manakai_compat_mode ('limited quirks');
3223 wakaba 1.79 } else {
3224     !!!cp ('t9');
3225 wakaba 1.18 }
3226 wakaba 1.79 } else {
3227     !!!cp ('t10');
3228 wakaba 1.18 }
3229     if (defined $token->{system_identifier}) {
3230     my $sysid = $token->{system_identifier};
3231     $sysid =~ tr/A-Z/a-z/;
3232     if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
3233 wakaba 1.143 ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
3234     ## marked as quirks.
3235 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3236 wakaba 1.79 !!!cp ('t11');
3237     } else {
3238     !!!cp ('t12');
3239 wakaba 1.18 }
3240 wakaba 1.79 } else {
3241     !!!cp ('t13');
3242 wakaba 1.18 }
3243    
3244 wakaba 1.84 ## Go to the "before html" insertion mode.
3245 wakaba 1.18 !!!next-token;
3246     return;
3247     } elsif ({
3248 wakaba 1.55 START_TAG_TOKEN, 1,
3249     END_TAG_TOKEN, 1,
3250     END_OF_FILE_TOKEN, 1,
3251 wakaba 1.18 }->{$token->{type}}) {
3252 wakaba 1.79 !!!cp ('t14');
3253 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
3254 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3255 wakaba 1.84 ## Go to the "before html" insertion mode.
3256 wakaba 1.18 ## reprocess
3257 wakaba 1.125 !!!ack-later;
3258 wakaba 1.18 return;
3259 wakaba 1.55 } elsif ($token->{type} == CHARACTER_TOKEN) {
3260 wakaba 1.18 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
3261     ## Ignore the token
3262 wakaba 1.26
3263 wakaba 1.18 unless (length $token->{data}) {
3264 wakaba 1.79 !!!cp ('t15');
3265 wakaba 1.84 ## Stay in the insertion mode.
3266 wakaba 1.18 !!!next-token;
3267     redo INITIAL;
3268 wakaba 1.79 } else {
3269     !!!cp ('t16');
3270 wakaba 1.3 }
3271 wakaba 1.79 } else {
3272     !!!cp ('t17');
3273 wakaba 1.3 }
3274 wakaba 1.18
3275 wakaba 1.113 !!!parse-error (type => 'no DOCTYPE', token => $token);
3276 wakaba 1.18 $self->{document}->manakai_compat_mode ('quirks');
3277 wakaba 1.84 ## Go to the "before html" insertion mode.
3278 wakaba 1.18 ## reprocess
3279     return;
3280 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3281 wakaba 1.79 !!!cp ('t18');
3282 wakaba 1.18 my $comment = $self->{document}->create_comment ($token->{data});
3283     $self->{document}->append_child ($comment);
3284    
3285 wakaba 1.84 ## Stay in the insertion mode.
3286 wakaba 1.18 !!!next-token;
3287     redo INITIAL;
3288     } else {
3289 wakaba 1.55 die "$0: $token->{type}: Unknown token type";
3290 wakaba 1.18 }
3291     } # INITIAL
3292 wakaba 1.79
3293     die "$0: _tree_construction_initial: This should be never reached";
3294 wakaba 1.3 } # _tree_construction_initial
3295    
## Implements the "before html" insertion mode (root element phase).
##
## Processes the current token (the |$token| variable declared outside
## of this sub) and keeps fetching tokens until the root |html| element
## has been created, appended to the document and pushed onto the stack
## of open elements.  Then returns; according to the notes below the
## next insertion mode is "before head".
##
##   - DOCTYPE token: parse error, token ignored.
##   - Comment token: a comment node is appended to the document.
##   - Character token: leading white space is dropped; if anything is
##     left, the token is handled like "anything else".
##   - |html| start tag: the element is created from the token, the
##     application cache selection callback is run with the |manifest|
##     attribute value (or |undef| if the attribute is absent), and the
##     token is consumed.
##   - Anything else: an |html| element without attributes is created,
##     the callback is run exactly once with |undef|, and the token is
##     left to be reprocessed.
##
## Dies if the token has an unknown type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is NOT invoked
      ## here.  It is invoked once on the common "anything else" path
      ## below, right after the implied |html| element is created;
      ## calling it here as well ran it twice for the same document.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## "Anything else": create an implied |html| element (no attributes;
    ## the empty fourth macro argument is intentional).
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    ## No |manifest| attribute can exist on an implied element.
    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3390    
## Resets |$self->{insertion_mode}| by inspecting the stack of open
## elements, starting from the current node (the last entry) and
## walking towards the first entry until some node decides the mode.
##
## When the first entry of the stack is reached, the node stored in
## |$self->{inner_html_node}| is examined in its place; if no such
## node is defined, the sub dies.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1: Becomes true once the first entry of the stack is reached.
  my $is_last;

  ## Step 2: Begin with the current node.
  my $depth = -1;
  my $node = $self->{open_elements}->[$depth];

  ## Step 3
  while (1) {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $is_last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $node = $self->{inner_html_node};
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML
      ## and SVG elements.  Currently the HTML syntax supports only
      ## MathML and SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] & TABLE_CELL_EL) {
      if (not $is_last) {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      } else {
        !!!cp ('t28.2');
        ## A table cell at the bottom of the stack decides nothing.
      }
    } else {
      !!!cp ('t28.4');
      my $mode_of = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      };
      $new_mode = $mode_of->{$node->[0]->manakai_local_name};
    }
    if (defined $new_mode) {
      ## Returns only if the assigned mode is a true value.
      ($self->{insertion_mode} = $new_mode) and return;
    }

    ## Step 15
    if ($node->[1] & HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    }
    !!!cp ('t31');

    ## Step 16
    if ($is_last) {
      ($self->{insertion_mode} = IN_BODY_IM) and return;
    }

    ## Step 17: Move to the previous entry in the stack.
    $depth--;
    $node = $self->{open_elements}->[$depth];

    ## Step 18: Continue with Step 3.
  } # Step 3 loop

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3477    
3478     sub _tree_construction_main ($) {
3479     my $self = shift;
3480    
3481 wakaba 1.1 my $active_formatting_elements = [];
3482    
## Reconstructs the active formatting elements.
##
## Argument: a code reference that is called with each cloned element
## node to insert it into the tree.
##
## Does nothing if the list is empty, or if its last entry is a marker
## or an element that is still in the stack of open elements.
## Otherwise rewinds to the entry after the nearest marker or
## still-open element and, from there to the end of the list, clones
## each entry, inserts the clone, pushes it onto the stack of open
## elements and replaces the list entry with it.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert_node) = @_;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker';
  foreach my $open (@{$self->{open_elements}}) {
    next unless $entry->[0] eq $open->[0];
    !!!cp ('t32');
    return;
  }

  REWIND: while (1) {
    ## Step 4: Nothing before the first entry of the list.
    last REWIND
        if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open = 0;
      foreach my $open (@{$self->{open_elements}}) {
        next unless $entry->[0] eq $open->[0];
        !!!cp ('t33');
        $is_open = 1;
        last;
      }
      unless ($is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        next REWIND;
      }
      !!!cp ('t34');
      #
    }

    ## Step 7
    $pos++;
    $entry = $active_formatting_elements->[$pos];
    last REWIND;
  } # REWIND

  RECREATE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: Done once the clone is the last entry of the list.
    if ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t37');
      last RECREATE;
    }

    !!!cp ('t36');
    ## Step 7'
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  } # RECREATE
}; # $reconstruct_active_formatting_elements
3562    
## Clears the list of active formatting elements up to the last
## marker: the last |#marker| entry and every entry after it are
## removed.  The list is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      !!!cp ('t38');
      ## Two-argument |splice| drops everything from |$idx| onward.
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3574    
3575 wakaba 1.96 my $insert;
3576    
## Generic CDATA/RCDATA element parsing for the current start tag token.
##
## Argument: the content model flag to switch the tokenizer to
## (|CDATA_CONTENT_MODEL| or |RCDATA_CONTENT_MODEL|; anything else
## makes the sub die when the element is not properly closed).
##
## Creates an element for the start tag, inserts it with |$insert|,
## collects all following character tokens into one text node, and
## then expects the matching end tag; any other token is reported as
## a parse error.  The token after it is fetched before returning.
my $parse_rcdata = sub ($) {
  my $content_model_flag = shift;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($el);

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my @chunks;
  !!!nack ('t40.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    push @chunks, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunks;

  ## Step 5
  if (length $text) {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($text);
    $el->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  my $is_matching_end_tag = ($token->{type} == END_TAG_TOKEN &&
                             $token->{tag_name} eq $start_tag_name);
  if ($is_matching_end_tag) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
  } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
  } else {
    die "$0: $content_model_flag in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3631 wakaba 1.1
## Handles a |script| start tag (the current token).
##
## Creates the |script| element, switches the tokenizer to the CDATA
## content model, appends all following character tokens to the
## element as text, and then expects a |script| end tag; any other
## token is reported as a parse error.  Unless
## |$self->{inner_html_node}| is defined, the element is inserted
## with |$insert|.  The next token is fetched before returning.
my $script_start_tag = sub () {
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  my @chunks;
  !!!nack ('t45.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) {
    !!!cp ('t45');
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  if (length $text) {
    !!!cp ('t46');
    $script_el->manakai_append_text ($text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  if ($token->{type} != END_TAG_TOKEN or
      $token->{tag_name} ne 'script') {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  } else {
    !!!cp ('t47');
    ## Ignore the token
  }

  if (not defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
3683    
3684 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3685     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3686     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3687    
## The adoption agency algorithm (AAA), run for the end tag of a
## formatting element.
##
## Argument: the end tag token (a hash reference whose |tag_name| is
## the name of the formatting element to close).
##
## The algorithm repeats (|redo FET|) until one of the following
## happens; in every case the next token is fetched before returning:
##
##   - no matching entry is found in the list of active formatting
##     elements after the last marker (parse error);
##   - the matching element is in the stack of open elements but not
##     in scope (parse error);
##   - the matching element is not in the stack of open elements
##     (parse error; the entry is removed from the list);
##   - there is no furthest block, in which case the stack is popped
##     up to and including the formatting element and its entry is
##     removed from the list.
##
## Uses |$active_formatting_elements|, |$open_tables| and
## |$self->{open_elements}| of the enclosing scope.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1: Find the formatting element: the last entry after the
    ## last marker whose local name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list.  It is
      ## not necessarily the last entry, so |pop| would remove the
      ## wrong element whenever other entries follow it.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      value => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2: Find the furthest block: scanning from the current node
    ## up to the formatting element, the last special or scoping
    ## non-formatting element seen is the topmost one.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3: No furthest block: pop the stack up to and including
    ## the formatting element and drop it from the list.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): If the formatting element is the first entry of
    ## the list, the index below is -1, i.e. the last entry of the
    ## list - confirm that this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: A node that is not in the list of active formatting
      ## elements is removed from the stack and the next node is tried.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8: Insert the last node into the common ancestor, or
    ## foster parent it if the common ancestor is a table-row-ish
    ## element.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10: Copy the child list first, since it changes while the
    ## children are moved.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: Remove the formatting element from the list and insert
    ## the clone after the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: Remove the formatting element from the stack and
    ## insert the clone immediately after the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    ## The clone is inserted (length 0), as in Step 12.  A length of 1
    ## would replace, and thereby drop from the stack, any element
    ## that follows the furthest block.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3920    
## Default insertion routine: appends the given node to the current
## node, i.e. the node of the last entry in the stack of open elements.
## Also installed as the initial value of |$insert|.
$insert = my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
3924    
## Insertion routine with foster parenting.
##
## Argument: the node to insert.
##
## If the current node is not a |TABLE_ROWS_EL| element, the node is
## simply appended to the current node.  Otherwise it is inserted
## into the foster parent element, and the current table is marked
## as tainted.  The foster parent is chosen from the last |table|
## entry in the stack of open elements:
##   - its parent node, if that is an element (the node then goes just
##     before the |table|);
##   - otherwise the element of the stack entry preceding the |table|.
## If the stack holds no |table|, the element of the first stack entry
## is used.
my $insert_to_foster = sub {
  my ($child) = @_;
  if (not ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL)) {
    !!!cp ('t72');
    $self->{open_elements}->[-1]->[0]->append_child ($child);
  } else {
    # MUST
    my ($foster_parent_element, $next_sibling);
    for (my $idx = $#{$self->{open_elements}}; $idx >= 0; $idx--) {
      my $entry = $self->{open_elements}->[$idx];
      next unless $entry->[1] & TABLE_EL;

      my $parent = $entry->[0]->parent_node;
      if (defined $parent and $parent->node_type == 1) {
        !!!cp ('t70');
        $foster_parent_element = $parent;
        $next_sibling = $entry->[0];
      } else {
        !!!cp ('t71');
        $foster_parent_element
            = $self->{open_elements}->[$idx - 1]->[0];
      }
      last;
    }
    unless (defined $foster_parent_element) {
      $foster_parent_element = $self->{open_elements}->[0]->[0];
    }
    $foster_parent_element->insert_before ($child, $next_sibling);
    $open_tables->[-1]->[1] = 1; # tainted
  }
}; # $insert_to_foster
3956    
3957 wakaba 1.126 B: while (1) {
3958 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3959 wakaba 1.79 !!!cp ('t73');
3960 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3961 wakaba 1.52 ## Ignore the token
3962     ## Stay in the phase
3963     !!!next-token;
3964 wakaba 1.126 next B;
3965 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3966 wakaba 1.52 $token->{tag_name} eq 'html') {
3967 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3968 wakaba 1.79 !!!cp ('t79');
3969 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3970 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3971     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3972 wakaba 1.79 !!!cp ('t80');
3973 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3974 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3975 wakaba 1.79 } else {
3976     !!!cp ('t81');
3977 wakaba 1.52 }
3978    
3979 wakaba 1.84 !!!cp ('t82');
3980 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3981 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3982     for my $attr_name (keys %{$token->{attributes}}) {
3983     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3984 wakaba 1.79 !!!cp ('t84');
3985 wakaba 1.52 $top_el->set_attribute_ns
3986     (undef, [undef, $attr_name],
3987     $token->{attributes}->{$attr_name}->{value});
3988     }
3989     }
3990 wakaba 1.125 !!!nack ('t84.1');
3991 wakaba 1.52 !!!next-token;
3992 wakaba 1.126 next B;
3993 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3994 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3995 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3996 wakaba 1.79 !!!cp ('t85');
3997 wakaba 1.52 $self->{document}->append_child ($comment);
3998 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3999 wakaba 1.79 !!!cp ('t86');
4000 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
4001     } else {
4002 wakaba 1.79 !!!cp ('t87');
4003 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4004     }
4005     !!!next-token;
4006 wakaba 1.126 next B;
4007     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
4008     if ($token->{type} == CHARACTER_TOKEN) {
4009     !!!cp ('t87.1');
4010     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4011     !!!next-token;
4012     next B;
4013     } elsif ($token->{type} == START_TAG_TOKEN) {
4014 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
4015     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
4016 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
4017     ($token->{tag_name} eq 'svg' and
4018     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
4019     ## NOTE: "using the rules for secondary insertion mode"then"continue"
4020     !!!cp ('t87.2');
4021     #
4022     } elsif ({
4023 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4024 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
4025     em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
4026     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
4027     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
4028     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
4029     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
4030     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
4031 wakaba 1.126 }->{$token->{tag_name}}) {
4032     !!!cp ('t87.2');
4033     !!!parse-error (type => 'not closed',
4034     value => $self->{open_elements}->[-1]->[0]
4035     ->manakai_local_name,
4036     token => $token);
4037    
4038     pop @{$self->{open_elements}}
4039     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4040    
4041 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4042 wakaba 1.126 ## Reprocess.
4043     next B;
4044     } else {
4045 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
4046     my $tag_name = $token->{tag_name};
4047     if ($nsuri eq $SVG_NS) {
4048     $tag_name = {
4049     altglyph => 'altGlyph',
4050     altglyphdef => 'altGlyphDef',
4051     altglyphitem => 'altGlyphItem',
4052     animatecolor => 'animateColor',
4053     animatemotion => 'animateMotion',
4054     animatetransform => 'animateTransform',
4055     clippath => 'clipPath',
4056     feblend => 'feBlend',
4057     fecolormatrix => 'feColorMatrix',
4058     fecomponenttransfer => 'feComponentTransfer',
4059     fecomposite => 'feComposite',
4060     feconvolvematrix => 'feConvolveMatrix',
4061     fediffuselighting => 'feDiffuseLighting',
4062     fedisplacementmap => 'feDisplacementMap',
4063     fedistantlight => 'feDistantLight',
4064     feflood => 'feFlood',
4065     fefunca => 'feFuncA',
4066     fefuncb => 'feFuncB',
4067     fefuncg => 'feFuncG',
4068     fefuncr => 'feFuncR',
4069     fegaussianblur => 'feGaussianBlur',
4070     feimage => 'feImage',
4071     femerge => 'feMerge',
4072     femergenode => 'feMergeNode',
4073     femorphology => 'feMorphology',
4074     feoffset => 'feOffset',
4075     fepointlight => 'fePointLight',
4076     fespecularlighting => 'feSpecularLighting',
4077     fespotlight => 'feSpotLight',
4078     fetile => 'feTile',
4079     feturbulence => 'feTurbulence',
4080     foreignobject => 'foreignObject',
4081     glyphref => 'glyphRef',
4082     lineargradient => 'linearGradient',
4083     radialgradient => 'radialGradient',
4084     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
4085     textpath => 'textPath',
4086     }->{$tag_name} || $tag_name;
4087     }
4088    
4089     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
4090    
4091     ## "adjust foreign attributes" - done in insert-element-f
4092 wakaba 1.126
4093 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
4094 wakaba 1.126
4095     if ($self->{self_closing}) {
4096     pop @{$self->{open_elements}};
4097     !!!ack ('t87.3');
4098     } else {
4099     !!!cp ('t87.4');
4100     }
4101    
4102     !!!next-token;
4103     next B;
4104     }
4105     } elsif ($token->{type} == END_TAG_TOKEN) {
4106     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4107     !!!cp ('t87.5');
4108     #
4109     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4110     !!!cp ('t87.6');
4111 wakaba 1.146 !!!parse-error (type => 'not closed',
4112     value => $self->{open_elements}->[-1]->[0]
4113     ->manakai_local_name,
4114     token => $token);
4115    
4116     pop @{$self->{open_elements}}
4117     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4118    
4119     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4120     ## Reprocess.
4121     next B;
4122 wakaba 1.126 } else {
4123     die "$0: $token->{type}: Unknown token type";
4124     }
4125     }
4126    
4127     if ($self->{insertion_mode} & HEAD_IMS) {
4128 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4129 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4130 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4131     !!!cp ('t88.2');
4132     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4133     } else {
4134     !!!cp ('t88.1');
4135     ## Ignore the token.
4136     !!!next-token;
4137 wakaba 1.126 next B;
4138 wakaba 1.99 }
4139 wakaba 1.52 unless (length $token->{data}) {
4140 wakaba 1.79 !!!cp ('t88');
4141 wakaba 1.52 !!!next-token;
4142 wakaba 1.126 next B;
4143 wakaba 1.1 }
4144     }
4145 wakaba 1.52
4146 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4147 wakaba 1.79 !!!cp ('t89');
4148 wakaba 1.52 ## As if <head>
4149 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4150 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4151 wakaba 1.123 push @{$self->{open_elements}},
4152     [$self->{head_element}, $el_category->{head}];
4153 wakaba 1.52
4154     ## Reprocess in the "in head" insertion mode...
4155     pop @{$self->{open_elements}};
4156    
4157     ## Reprocess in the "after head" insertion mode...
4158 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4159 wakaba 1.79 !!!cp ('t90');
4160 wakaba 1.52 ## As if </noscript>
4161     pop @{$self->{open_elements}};
4162 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
4163 wakaba 1.1
4164 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
4165     ## As if </head>
4166     pop @{$self->{open_elements}};
4167    
4168     ## Reprocess in the "after head" insertion mode...
4169 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4170 wakaba 1.79 !!!cp ('t91');
4171 wakaba 1.52 pop @{$self->{open_elements}};
4172    
4173     ## Reprocess in the "after head" insertion mode...
4174 wakaba 1.79 } else {
4175     !!!cp ('t92');
4176 wakaba 1.1 }
4177 wakaba 1.52
4178 wakaba 1.123 ## "after head" insertion mode
4179     ## As if <body>
4180     !!!insert-element ('body',, $token);
4181     $self->{insertion_mode} = IN_BODY_IM;
4182     ## reprocess
4183 wakaba 1.126 next B;
4184 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
4185     if ($token->{tag_name} eq 'head') {
4186     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4187     !!!cp ('t93');
4188 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4189 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
4190     ($self->{head_element});
4191     push @{$self->{open_elements}},
4192     [$self->{head_element}, $el_category->{head}];
4193     $self->{insertion_mode} = IN_HEAD_IM;
4194 wakaba 1.125 !!!nack ('t93.1');
4195 wakaba 1.123 !!!next-token;
4196 wakaba 1.126 next B;
4197 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4198 wakaba 1.139 !!!cp ('t93.2');
4199     !!!parse-error (type => 'after head:head', token => $token); ## TODO: error type
4200     ## Ignore the token
4201     !!!nack ('t93.3');
4202     !!!next-token;
4203     next B;
4204 wakaba 1.125 } else {
4205     !!!cp ('t95');
4206     !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
4207     ## Ignore the token
4208     !!!nack ('t95.1');
4209     !!!next-token;
4210 wakaba 1.126 next B;
4211 wakaba 1.125 }
4212     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4213 wakaba 1.126 !!!cp ('t96');
4214     ## As if <head>
4215     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4216     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4217     push @{$self->{open_elements}},
4218     [$self->{head_element}, $el_category->{head}];
4219 wakaba 1.52
4220 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
4221     ## Reprocess in the "in head" insertion mode...
4222     } else {
4223     !!!cp ('t97');
4224     }
4225 wakaba 1.52
4226 wakaba 1.49 if ($token->{tag_name} eq 'base') {
4227 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4228 wakaba 1.79 !!!cp ('t98');
4229 wakaba 1.49 ## As if </noscript>
4230     pop @{$self->{open_elements}};
4231 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
4232 wakaba 1.49
4233 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4234 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4235 wakaba 1.79 } else {
4236     !!!cp ('t99');
4237 wakaba 1.49 }
4238    
4239     ## NOTE: There is a "as if in head" code clone.
4240 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4241 wakaba 1.79 !!!cp ('t100');
4242 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4243 wakaba 1.123 push @{$self->{open_elements}},
4244     [$self->{head_element}, $el_category->{head}];
4245 wakaba 1.79 } else {
4246     !!!cp ('t101');
4247 wakaba 1.49 }
4248 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4249 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4250 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4251 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4252 wakaba 1.125 !!!nack ('t101.1');
4253 wakaba 1.49 !!!next-token;
4254 wakaba 1.126 next B;
4255 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
4256 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4257 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4258 wakaba 1.79 !!!cp ('t102');
4259 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4260 wakaba 1.123 push @{$self->{open_elements}},
4261     [$self->{head_element}, $el_category->{head}];
4262 wakaba 1.79 } else {
4263     !!!cp ('t103');
4264 wakaba 1.25 }
4265 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4266 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4267 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4268 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4269 wakaba 1.125 !!!ack ('t103.1');
4270 wakaba 1.1 !!!next-token;
4271 wakaba 1.126 next B;
4272 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4273     ## NOTE: There is a "as if in head" code clone.
4274 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4275 wakaba 1.79 !!!cp ('t104');
4276 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4277 wakaba 1.123 push @{$self->{open_elements}},
4278     [$self->{head_element}, $el_category->{head}];
4279 wakaba 1.79 } else {
4280     !!!cp ('t105');
4281 wakaba 1.34 }
4282 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4283 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4284 wakaba 1.34
4285     unless ($self->{confident}) {
4286 wakaba 1.134 if ($token->{attributes}->{charset}) {
4287 wakaba 1.79 !!!cp ('t106');
4288 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4289     ## in the {change_encoding} callback.
4290 wakaba 1.63 $self->{change_encoding}
4291 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
4292     $token);
4293 wakaba 1.66
4294     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4295     ->set_user_data (manakai_has_reference =>
4296     $token->{attributes}->{charset}
4297     ->{has_reference});
4298 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4299     if ($token->{attributes}->{content}->{value}
4300 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4301 wakaba 1.70 [\x09-\x0D\x20]*=
4302 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4303 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
4304 wakaba 1.79 !!!cp ('t107');
4305 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4306     ## in the {change_encoding} callback.
4307 wakaba 1.63 $self->{change_encoding}
4308 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4309     $token);
4310 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4311     ->set_user_data (manakai_has_reference =>
4312     $token->{attributes}->{content}
4313     ->{has_reference});
4314 wakaba 1.79 } else {
4315     !!!cp ('t108');
4316 wakaba 1.63 }
4317 wakaba 1.34 }
4318 wakaba 1.66 } else {
4319     if ($token->{attributes}->{charset}) {
4320 wakaba 1.79 !!!cp ('t109');
4321 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4322     ->set_user_data (manakai_has_reference =>
4323     $token->{attributes}->{charset}
4324     ->{has_reference});
4325     }
4326 wakaba 1.68 if ($token->{attributes}->{content}) {
4327 wakaba 1.79 !!!cp ('t110');
4328 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4329     ->set_user_data (manakai_has_reference =>
4330     $token->{attributes}->{content}
4331     ->{has_reference});
4332     }
4333 wakaba 1.34 }
4334    
4335 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4336 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4337 wakaba 1.125 !!!ack ('t110.1');
4338 wakaba 1.34 !!!next-token;
4339 wakaba 1.126 next B;
4340 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
4341 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4342 wakaba 1.79 !!!cp ('t111');
4343 wakaba 1.49 ## As if </noscript>
4344     pop @{$self->{open_elements}};
4345 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
4346 wakaba 1.49
4347 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4348 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4349 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4350 wakaba 1.79 !!!cp ('t112');
4351 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4352 wakaba 1.123 push @{$self->{open_elements}},
4353     [$self->{head_element}, $el_category->{head}];
4354 wakaba 1.79 } else {
4355     !!!cp ('t113');
4356 wakaba 1.25 }
4357 wakaba 1.49
4358     ## NOTE: There is a "as if in head" code clone.
4359 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4360     : $self->{open_elements}->[-1]->[0];
4361 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4362 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4363 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4364 wakaba 1.126 next B;
4365 wakaba 1.148 } elsif ($token->{tag_name} eq 'style' or
4366     $token->{tag_name} eq 'noframes') {
4367 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4368 wakaba 1.54 ## insertion mode IN_HEAD_IM)
4369 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4370 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4371 wakaba 1.79 !!!cp ('t114');
4372 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4373 wakaba 1.123 push @{$self->{open_elements}},
4374     [$self->{head_element}, $el_category->{head}];
4375 wakaba 1.79 } else {
4376     !!!cp ('t115');
4377 wakaba 1.25 }
4378 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4379 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4380 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4381 wakaba 1.126 next B;
4382 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
4383 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
4384 wakaba 1.79 !!!cp ('t116');
4385 wakaba 1.25 ## NOTE: and scripting is disabled
4386 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4387 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4388 wakaba 1.125 !!!nack ('t116.1');
4389 wakaba 1.1 !!!next-token;
4390 wakaba 1.126 next B;
4391 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4392 wakaba 1.79 !!!cp ('t117');
4393 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
4394 wakaba 1.1 ## Ignore the token
4395 wakaba 1.125 !!!nack ('t117.1');
4396 wakaba 1.41 !!!next-token;
4397 wakaba 1.126 next B;
4398 wakaba 1.1 } else {
4399 wakaba 1.79 !!!cp ('t118');
4400 wakaba 1.25 #
4401 wakaba 1.1 }
4402 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4403 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4404 wakaba 1.79 !!!cp ('t119');
4405 wakaba 1.49 ## As if </noscript>
4406     pop @{$self->{open_elements}};
4407 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
4408 wakaba 1.49
4409 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4410 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4411 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4412 wakaba 1.79 !!!cp ('t120');
4413 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4414 wakaba 1.123 push @{$self->{open_elements}},
4415     [$self->{head_element}, $el_category->{head}];
4416 wakaba 1.79 } else {
4417     !!!cp ('t121');
4418 wakaba 1.25 }
4419 wakaba 1.49
4420 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4421 wakaba 1.100 $script_start_tag->();
4422     pop @{$self->{open_elements}} # <head>
4423 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4424 wakaba 1.126 next B;
4425 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4426 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4427 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4428 wakaba 1.79 !!!cp ('t122');
4429 wakaba 1.49 ## As if </noscript>
4430     pop @{$self->{open_elements}};
4431 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
4432 wakaba 1.49
4433     ## Reprocess in the "in head" insertion mode...
4434     ## As if </head>
4435     pop @{$self->{open_elements}};
4436    
4437     ## Reprocess in the "after head" insertion mode...
4438 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4439 wakaba 1.79 !!!cp ('t124');
4440 wakaba 1.49 pop @{$self->{open_elements}};
4441    
4442     ## Reprocess in the "after head" insertion mode...
4443 wakaba 1.79 } else {
4444     !!!cp ('t125');
4445 wakaba 1.49 }
4446    
4447     ## "after head" insertion mode
4448 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4449 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4450 wakaba 1.79 !!!cp ('t126');
4451 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4452     } elsif ($token->{tag_name} eq 'frameset') {
4453 wakaba 1.79 !!!cp ('t127');
4454 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4455     } else {
4456     die "$0: tag name: $self->{tag_name}";
4457     }
4458 wakaba 1.125 !!!nack ('t127.1');
4459 wakaba 1.1 !!!next-token;
4460 wakaba 1.126 next B;
4461 wakaba 1.1 } else {
4462 wakaba 1.79 !!!cp ('t128');
4463 wakaba 1.1 #
4464     }
4465 wakaba 1.49
4466 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4467 wakaba 1.79 !!!cp ('t129');
4468 wakaba 1.49 ## As if </noscript>
4469     pop @{$self->{open_elements}};
4470 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4471 wakaba 1.49
4472     ## Reprocess in the "in head" insertion mode...
4473     ## As if </head>
4474 wakaba 1.25 pop @{$self->{open_elements}};
4475 wakaba 1.49
4476     ## Reprocess in the "after head" insertion mode...
4477 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4478 wakaba 1.79 !!!cp ('t130');
4479 wakaba 1.49 ## As if </head>
4480 wakaba 1.25 pop @{$self->{open_elements}};
4481 wakaba 1.49
4482     ## Reprocess in the "after head" insertion mode...
4483 wakaba 1.79 } else {
4484     !!!cp ('t131');
4485 wakaba 1.49 }
4486    
4487     ## "after head" insertion mode
4488     ## As if <body>
4489 wakaba 1.116 !!!insert-element ('body',, $token);
4490 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4491 wakaba 1.49 ## reprocess
4492 wakaba 1.125 !!!ack-later;
4493 wakaba 1.126 next B;
4494 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4495 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4496 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4497 wakaba 1.79 !!!cp ('t132');
4498 wakaba 1.50 ## As if <head>
4499 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4500 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4501 wakaba 1.123 push @{$self->{open_elements}},
4502     [$self->{head_element}, $el_category->{head}];
4503 wakaba 1.50
4504     ## Reprocess in the "in head" insertion mode...
4505     pop @{$self->{open_elements}};
4506 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4507 wakaba 1.50 !!!next-token;
4508 wakaba 1.126 next B;
4509 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4510 wakaba 1.79 !!!cp ('t133');
4511 wakaba 1.49 ## As if </noscript>
4512     pop @{$self->{open_elements}};
4513 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
4514 wakaba 1.49
4515     ## Reprocess in the "in head" insertion mode...
4516 wakaba 1.50 pop @{$self->{open_elements}};
4517 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4518 wakaba 1.50 !!!next-token;
4519 wakaba 1.126 next B;
4520 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4521 wakaba 1.79 !!!cp ('t134');
4522 wakaba 1.49 pop @{$self->{open_elements}};
4523 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4524 wakaba 1.49 !!!next-token;
4525 wakaba 1.126 next B;
4526 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4527     !!!cp ('t134.1');
4528     !!!parse-error (type => 'unmatched end tag:head', token => $token);
4529     ## Ignore the token
4530     !!!next-token;
4531     next B;
4532 wakaba 1.49 } else {
4533 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4534 wakaba 1.49 }
4535     } elsif ($token->{tag_name} eq 'noscript') {
4536 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4537 wakaba 1.79 !!!cp ('t136');
4538 wakaba 1.49 pop @{$self->{open_elements}};
4539 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4540 wakaba 1.49 !!!next-token;
4541 wakaba 1.126 next B;
4542 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4543     $self->{insertion_mode} == AFTER_HEAD_IM) {
4544 wakaba 1.79 !!!cp ('t137');
4545 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
4546 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4547     !!!next-token;
4548 wakaba 1.126 next B;
4549 wakaba 1.49 } else {
4550 wakaba 1.79 !!!cp ('t138');
4551 wakaba 1.49 #
4552     }
4553     } elsif ({
4554 wakaba 1.31 body => 1, html => 1,
4555     }->{$token->{tag_name}}) {
4556 wakaba 1.139 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4557     $self->{insertion_mode} == IN_HEAD_IM or
4558     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4559 wakaba 1.79 !!!cp ('t140');
4560 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4561 wakaba 1.49 ## Ignore the token
4562     !!!next-token;
4563 wakaba 1.126 next B;
4564 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4565     !!!cp ('t140.1');
4566     !!!parse-error (type => 'unmatched end tag:' . $token->{tag_name}, token => $token);
4567     ## Ignore the token
4568     !!!next-token;
4569     next B;
4570 wakaba 1.79 } else {
4571 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4572 wakaba 1.49 }
4573 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
4574     !!!cp ('t142');
4575     !!!parse-error (type => 'unmatched end tag:p', token => $token);
4576     ## Ignore the token
4577     !!!next-token;
4578     next B;
4579     } elsif ($token->{tag_name} eq 'br') {
4580 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4581 wakaba 1.139 !!!cp ('t142.2');
4582     ## (before head) as if <head>, (in head) as if </head>
4583 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4584 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4585 wakaba 1.139 $self->{insertion_mode} = AFTER_HEAD_IM;
4586    
4587     ## Reprocess in the "after head" insertion mode...
4588     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4589     !!!cp ('t143.2');
4590     ## As if </head>
4591     pop @{$self->{open_elements}};
4592     $self->{insertion_mode} = AFTER_HEAD_IM;
4593    
4594     ## Reprocess in the "after head" insertion mode...
4595     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4596     !!!cp ('t143.3');
4597     ## ISSUE: Two parse errors for <head><noscript></br>
4598     !!!parse-error (type => 'unmatched end tag:br', token => $token);
4599     ## As if </noscript>
4600     pop @{$self->{open_elements}};
4601     $self->{insertion_mode} = IN_HEAD_IM;
4602 wakaba 1.50
4603     ## Reprocess in the "in head" insertion mode...
4604 wakaba 1.139 ## As if </head>
4605     pop @{$self->{open_elements}};
4606     $self->{insertion_mode} = AFTER_HEAD_IM;
4607    
4608     ## Reprocess in the "after head" insertion mode...
4609     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4610     !!!cp ('t143.4');
4611     #
4612 wakaba 1.79 } else {
4613 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4614 wakaba 1.50 }
4615    
4616 wakaba 1.139 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4617     !!!parse-error (type => 'unmatched end tag:br', token => $token);
4618     ## Ignore the token
4619     !!!next-token;
4620     next B;
4621 wakaba 1.25 } else {
4622 wakaba 1.139 !!!cp ('t145');
4623     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4624     ## Ignore the token
4625     !!!next-token;
4626     next B;
4627 wakaba 1.49 }
4628    
4629 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4630 wakaba 1.79 !!!cp ('t146');
4631 wakaba 1.49 ## As if </noscript>
4632     pop @{$self->{open_elements}};
4633 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4634 wakaba 1.49
4635     ## Reprocess in the "in head" insertion mode...
4636     ## As if </head>
4637     pop @{$self->{open_elements}};
4638    
4639     ## Reprocess in the "after head" insertion mode...
4640 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4641 wakaba 1.79 !!!cp ('t147');
4642 wakaba 1.49 ## As if </head>
4643     pop @{$self->{open_elements}};
4644    
4645     ## Reprocess in the "after head" insertion mode...
4646 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4647 wakaba 1.82 ## ISSUE: This case cannot be reached?
4648 wakaba 1.79 !!!cp ('t148');
4649 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4650 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4651     !!!next-token;
4652 wakaba 1.126 next B;
4653 wakaba 1.79 } else {
4654     !!!cp ('t149');
4655 wakaba 1.1 }
4656    
4657 wakaba 1.49 ## "after head" insertion mode
4658     ## As if <body>
4659 wakaba 1.116 !!!insert-element ('body',, $token);
4660 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4661 wakaba 1.52 ## reprocess
4662 wakaba 1.126 next B;
4663 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4664     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4665     !!!cp ('t149.1');
4666    
4667     ## NOTE: As if <head>
4668 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4669 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4670     ($self->{head_element});
4671 wakaba 1.123 #push @{$self->{open_elements}},
4672     # [$self->{head_element}, $el_category->{head}];
4673 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4674     ## NOTE: Reprocess.
4675    
4676     ## NOTE: As if </head>
4677     #pop @{$self->{open_elements}};
4678     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4679     ## NOTE: Reprocess.
4680    
4681     #
4682     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4683     !!!cp ('t149.2');
4684    
4685     ## NOTE: As if </head>
4686     pop @{$self->{open_elements}};
4687     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4688     ## NOTE: Reprocess.
4689    
4690     #
4691     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4692     !!!cp ('t149.3');
4693    
4694 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4695 wakaba 1.104
4696     ## As if </noscript>
4697     pop @{$self->{open_elements}};
4698     #$self->{insertion_mode} = IN_HEAD_IM;
4699     ## NOTE: Reprocess.
4700    
4701     ## NOTE: As if </head>
4702     pop @{$self->{open_elements}};
4703     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4704     ## NOTE: Reprocess.
4705    
4706     #
4707     } else {
4708     !!!cp ('t149.4');
4709     #
4710     }
4711    
4712     ## NOTE: As if <body>
4713 wakaba 1.116 !!!insert-element ('body',, $token);
4714 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4715     ## NOTE: Reprocess.
4716 wakaba 1.126 next B;
4717 wakaba 1.104 } else {
4718     die "$0: $token->{type}: Unknown token type";
4719     }
4720 wakaba 1.52
4721     ## ISSUE: An issue in the spec.
4722 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4723 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4724 wakaba 1.79 !!!cp ('t150');
4725 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4726     $reconstruct_active_formatting_elements->($insert_to_current);
4727    
4728     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4729    
4730     !!!next-token;
4731 wakaba 1.126 next B;
4732 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4733 wakaba 1.52 if ({
4734     caption => 1, col => 1, colgroup => 1, tbody => 1,
4735     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4736     }->{$token->{tag_name}}) {
4737 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4738 wakaba 1.52 ## have an element in table scope
4739 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4740 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4741 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4742 wakaba 1.79 !!!cp ('t151');
4743 wakaba 1.108
4744     ## Close the cell
4745 wakaba 1.125 !!!back-token; # <x>
4746 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4747     tag_name => $node->[0]->manakai_local_name,
4748 wakaba 1.114 line => $token->{line},
4749     column => $token->{column}};
4750 wakaba 1.126 next B;
4751 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4752 wakaba 1.79 !!!cp ('t152');
4753 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4754     last;
4755 wakaba 1.52 }
4756 wakaba 1.108 }
4757    
4758     !!!cp ('t153');
4759     !!!parse-error (type => 'start tag not allowed',
4760 wakaba 1.113 value => $token->{tag_name}, token => $token);
4761 wakaba 1.108 ## Ignore the token
4762 wakaba 1.125 !!!nack ('t153.1');
4763 wakaba 1.108 !!!next-token;
4764 wakaba 1.126 next B;
4765 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4766 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4767 wakaba 1.52
4768 wakaba 1.108 ## NOTE: As if </caption>.
4769 wakaba 1.52 ## have a table element in table scope
4770     my $i;
4771 wakaba 1.108 INSCOPE: {
4772     for (reverse 0..$#{$self->{open_elements}}) {
4773     my $node = $self->{open_elements}->[$_];
4774 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4775 wakaba 1.108 !!!cp ('t155');
4776     $i = $_;
4777     last INSCOPE;
4778 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4779 wakaba 1.108 !!!cp ('t156');
4780     last;
4781     }
4782 wakaba 1.52 }
4783 wakaba 1.108
4784     !!!cp ('t157');
4785     !!!parse-error (type => 'start tag not allowed',
4786 wakaba 1.113 value => $token->{tag_name}, token => $token);
4787 wakaba 1.108 ## Ignore the token
4788 wakaba 1.125 !!!nack ('t157.1');
4789 wakaba 1.108 !!!next-token;
4790 wakaba 1.126 next B;
4791 wakaba 1.52 } # INSCOPE
4792    
4793     ## generate implied end tags
4794 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4795     & END_TAG_OPTIONAL_EL) {
4796 wakaba 1.79 !!!cp ('t158');
4797 wakaba 1.86 pop @{$self->{open_elements}};
4798 wakaba 1.52 }
4799    
4800 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4801 wakaba 1.79 !!!cp ('t159');
4802 wakaba 1.122 !!!parse-error (type => 'not closed',
4803     value => $self->{open_elements}->[-1]->[0]
4804     ->manakai_local_name,
4805     token => $token);
4806 wakaba 1.79 } else {
4807     !!!cp ('t160');
4808 wakaba 1.52 }
4809    
4810     splice @{$self->{open_elements}}, $i;
4811    
4812     $clear_up_to_marker->();
4813    
4814 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4815 wakaba 1.52
4816     ## reprocess
4817 wakaba 1.125 !!!ack-later;
4818 wakaba 1.126 next B;
4819 wakaba 1.52 } else {
4820 wakaba 1.79 !!!cp ('t161');
4821 wakaba 1.52 #
4822     }
4823     } else {
4824 wakaba 1.79 !!!cp ('t162');
4825 wakaba 1.52 #
4826     }
4827 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4828 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4829 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4830 wakaba 1.43 ## have an element in table scope
4831 wakaba 1.52 my $i;
4832 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4833     my $node = $self->{open_elements}->[$_];
4834 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4835 wakaba 1.79 !!!cp ('t163');
4836 wakaba 1.52 $i = $_;
4837 wakaba 1.43 last INSCOPE;
4838 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4839 wakaba 1.79 !!!cp ('t164');
4840 wakaba 1.43 last INSCOPE;
4841     }
4842     } # INSCOPE
4843 wakaba 1.52 unless (defined $i) {
4844 wakaba 1.79 !!!cp ('t165');
4845 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4846 wakaba 1.43 ## Ignore the token
4847     !!!next-token;
4848 wakaba 1.126 next B;
4849 wakaba 1.43 }
4850    
4851 wakaba 1.52 ## generate implied end tags
4852 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4853     & END_TAG_OPTIONAL_EL) {
4854 wakaba 1.79 !!!cp ('t166');
4855 wakaba 1.86 pop @{$self->{open_elements}};
4856 wakaba 1.52 }
4857 wakaba 1.86
4858 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
4859     ne $token->{tag_name}) {
4860 wakaba 1.79 !!!cp ('t167');
4861 wakaba 1.122 !!!parse-error (type => 'not closed',
4862     value => $self->{open_elements}->[-1]->[0]
4863     ->manakai_local_name,
4864     token => $token);
4865 wakaba 1.79 } else {
4866     !!!cp ('t168');
4867 wakaba 1.52 }
4868    
4869     splice @{$self->{open_elements}}, $i;
4870    
4871     $clear_up_to_marker->();
4872    
4873 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4874 wakaba 1.52
4875     !!!next-token;
4876 wakaba 1.126 next B;
4877 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4878 wakaba 1.79 !!!cp ('t169');
4879 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4880 wakaba 1.52 ## Ignore the token
4881     !!!next-token;
4882 wakaba 1.126 next B;
4883 wakaba 1.52 } else {
4884 wakaba 1.79 !!!cp ('t170');
4885 wakaba 1.52 #
4886     }
4887     } elsif ($token->{tag_name} eq 'caption') {
4888 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4889 wakaba 1.43 ## have a table element in table scope
4890     my $i;
4891 wakaba 1.108 INSCOPE: {
4892     for (reverse 0..$#{$self->{open_elements}}) {
4893     my $node = $self->{open_elements}->[$_];
4894 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4895 wakaba 1.108 !!!cp ('t171');
4896     $i = $_;
4897     last INSCOPE;
4898 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4899 wakaba 1.108 !!!cp ('t172');
4900     last;
4901     }
4902 wakaba 1.43 }
4903 wakaba 1.108
4904     !!!cp ('t173');
4905     !!!parse-error (type => 'unmatched end tag',
4906 wakaba 1.113 value => $token->{tag_name}, token => $token);
4907 wakaba 1.108 ## Ignore the token
4908     !!!next-token;
4909 wakaba 1.126 next B;
4910 wakaba 1.43 } # INSCOPE
4911    
4912     ## generate implied end tags
4913 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4914     & END_TAG_OPTIONAL_EL) {
4915 wakaba 1.79 !!!cp ('t174');
4916 wakaba 1.86 pop @{$self->{open_elements}};
4917 wakaba 1.43 }
4918 wakaba 1.52
4919 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4920 wakaba 1.79 !!!cp ('t175');
4921 wakaba 1.122 !!!parse-error (type => 'not closed',
4922     value => $self->{open_elements}->[-1]->[0]
4923     ->manakai_local_name,
4924     token => $token);
4925 wakaba 1.79 } else {
4926     !!!cp ('t176');
4927 wakaba 1.52 }
4928    
4929     splice @{$self->{open_elements}}, $i;
4930    
4931     $clear_up_to_marker->();
4932    
4933 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4934 wakaba 1.52
4935     !!!next-token;
4936 wakaba 1.126 next B;
4937 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4938 wakaba 1.79 !!!cp ('t177');
4939 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4940 wakaba 1.52 ## Ignore the token
4941     !!!next-token;
4942 wakaba 1.126 next B;
4943 wakaba 1.52 } else {
4944 wakaba 1.79 !!!cp ('t178');
4945 wakaba 1.52 #
4946     }
4947     } elsif ({
4948     table => 1, tbody => 1, tfoot => 1,
4949     thead => 1, tr => 1,
4950     }->{$token->{tag_name}} and
4951 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4952 wakaba 1.52 ## have an element in table scope
4953     my $i;
4954     my $tn;
4955 wakaba 1.108 INSCOPE: {
4956     for (reverse 0..$#{$self->{open_elements}}) {
4957     my $node = $self->{open_elements}->[$_];
4958 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4959 wakaba 1.108 !!!cp ('t179');
4960     $i = $_;
4961    
4962     ## Close the cell
4963 wakaba 1.125 !!!back-token; # </x>
4964 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4965     line => $token->{line},
4966     column => $token->{column}};
4967 wakaba 1.126 next B;
4968 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
4969 wakaba 1.108 !!!cp ('t180');
4970 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
4971 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
4972     ## in scope in the stack of open elements by definition.
4973 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4974 wakaba 1.108 ## ISSUE: Can this be reached?
4975     !!!cp ('t181');
4976     last;
4977     }
4978 wakaba 1.52 }
4979 wakaba 1.108
4980 wakaba 1.79 !!!cp ('t182');
4981 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4982 wakaba 1.113 value => $token->{tag_name}, token => $token);
4983 wakaba 1.52 ## Ignore the token
4984     !!!next-token;
4985 wakaba 1.126 next B;
4986 wakaba 1.108 } # INSCOPE
4987 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4988 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4989 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4990 wakaba 1.52
4991     ## As if </caption>
4992     ## have a table element in table scope
4993     my $i;
4994     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4995     my $node = $self->{open_elements}->[$_];
4996 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4997 wakaba 1.79 !!!cp ('t184');
4998 wakaba 1.52 $i = $_;
4999     last INSCOPE;
5000 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5001 wakaba 1.79 !!!cp ('t185');
5002 wakaba 1.52 last INSCOPE;
5003     }
5004     } # INSCOPE
5005     unless (defined $i) {
5006 wakaba 1.79 !!!cp ('t186');
5007 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
5008 wakaba 1.52 ## Ignore the token
5009     !!!next-token;
5010 wakaba 1.126 next B;
5011 wakaba 1.52 }
5012    
5013     ## generate implied end tags
5014 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5015 wakaba 1.79 !!!cp ('t187');
5016 wakaba 1.86 pop @{$self->{open_elements}};
5017 wakaba 1.52 }
5018    
5019 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5020 wakaba 1.79 !!!cp ('t188');
5021 wakaba 1.122 !!!parse-error (type => 'not closed',
5022     value => $self->{open_elements}->[-1]->[0]
5023     ->manakai_local_name,
5024     token => $token);
5025 wakaba 1.79 } else {
5026     !!!cp ('t189');
5027 wakaba 1.52 }
5028    
5029     splice @{$self->{open_elements}}, $i;
5030    
5031     $clear_up_to_marker->();
5032    
5033 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5034 wakaba 1.52
5035     ## reprocess
5036 wakaba 1.126 next B;
5037 wakaba 1.52 } elsif ({
5038     body => 1, col => 1, colgroup => 1, html => 1,
5039     }->{$token->{tag_name}}) {
5040 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
5041 wakaba 1.79 !!!cp ('t190');
5042 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5043 wakaba 1.52 ## Ignore the token
5044     !!!next-token;
5045 wakaba 1.126 next B;
5046 wakaba 1.52 } else {
5047 wakaba 1.79 !!!cp ('t191');
5048 wakaba 1.52 #
5049     }
5050     } elsif ({
5051     tbody => 1, tfoot => 1,
5052     thead => 1, tr => 1,
5053     }->{$token->{tag_name}} and
5054 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5055 wakaba 1.79 !!!cp ('t192');
5056 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5057 wakaba 1.52 ## Ignore the token
5058     !!!next-token;
5059 wakaba 1.126 next B;
5060 wakaba 1.52 } else {
5061 wakaba 1.79 !!!cp ('t193');
5062 wakaba 1.52 #
5063     }
5064 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5065     for my $entry (@{$self->{open_elements}}) {
5066 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
5067 wakaba 1.104 !!!cp ('t75');
5068 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5069 wakaba 1.104 last;
5070     }
5071     }
5072    
5073     ## Stop parsing.
5074     last B;
5075 wakaba 1.52 } else {
5076     die "$0: $token->{type}: Unknown token type";
5077     }
5078    
5079     $insert = $insert_to_current;
5080     #
5081 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
5082 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5083 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
5084     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5085     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5086 wakaba 1.52
5087 wakaba 1.95 unless (length $token->{data}) {
5088     !!!cp ('t194');
5089     !!!next-token;
5090 wakaba 1.126 next B;
5091 wakaba 1.95 } else {
5092     !!!cp ('t195');
5093     }
5094     }
5095 wakaba 1.52
5096 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
5097 wakaba 1.52
5098     ## As if in body, but insert into foster parent element
5099     ## ISSUE: Spec says that "whenever a node would be inserted
5100     ## into the current node" while characters might not
5101     ## result in a new Text node.
5102     $reconstruct_active_formatting_elements->($insert_to_foster);
5103    
5104 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
5105 wakaba 1.52 # MUST
5106     my $foster_parent_element;
5107     my $next_sibling;
5108     my $prev_sibling;
5109     OE: for (reverse 0..$#{$self->{open_elements}}) {
5110 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
5111 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5112     if (defined $parent and $parent->node_type == 1) {
5113 wakaba 1.79 !!!cp ('t196');
5114 wakaba 1.52 $foster_parent_element = $parent;
5115     $next_sibling = $self->{open_elements}->[$_]->[0];
5116     $prev_sibling = $next_sibling->previous_sibling;
5117     } else {
5118 wakaba 1.79 !!!cp ('t197');
5119 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5120     $prev_sibling = $foster_parent_element->last_child;
5121     }
5122     last OE;
5123     }
5124     } # OE
5125     $foster_parent_element = $self->{open_elements}->[0]->[0] and
5126     $prev_sibling = $foster_parent_element->last_child
5127     unless defined $foster_parent_element;
5128     if (defined $prev_sibling and
5129     $prev_sibling->node_type == 3) {
5130 wakaba 1.79 !!!cp ('t198');
5131 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
5132     } else {
5133 wakaba 1.79 !!!cp ('t199');
5134 wakaba 1.52 $foster_parent_element->insert_before
5135     ($self->{document}->create_text_node ($token->{data}),
5136     $next_sibling);
5137     }
5138 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
5139     } else {
5140     !!!cp ('t200');
5141     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5142     }
5143 wakaba 1.52
5144 wakaba 1.95 !!!next-token;
5145 wakaba 1.126 next B;
5146 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5147 wakaba 1.52 if ({
5148 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
5149 wakaba 1.52 th => 1, td => 1,
5150     }->{$token->{tag_name}}) {
5151 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
5152 wakaba 1.52 ## Clear back to table context
5153 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5154     & TABLE_SCOPING_EL)) {
5155 wakaba 1.79 !!!cp ('t201');
5156 wakaba 1.52 pop @{$self->{open_elements}};
5157 wakaba 1.43 }
5158    
5159 wakaba 1.116 !!!insert-element ('tbody',, $token);
5160 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5161 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5162     }
5163    
5164 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5165 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
5166 wakaba 1.79 !!!cp ('t202');
5167 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
5168 wakaba 1.52 }
5169 wakaba 1.43
5170 wakaba 1.52 ## Clear back to table body context
5171 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5172     & TABLE_ROWS_SCOPING_EL)) {
5173 wakaba 1.79 !!!cp ('t203');
5174 wakaba 1.83 ## ISSUE: Can this case be reached?
5175 wakaba 1.52 pop @{$self->{open_elements}};
5176     }
5177 wakaba 1.43
5178 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5179 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5180 wakaba 1.79 !!!cp ('t204');
5181 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5182 wakaba 1.125 !!!nack ('t204');
5183 wakaba 1.52 !!!next-token;
5184 wakaba 1.126 next B;
5185 wakaba 1.52 } else {
5186 wakaba 1.79 !!!cp ('t205');
5187 wakaba 1.116 !!!insert-element ('tr',, $token);
5188 wakaba 1.52 ## reprocess in the "in row" insertion mode
5189     }
5190 wakaba 1.79 } else {
5191     !!!cp ('t206');
5192 wakaba 1.52 }
5193    
5194     ## Clear back to table row context
5195 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5196     & TABLE_ROW_SCOPING_EL)) {
5197 wakaba 1.79 !!!cp ('t207');
5198 wakaba 1.52 pop @{$self->{open_elements}};
5199 wakaba 1.43 }
5200 wakaba 1.52
5201 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5202 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
5203 wakaba 1.52
5204     push @$active_formatting_elements, ['#marker', ''];
5205    
5206 wakaba 1.125 !!!nack ('t207.1');
5207 wakaba 1.52 !!!next-token;
5208 wakaba 1.126 next B;
5209 wakaba 1.52 } elsif ({
5210     caption => 1, col => 1, colgroup => 1,
5211     tbody => 1, tfoot => 1, thead => 1,
5212 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5213 wakaba 1.52 }->{$token->{tag_name}}) {
5214 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5215 wakaba 1.52 ## As if </tr>
5216 wakaba 1.43 ## have an element in table scope
5217     my $i;
5218     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5219     my $node = $self->{open_elements}->[$_];
5220 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5221 wakaba 1.79 !!!cp ('t208');
5222 wakaba 1.43 $i = $_;
5223     last INSCOPE;
5224 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5225 wakaba 1.79 !!!cp ('t209');
5226 wakaba 1.43 last INSCOPE;
5227     }
5228     } # INSCOPE
5229 wakaba 1.79 unless (defined $i) {
5230 wakaba 1.125 !!!cp ('t210');
5231 wakaba 1.83 ## TODO: This type is wrong.
5232 wakaba 1.125 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
5233 wakaba 1.52 ## Ignore the token
5234 wakaba 1.125 !!!nack ('t210.1');
5235 wakaba 1.52 !!!next-token;
5236 wakaba 1.126 next B;
5237 wakaba 1.43 }
5238    
5239 wakaba 1.52 ## Clear back to table row context
5240 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5241     & TABLE_ROW_SCOPING_EL)) {
5242 wakaba 1.79 !!!cp ('t211');
5243 wakaba 1.83 ## ISSUE: Can this case be reached?
5244 wakaba 1.52 pop @{$self->{open_elements}};
5245 wakaba 1.1 }
5246 wakaba 1.43
5247 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5248 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5249 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5250 wakaba 1.79 !!!cp ('t212');
5251 wakaba 1.52 ## reprocess
5252 wakaba 1.125 !!!ack-later;
5253 wakaba 1.126 next B;
5254 wakaba 1.52 } else {
5255 wakaba 1.79 !!!cp ('t213');
5256 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5257     }
5258 wakaba 1.1 }
5259 wakaba 1.52
5260 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5261 wakaba 1.52 ## have an element in table scope
5262 wakaba 1.43 my $i;
5263     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5264     my $node = $self->{open_elements}->[$_];
5265 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5266 wakaba 1.79 !!!cp ('t214');
5267 wakaba 1.43 $i = $_;
5268     last INSCOPE;
5269 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5270 wakaba 1.79 !!!cp ('t215');
5271 wakaba 1.43 last INSCOPE;
5272     }
5273     } # INSCOPE
5274 wakaba 1.52 unless (defined $i) {
5275 wakaba 1.79 !!!cp ('t216');
5276 wakaba 1.82 ## TODO: This error type is wrong.
5277 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5278 wakaba 1.52 ## Ignore the token
5279 wakaba 1.125 !!!nack ('t216.1');
5280 wakaba 1.52 !!!next-token;
5281 wakaba 1.126 next B;
5282 wakaba 1.43 }
5283 wakaba 1.52
5284     ## Clear back to table body context
5285 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5286     & TABLE_ROWS_SCOPING_EL)) {
5287 wakaba 1.79 !!!cp ('t217');
5288 wakaba 1.83 ## ISSUE: Can this state be reached?
5289 wakaba 1.52 pop @{$self->{open_elements}};
5290 wakaba 1.43 }
5291    
5292 wakaba 1.52 ## As if <{current node}>
5293     ## have an element in table scope
5294     ## true by definition
5295 wakaba 1.43
5296 wakaba 1.52 ## Clear back to table body context
5297     ## nop by definition
5298 wakaba 1.43
5299 wakaba 1.52 pop @{$self->{open_elements}};
5300 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5301 wakaba 1.52 ## reprocess in "in table" insertion mode...
5302 wakaba 1.79 } else {
5303     !!!cp ('t218');
5304 wakaba 1.52 }
5305    
5306     if ($token->{tag_name} eq 'col') {
5307     ## Clear back to table context
5308 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5309     & TABLE_SCOPING_EL)) {
5310 wakaba 1.79 !!!cp ('t219');
5311 wakaba 1.83 ## ISSUE: Can this state be reached?
5312 wakaba 1.52 pop @{$self->{open_elements}};
5313     }
5314 wakaba 1.43
5315 wakaba 1.116 !!!insert-element ('colgroup',, $token);
5316 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5317 wakaba 1.52 ## reprocess
5318 wakaba 1.125 !!!ack-later;
5319 wakaba 1.126 next B;
5320 wakaba 1.52 } elsif ({
5321     caption => 1,
5322     colgroup => 1,
5323     tbody => 1, tfoot => 1, thead => 1,
5324     }->{$token->{tag_name}}) {
5325     ## Clear back to table context
5326 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5327     & TABLE_SCOPING_EL)) {
5328 wakaba 1.79 !!!cp ('t220');
5329 wakaba 1.83 ## ISSUE: Can this state be reached?
5330 wakaba 1.52 pop @{$self->{open_elements}};
5331 wakaba 1.1 }
5332 wakaba 1.52
5333     push @$active_formatting_elements, ['#marker', '']
5334     if $token->{tag_name} eq 'caption';
5335    
5336 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5337 wakaba 1.52 $self->{insertion_mode} = {
5338 wakaba 1.54 caption => IN_CAPTION_IM,
5339     colgroup => IN_COLUMN_GROUP_IM,
5340     tbody => IN_TABLE_BODY_IM,
5341     tfoot => IN_TABLE_BODY_IM,
5342     thead => IN_TABLE_BODY_IM,
5343 wakaba 1.52 }->{$token->{tag_name}};
5344 wakaba 1.1 !!!next-token;
5345 wakaba 1.125 !!!nack ('t220.1');
5346 wakaba 1.126 next B;
5347 wakaba 1.52 } else {
5348     die "$0: in table: <>: $token->{tag_name}";
5349 wakaba 1.1 }
5350 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5351 wakaba 1.122 !!!parse-error (type => 'not closed',
5352     value => $self->{open_elements}->[-1]->[0]
5353     ->manakai_local_name,
5354     token => $token);
5355 wakaba 1.1
5356 wakaba 1.52 ## As if </table>
5357 wakaba 1.1 ## have a table element in table scope
5358     my $i;
5359 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5360     my $node = $self->{open_elements}->[$_];
5361 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5362 wakaba 1.79 !!!cp ('t221');
5363 wakaba 1.1 $i = $_;
5364     last INSCOPE;
5365 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5366 wakaba 1.79 !!!cp ('t222');
5367 wakaba 1.1 last INSCOPE;
5368     }
5369     } # INSCOPE
5370     unless (defined $i) {
5371 wakaba 1.79 !!!cp ('t223');
5372 wakaba 1.83 ## TODO: The following is wrong, maybe.
5373 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
5374 wakaba 1.52 ## Ignore tokens </table><table>
5375 wakaba 1.125 !!!nack ('t223.1');
5376 wakaba 1.1 !!!next-token;
5377 wakaba 1.126 next B;
5378 wakaba 1.1 }
5379    
5380 wakaba 1.106 ## TODO: The following are removed from the latest spec.
5381 wakaba 1.1 ## generate implied end tags
5382 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5383 wakaba 1.79 !!!cp ('t224');
5384 wakaba 1.86 pop @{$self->{open_elements}};
5385 wakaba 1.1 }
5386    
5387 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5388 wakaba 1.79 !!!cp ('t225');
5389 wakaba 1.122 ## NOTE: |<table><tr><table>|
5390     !!!parse-error (type => 'not closed',
5391     value => $self->{open_elements}->[-1]->[0]
5392     ->manakai_local_name,
5393     token => $token);
5394 wakaba 1.79 } else {
5395     !!!cp ('t226');
5396 wakaba 1.1 }
5397    
5398 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5399 wakaba 1.95 pop @{$open_tables};
5400 wakaba 1.1
5401 wakaba 1.52 $self->_reset_insertion_mode;
5402 wakaba 1.1
5403 wakaba 1.125 ## reprocess
5404     !!!ack-later;
5405 wakaba 1.126 next B;
5406 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
5407     if (not $open_tables->[-1]->[1]) { # tainted
5408     !!!cp ('t227.8');
5409     ## NOTE: This is a "as if in head" code clone.
5410     $parse_rcdata->(CDATA_CONTENT_MODEL);
5411 wakaba 1.126 next B;
5412 wakaba 1.100 } else {
5413     !!!cp ('t227.7');
5414     #
5415     }
5416     } elsif ($token->{tag_name} eq 'script') {
5417     if (not $open_tables->[-1]->[1]) { # tainted
5418     !!!cp ('t227.6');
5419     ## NOTE: This is a "as if in head" code clone.
5420     $script_start_tag->();
5421 wakaba 1.126 next B;
5422 wakaba 1.100 } else {
5423     !!!cp ('t227.5');
5424     #
5425     }
5426 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5427     if (not $open_tables->[-1]->[1]) { # tainted
5428     if ($token->{attributes}->{type}) { ## TODO: case
5429     my $type = lc $token->{attributes}->{type}->{value};
5430     if ($type eq 'hidden') {
5431     !!!cp ('t227.3');
5432 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5433 wakaba 1.98
5434 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5435 wakaba 1.98
5436     ## TODO: form element pointer
5437    
5438     pop @{$self->{open_elements}};
5439    
5440     !!!next-token;
5441 wakaba 1.125 !!!ack ('t227.2.1');
5442 wakaba 1.126 next B;
5443 wakaba 1.98 } else {
5444     !!!cp ('t227.2');
5445     #
5446     }
5447     } else {
5448     !!!cp ('t227.1');
5449     #
5450     }
5451     } else {
5452     !!!cp ('t227.4');
5453     #
5454     }
5455 wakaba 1.58 } else {
5456 wakaba 1.79 !!!cp ('t227');
5457 wakaba 1.58 #
5458     }
5459 wakaba 1.98
5460 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5461 wakaba 1.98
5462     $insert = $insert_to_foster;
5463     #
5464 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5465 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5466 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5467 wakaba 1.52 ## have an element in table scope
5468     my $i;
5469     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5470     my $node = $self->{open_elements}->[$_];
5471 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5472 wakaba 1.79 !!!cp ('t228');
5473 wakaba 1.52 $i = $_;
5474     last INSCOPE;
5475 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5476 wakaba 1.79 !!!cp ('t229');
5477 wakaba 1.52 last INSCOPE;
5478     }
5479     } # INSCOPE
5480     unless (defined $i) {
5481 wakaba 1.79 !!!cp ('t230');
5482 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5483 wakaba 1.52 ## Ignore the token
5484 wakaba 1.125 !!!nack ('t230.1');
5485 wakaba 1.42 !!!next-token;
5486 wakaba 1.126 next B;
5487 wakaba 1.79 } else {
5488     !!!cp ('t232');
5489 wakaba 1.42 }
5490    
5491 wakaba 1.52 ## Clear back to table row context
5492 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5493     & TABLE_ROW_SCOPING_EL)) {
5494 wakaba 1.79 !!!cp ('t231');
5495 wakaba 1.83 ## ISSUE: Can this state be reached?
5496 wakaba 1.52 pop @{$self->{open_elements}};
5497     }
5498 wakaba 1.42
5499 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5500 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5501 wakaba 1.52 !!!next-token;
5502 wakaba 1.125 !!!nack ('t231.1');
5503 wakaba 1.126 next B;
5504 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5505 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5506 wakaba 1.52 ## As if </tr>
5507     ## have an element in table scope
5508     my $i;
5509     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5510     my $node = $self->{open_elements}->[$_];
5511 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5512 wakaba 1.79 !!!cp ('t233');
5513 wakaba 1.52 $i = $_;
5514     last INSCOPE;
5515 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5516 wakaba 1.79 !!!cp ('t234');
5517 wakaba 1.52 last INSCOPE;
5518 wakaba 1.42 }
5519 wakaba 1.52 } # INSCOPE
5520     unless (defined $i) {
5521 wakaba 1.79 !!!cp ('t235');
5522 wakaba 1.83 ## TODO: The following is wrong.
5523 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
5524 wakaba 1.52 ## Ignore the token
5525 wakaba 1.125 !!!nack ('t236.1');
5526 wakaba 1.52 !!!next-token;
5527 wakaba 1.126 next B;
5528 wakaba 1.42 }
5529 wakaba 1.52
5530     ## Clear back to table row context
5531 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5532     & TABLE_ROW_SCOPING_EL)) {
5533 wakaba 1.79 !!!cp ('t236');
5534 wakaba 1.83 ## ISSUE: Can this state be reached?
5535 wakaba 1.46 pop @{$self->{open_elements}};
5536 wakaba 1.1 }
5537 wakaba 1.46
5538 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5539 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5540 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5541 wakaba 1.1 }
5542    
5543 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5544 wakaba 1.52 ## have an element in table scope
5545     my $i;
5546     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5547     my $node = $self->{open_elements}->[$_];
5548 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5549 wakaba 1.79 !!!cp ('t237');
5550 wakaba 1.52 $i = $_;
5551     last INSCOPE;
5552 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5553 wakaba 1.79 !!!cp ('t238');
5554 wakaba 1.52 last INSCOPE;
5555     }
5556     } # INSCOPE
5557     unless (defined $i) {
5558 wakaba 1.79 !!!cp ('t239');
5559 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5560 wakaba 1.52 ## Ignore the token
5561 wakaba 1.125 !!!nack ('t239.1');
5562 wakaba 1.52 !!!next-token;
5563 wakaba 1.126 next B;
5564 wakaba 1.47 }
5565    
5566     ## Clear back to table body context
5567 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5568     & TABLE_ROWS_SCOPING_EL)) {
5569 wakaba 1.79 !!!cp ('t240');
5570 wakaba 1.47 pop @{$self->{open_elements}};
5571     }
5572    
5573 wakaba 1.52 ## As if <{current node}>
5574     ## have an element in table scope
5575     ## true by definition
5576    
5577     ## Clear back to table body context
5578     ## nop by definition
5579    
5580     pop @{$self->{open_elements}};
5581 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5582 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5583     }
5584    
5585 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5586     ## When you edit the code fragment below, please ensure that
5587     ## the code for <table> in the "in table" insertion mode
5588     ## is synced with it.
5589    
5590 wakaba 1.52 ## have a table element in table scope
5591     my $i;
5592     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5593     my $node = $self->{open_elements}->[$_];
5594 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5595 wakaba 1.79 !!!cp ('t241');
5596 wakaba 1.52 $i = $_;
5597     last INSCOPE;
5598 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5599 wakaba 1.79 !!!cp ('t242');
5600 wakaba 1.52 last INSCOPE;
5601 wakaba 1.47 }
5602 wakaba 1.52 } # INSCOPE
5603     unless (defined $i) {
5604 wakaba 1.79 !!!cp ('t243');
5605 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5606 wakaba 1.52 ## Ignore the token
5607 wakaba 1.125 !!!nack ('t243.1');
5608 wakaba 1.52 !!!next-token;
5609 wakaba 1.126 next B;
5610 wakaba 1.3 }
5611 wakaba 1.52
5612     splice @{$self->{open_elements}}, $i;
5613 wakaba 1.95 pop @{$open_tables};
5614 wakaba 1.1
5615 wakaba 1.52 $self->_reset_insertion_mode;
5616 wakaba 1.47
5617     !!!next-token;
5618 wakaba 1.126 next B;
5619 wakaba 1.47 } elsif ({
5620 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5621 wakaba 1.52 }->{$token->{tag_name}} and
5622 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5623 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5624 wakaba 1.52 ## have an element in table scope
5625     my $i;
5626     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5627     my $node = $self->{open_elements}->[$_];
5628 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5629 wakaba 1.79 !!!cp ('t247');
5630 wakaba 1.52 $i = $_;
5631     last INSCOPE;
5632 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5633 wakaba 1.79 !!!cp ('t248');
5634 wakaba 1.52 last INSCOPE;
5635     }
5636     } # INSCOPE
5637     unless (defined $i) {
5638 wakaba 1.79 !!!cp ('t249');
5639 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5640 wakaba 1.52 ## Ignore the token
5641 wakaba 1.125 !!!nack ('t249.1');
5642 wakaba 1.52 !!!next-token;
5643 wakaba 1.126 next B;
5644 wakaba 1.52 }
5645    
5646 wakaba 1.48 ## As if </tr>
5647     ## have an element in table scope
5648     my $i;
5649     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5650     my $node = $self->{open_elements}->[$_];
5651 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5652 wakaba 1.79 !!!cp ('t250');
5653 wakaba 1.48 $i = $_;
5654     last INSCOPE;
5655 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5656 wakaba 1.79 !!!cp ('t251');
5657 wakaba 1.48 last INSCOPE;
5658     }
5659     } # INSCOPE
5660 wakaba 1.52 unless (defined $i) {
5661 wakaba 1.79 !!!cp ('t252');
5662 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
5663 wakaba 1.52 ## Ignore the token
5664 wakaba 1.125 !!!nack ('t252.1');
5665 wakaba 1.52 !!!next-token;
5666 wakaba 1.126 next B;
5667 wakaba 1.52 }
5668 wakaba 1.48
5669     ## Clear back to table row context
5670 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5671     & TABLE_ROW_SCOPING_EL)) {
5672 wakaba 1.79 !!!cp ('t253');
5673 wakaba 1.83 ## ISSUE: Can this case be reached?
5674 wakaba 1.48 pop @{$self->{open_elements}};
5675     }
5676    
5677     pop @{$self->{open_elements}}; # tr
5678 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5679 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5680     }
5681    
5682     ## have an element in table scope
5683     my $i;
5684     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5685     my $node = $self->{open_elements}->[$_];
5686 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5687 wakaba 1.79 !!!cp ('t254');
5688 wakaba 1.52 $i = $_;
5689     last INSCOPE;
5690 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5691 wakaba 1.79 !!!cp ('t255');
5692 wakaba 1.52 last INSCOPE;
5693     }
5694     } # INSCOPE
5695     unless (defined $i) {
5696 wakaba 1.79 !!!cp ('t256');
5697 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5698 wakaba 1.52 ## Ignore the token
5699 wakaba 1.125 !!!nack ('t256.1');
5700 wakaba 1.52 !!!next-token;
5701 wakaba 1.126 next B;
5702 wakaba 1.52 }
5703    
5704     ## Clear back to table body context
5705 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5706     & TABLE_ROWS_SCOPING_EL)) {
5707 wakaba 1.79 !!!cp ('t257');
5708 wakaba 1.83 ## ISSUE: Can this case be reached?
5709 wakaba 1.52 pop @{$self->{open_elements}};
5710     }
5711    
5712     pop @{$self->{open_elements}};
5713 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5714 wakaba 1.125 !!!nack ('t257.1');
5715 wakaba 1.52 !!!next-token;
5716 wakaba 1.126 next B;
5717 wakaba 1.52 } elsif ({
5718     body => 1, caption => 1, col => 1, colgroup => 1,
5719     html => 1, td => 1, th => 1,
5720 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5721     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5722 wakaba 1.52 }->{$token->{tag_name}}) {
5723 wakaba 1.125 !!!cp ('t258');
5724     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5725     ## Ignore the token
5726     !!!nack ('t258.1');
5727     !!!next-token;
5728 wakaba 1.126 next B;
5729 wakaba 1.58 } else {
5730 wakaba 1.79 !!!cp ('t259');
5731 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
5732 wakaba 1.52
5733 wakaba 1.58 $insert = $insert_to_foster;
5734     #
5735     }
5736 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5737 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5738 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5739 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5740 wakaba 1.104 !!!cp ('t259.1');
5741 wakaba 1.105 #
5742 wakaba 1.104 } else {
5743     !!!cp ('t259.2');
5744 wakaba 1.105 #
5745 wakaba 1.104 }
5746    
5747     ## Stop parsing
5748     last B;
5749 wakaba 1.58 } else {
5750     die "$0: $token->{type}: Unknown token type";
5751     }
5752 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5753 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5754 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5755     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5756     unless (length $token->{data}) {
5757 wakaba 1.79 !!!cp ('t260');
5758 wakaba 1.52 !!!next-token;
5759 wakaba 1.126 next B;
5760 wakaba 1.52 }
5761     }
5762    
5763 wakaba 1.79 !!!cp ('t261');
5764 wakaba 1.52 #
5765 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5766 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5767 wakaba 1.79 !!!cp ('t262');
5768 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5769 wakaba 1.52 pop @{$self->{open_elements}};
5770 wakaba 1.125 !!!ack ('t262.1');
5771 wakaba 1.52 !!!next-token;
5772 wakaba 1.126 next B;
5773 wakaba 1.52 } else {
5774 wakaba 1.79 !!!cp ('t263');
5775 wakaba 1.52 #
5776     }
5777 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5778 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5779 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5780 wakaba 1.79 !!!cp ('t264');
5781 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5782 wakaba 1.52 ## Ignore the token
5783     !!!next-token;
5784 wakaba 1.126 next B;
5785 wakaba 1.52 } else {
5786 wakaba 1.79 !!!cp ('t265');
5787 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5788 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5789 wakaba 1.52 !!!next-token;
5790 wakaba 1.126 next B;
5791 wakaba 1.52 }
5792     } elsif ($token->{tag_name} eq 'col') {
5793 wakaba 1.79 !!!cp ('t266');
5794 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5795 wakaba 1.52 ## Ignore the token
5796     !!!next-token;
5797 wakaba 1.126 next B;
5798 wakaba 1.52 } else {
5799 wakaba 1.79 !!!cp ('t267');
5800 wakaba 1.52 #
5801     }
5802 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5803 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5804 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5805     !!!cp ('t270.2');
5806     ## Stop parsing.
5807     last B;
5808     } else {
5809     ## NOTE: As if </colgroup>.
5810     !!!cp ('t270.1');
5811     pop @{$self->{open_elements}}; # colgroup
5812     $self->{insertion_mode} = IN_TABLE_IM;
5813     ## Reprocess.
5814 wakaba 1.126 next B;
5815 wakaba 1.104 }
5816     } else {
5817     die "$0: $token->{type}: Unknown token type";
5818     }
5819 wakaba 1.52
5820     ## As if </colgroup>
5821 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5822 wakaba 1.79 !!!cp ('t269');
5823 wakaba 1.104 ## TODO: Wrong error type?
5824 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5825 wakaba 1.52 ## Ignore the token
5826 wakaba 1.125 !!!nack ('t269.1');
5827 wakaba 1.52 !!!next-token;
5828 wakaba 1.126 next B;
5829 wakaba 1.52 } else {
5830 wakaba 1.79 !!!cp ('t270');
5831 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5832 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5833 wakaba 1.125 !!!ack-later;
5834 wakaba 1.52 ## reprocess
5835 wakaba 1.126 next B;
5836 wakaba 1.52 }
5837 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5838 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5839 wakaba 1.79 !!!cp ('t271');
5840 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5841     !!!next-token;
5842 wakaba 1.126 next B;
5843 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5844 wakaba 1.123 if ($token->{tag_name} eq 'option') {
5845     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5846     !!!cp ('t272');
5847     ## As if </option>
5848     pop @{$self->{open_elements}};
5849     } else {
5850     !!!cp ('t273');
5851     }
5852 wakaba 1.52
5853 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5854 wakaba 1.125 !!!nack ('t273.1');
5855 wakaba 1.123 !!!next-token;
5856 wakaba 1.126 next B;
5857 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
5858     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5859     !!!cp ('t274');
5860     ## As if </option>
5861     pop @{$self->{open_elements}};
5862     } else {
5863     !!!cp ('t275');
5864     }
5865 wakaba 1.52
5866 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5867     !!!cp ('t276');
5868     ## As if </optgroup>
5869     pop @{$self->{open_elements}};
5870     } else {
5871     !!!cp ('t277');
5872     }
5873 wakaba 1.52
5874 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5875 wakaba 1.125 !!!nack ('t277.1');
5876 wakaba 1.123 !!!next-token;
5877 wakaba 1.126 next B;
5878 wakaba 1.146 } elsif ({
5879     select => 1, input => 1, textarea => 1,
5880     }->{$token->{tag_name}} or
5881 wakaba 1.101 ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5882     {
5883     caption => 1, table => 1,
5884     tbody => 1, tfoot => 1, thead => 1,
5885     tr => 1, td => 1, th => 1,
5886     }->{$token->{tag_name}})) {
5887     ## TODO: The type below is not good - <select> is replaced by </select>
5888 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5889 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5890     ## as if there were </select> (otherwise).
5891 wakaba 1.123 ## have an element in table scope
5892     my $i;
5893     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5894     my $node = $self->{open_elements}->[$_];
5895     if ($node->[1] & SELECT_EL) {
5896     !!!cp ('t278');
5897     $i = $_;
5898     last INSCOPE;
5899     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5900     !!!cp ('t279');
5901     last INSCOPE;
5902     }
5903     } # INSCOPE
5904     unless (defined $i) {
5905     !!!cp ('t280');
5906     !!!parse-error (type => 'unmatched end tag:select', token => $token);
5907     ## Ignore the token
5908 wakaba 1.125 !!!nack ('t280.1');
5909 wakaba 1.123 !!!next-token;
5910 wakaba 1.126 next B;
5911 wakaba 1.123 }
5912 wakaba 1.52
5913 wakaba 1.123 !!!cp ('t281');
5914     splice @{$self->{open_elements}}, $i;
5915 wakaba 1.52
5916 wakaba 1.123 $self->_reset_insertion_mode;
5917 wakaba 1.47
5918 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5919 wakaba 1.125 !!!nack ('t281.2');
5920 wakaba 1.101 !!!next-token;
5921 wakaba 1.126 next B;
5922 wakaba 1.101 } else {
5923     !!!cp ('t281.1');
5924 wakaba 1.125 !!!ack-later;
5925 wakaba 1.101 ## Reprocess the token.
5926 wakaba 1.126 next B;
5927 wakaba 1.101 }
5928 wakaba 1.58 } else {
5929 wakaba 1.79 !!!cp ('t282');
5930 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5931 wakaba 1.58 ## Ignore the token
5932 wakaba 1.125 !!!nack ('t282.1');
5933 wakaba 1.58 !!!next-token;
5934 wakaba 1.126 next B;
5935 wakaba 1.58 }
5936     } elsif ($token->{type} == END_TAG_TOKEN) {
5937 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
5938     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
5939     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
5940     !!!cp ('t283');
5941     ## As if </option>
5942     splice @{$self->{open_elements}}, -2;
5943     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5944     !!!cp ('t284');
5945     pop @{$self->{open_elements}};
5946     } else {
5947     !!!cp ('t285');
5948     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5949     ## Ignore the token
5950     }
5951 wakaba 1.125 !!!nack ('t285.1');
5952 wakaba 1.123 !!!next-token;
5953 wakaba 1.126 next B;
5954 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
5955     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5956     !!!cp ('t286');
5957     pop @{$self->{open_elements}};
5958     } else {
5959     !!!cp ('t287');
5960     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5961     ## Ignore the token
5962     }
5963 wakaba 1.125 !!!nack ('t287.1');
5964 wakaba 1.123 !!!next-token;
5965 wakaba 1.126 next B;
5966 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
5967     ## have an element in table scope
5968     my $i;
5969     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5970     my $node = $self->{open_elements}->[$_];
5971     if ($node->[1] & SELECT_EL) {
5972     !!!cp ('t288');
5973     $i = $_;
5974     last INSCOPE;
5975     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5976     !!!cp ('t289');
5977     last INSCOPE;
5978     }
5979     } # INSCOPE
5980     unless (defined $i) {
5981     !!!cp ('t290');
5982     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5983     ## Ignore the token
5984 wakaba 1.125 !!!nack ('t290.1');
5985 wakaba 1.123 !!!next-token;
5986 wakaba 1.126 next B;
5987 wakaba 1.123 }
5988 wakaba 1.52
5989 wakaba 1.123 !!!cp ('t291');
5990     splice @{$self->{open_elements}}, $i;
5991 wakaba 1.52
5992 wakaba 1.123 $self->_reset_insertion_mode;
5993 wakaba 1.52
5994 wakaba 1.125 !!!nack ('t291.1');
5995 wakaba 1.123 !!!next-token;
5996 wakaba 1.126 next B;
5997 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5998     {
5999     caption => 1, table => 1, tbody => 1,
6000     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6001     }->{$token->{tag_name}}) {
6002 wakaba 1.83 ## TODO: The following is wrong?
6003 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6004 wakaba 1.52
6005 wakaba 1.123 ## have an element in table scope
6006     my $i;
6007     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6008     my $node = $self->{open_elements}->[$_];
6009     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6010     !!!cp ('t292');
6011     $i = $_;
6012     last INSCOPE;
6013     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6014     !!!cp ('t293');
6015     last INSCOPE;
6016     }
6017     } # INSCOPE
6018     unless (defined $i) {
6019     !!!cp ('t294');
6020     ## Ignore the token
6021 wakaba 1.125 !!!nack ('t294.1');
6022 wakaba 1.123 !!!next-token;
6023 wakaba 1.126 next B;
6024 wakaba 1.123 }
6025 wakaba 1.52
6026 wakaba 1.123 ## As if </select>
6027     ## have an element in table scope
6028     undef $i;
6029     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6030     my $node = $self->{open_elements}->[$_];
6031     if ($node->[1] & SELECT_EL) {
6032     !!!cp ('t295');
6033     $i = $_;
6034     last INSCOPE;
6035     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6036 wakaba 1.83 ## ISSUE: Can this state be reached?
6037 wakaba 1.123 !!!cp ('t296');
6038     last INSCOPE;
6039     }
6040     } # INSCOPE
6041     unless (defined $i) {
6042     !!!cp ('t297');
6043 wakaba 1.83 ## TODO: The following error type is correct?
6044 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:select', token => $token);
6045     ## Ignore the </select> token
6046 wakaba 1.125 !!!nack ('t297.1');
6047 wakaba 1.123 !!!next-token; ## TODO: ok?
6048 wakaba 1.126 next B;
6049 wakaba 1.123 }
6050 wakaba 1.52
6051 wakaba 1.123 !!!cp ('t298');
6052     splice @{$self->{open_elements}}, $i;
6053 wakaba 1.52
6054 wakaba 1.123 $self->_reset_insertion_mode;
6055 wakaba 1.52
6056 wakaba 1.125 !!!ack-later;
6057 wakaba 1.123 ## reprocess
6058 wakaba 1.126 next B;
6059 wakaba 1.58 } else {
6060 wakaba 1.79 !!!cp ('t299');
6061 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
6062 wakaba 1.52 ## Ignore the token
6063 wakaba 1.125 !!!nack ('t299.3');
6064 wakaba 1.52 !!!next-token;
6065 wakaba 1.126 next B;
6066 wakaba 1.58 }
6067 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6068 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6069 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6070     !!!cp ('t299.1');
6071 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6072 wakaba 1.104 } else {
6073     !!!cp ('t299.2');
6074     }
6075    
6076     ## Stop parsing.
6077     last B;
6078 wakaba 1.58 } else {
6079     die "$0: $token->{type}: Unknown token type";
6080     }
6081 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6082 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6083 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6084     my $data = $1;
6085     ## As if in body
6086     $reconstruct_active_formatting_elements->($insert_to_current);
6087    
6088     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6089    
6090     unless (length $token->{data}) {
6091 wakaba 1.79 !!!cp ('t300');
6092 wakaba 1.52 !!!next-token;
6093 wakaba 1.126 next B;
6094 wakaba 1.52 }
6095     }
6096    
6097 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6098 wakaba 1.79 !!!cp ('t301');
6099 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
6100 wakaba 1.52
6101 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6102 wakaba 1.79 } else {
6103     !!!cp ('t302');
6104 wakaba 1.52 }
6105    
6106     ## "after body" insertion mode
6107 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
6108 wakaba 1.52
6109 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6110 wakaba 1.52 ## reprocess
6111 wakaba 1.126 next B;
6112 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6113 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6114 wakaba 1.79 !!!cp ('t303');
6115 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
6116 wakaba 1.52
6117 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6118 wakaba 1.79 } else {
6119     !!!cp ('t304');
6120 wakaba 1.52 }
6121    
6122     ## "after body" insertion mode
6123 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
6124 wakaba 1.52
6125 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6126 wakaba 1.125 !!!ack-later;
6127 wakaba 1.52 ## reprocess
6128 wakaba 1.126 next B;
6129 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6130 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6131 wakaba 1.79 !!!cp ('t305');
6132 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
6133 wakaba 1.52
6134 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6135 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6136 wakaba 1.79 } else {
6137     !!!cp ('t306');
6138 wakaba 1.52 }
6139    
6140     ## "after body" insertion mode
6141     if ($token->{tag_name} eq 'html') {
6142     if (defined $self->{inner_html_node}) {
6143 wakaba 1.79 !!!cp ('t307');
6144 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
6145 wakaba 1.52 ## Ignore the token
6146     !!!next-token;
6147 wakaba 1.126 next B;
6148 wakaba 1.52 } else {
6149 wakaba 1.79 !!!cp ('t308');
6150 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
6151 wakaba 1.52 !!!next-token;
6152 wakaba 1.126 next B;
6153 wakaba 1.52 }
6154     } else {
6155 wakaba 1.79 !!!cp ('t309');
6156 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
6157 wakaba 1.52
6158 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6159 wakaba 1.52 ## reprocess
6160 wakaba 1.126 next B;
6161 wakaba 1.52 }
6162 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6163     !!!cp ('t309.2');
6164     ## Stop parsing
6165     last B;
6166 wakaba 1.52 } else {
6167     die "$0: $token->{type}: Unknown token type";
6168     }
6169 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
6170 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6171 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6172     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6173    
6174     unless (length $token->{data}) {
6175 wakaba 1.79 !!!cp ('t310');
6176 wakaba 1.52 !!!next-token;
6177 wakaba 1.126 next B;
6178 wakaba 1.52 }
6179     }
6180    
6181     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6182 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6183 wakaba 1.79 !!!cp ('t311');
6184 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
6185 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6186 wakaba 1.79 !!!cp ('t312');
6187 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
6188 wakaba 1.52 } else { # "after html frameset"
6189 wakaba 1.79 !!!cp ('t313');
6190 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
6191 wakaba 1.52
6192 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6193 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
6194 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
6195 wakaba 1.52 }
6196    
6197     ## Ignore the token.
6198     if (length $token->{data}) {
6199 wakaba 1.79 !!!cp ('t314');
6200 wakaba 1.52 ## reprocess the rest of characters
6201     } else {
6202 wakaba 1.79 !!!cp ('t315');
6203 wakaba 1.52 !!!next-token;
6204     }
6205 wakaba 1.126 next B;
6206 wakaba 1.52 }
6207    
6208     die qq[$0: Character "$token->{data}"];
6209 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6210 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
6211 wakaba 1.79 !!!cp ('t316');
6212 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
6213 wakaba 1.1
6214 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6215 wakaba 1.84 ## Process in the "after frameset" insertion mode.
6216 wakaba 1.79 } else {
6217     !!!cp ('t317');
6218     }
6219 wakaba 1.1
6220 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6221 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6222 wakaba 1.79 !!!cp ('t318');
6223 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6224 wakaba 1.125 !!!nack ('t318.1');
6225 wakaba 1.52 !!!next-token;
6226 wakaba 1.126 next B;
6227 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
6228 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6229 wakaba 1.79 !!!cp ('t319');
6230 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6231 wakaba 1.52 pop @{$self->{open_elements}};
6232 wakaba 1.125 !!!ack ('t319.1');
6233 wakaba 1.52 !!!next-token;
6234 wakaba 1.126 next B;
6235 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
6236 wakaba 1.79 !!!cp ('t320');
6237 wakaba 1.148 ## NOTE: As if in head.
6238 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6239 wakaba 1.126 next B;
6240 wakaba 1.52 } else {
6241 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6242 wakaba 1.79 !!!cp ('t321');
6243 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
6244 wakaba 1.52 } else {
6245 wakaba 1.79 !!!cp ('t322');
6246 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
6247 wakaba 1.52 }
6248     ## Ignore the token
6249 wakaba 1.125 !!!nack ('t322.1');
6250 wakaba 1.52 !!!next-token;
6251 wakaba 1.126 next B;
6252 wakaba 1.52 }
6253 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6254 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
6255 wakaba 1.79 !!!cp ('t323');
6256 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
6257 wakaba 1.1
6258 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6259 wakaba 1.84 ## Process in the "after frameset" insertion mode.
6260 wakaba 1.79 } else {
6261     !!!cp ('t324');
6262 wakaba 1.52 }
6263 wakaba 1.1
6264 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6265 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6266 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6267 wakaba 1.52 @{$self->{open_elements}} == 1) {
6268 wakaba 1.79 !!!cp ('t325');
6269 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6270 wakaba 1.52 ## Ignore the token
6271     !!!next-token;
6272     } else {
6273 wakaba 1.79 !!!cp ('t326');
6274 wakaba 1.52 pop @{$self->{open_elements}};
6275     !!!next-token;
6276     }
6277 wakaba 1.47
6278 wakaba 1.52 if (not defined $self->{inner_html_node} and
6279 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6280 wakaba 1.79 !!!cp ('t327');
6281 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6282 wakaba 1.79 } else {
6283     !!!cp ('t328');
6284 wakaba 1.52 }
6285 wakaba 1.126 next B;
6286 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
6287 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6288 wakaba 1.79 !!!cp ('t329');
6289 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6290 wakaba 1.52 !!!next-token;
6291 wakaba 1.126 next B;
6292 wakaba 1.52 } else {
6293 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6294 wakaba 1.79 !!!cp ('t330');
6295 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
6296 wakaba 1.52 } else {
6297 wakaba 1.79 !!!cp ('t331');
6298 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
6299 wakaba 1.52 }
6300     ## Ignore the token
6301     !!!next-token;
6302 wakaba 1.126 next B;
6303 wakaba 1.52 }
6304 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6305 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6306 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6307     !!!cp ('t331.1');
6308 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6309 wakaba 1.104 } else {
6310     !!!cp ('t331.2');
6311     }
6312    
6313     ## Stop parsing
6314     last B;
6315 wakaba 1.52 } else {
6316     die "$0: $token->{type}: Unknown token type";
6317     }
6318 wakaba 1.47
6319 wakaba 1.52 ## ISSUE: An issue in spec here
6320     } else {
6321     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6322     }
6323 wakaba 1.47
6324 wakaba 1.52 ## "in body" insertion mode
6325 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
6326 wakaba 1.52 if ($token->{tag_name} eq 'script') {
6327 wakaba 1.79 !!!cp ('t332');
6328 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6329 wakaba 1.100 $script_start_tag->();
6330 wakaba 1.126 next B;
6331 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
6332 wakaba 1.79 !!!cp ('t333');
6333 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6334 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6335 wakaba 1.126 next B;
6336 wakaba 1.52 } elsif ({
6337     base => 1, link => 1,
6338     }->{$token->{tag_name}}) {
6339 wakaba 1.79 !!!cp ('t334');
6340 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6341 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6342 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6343 wakaba 1.125 !!!ack ('t334.1');
6344 wakaba 1.52 !!!next-token;
6345 wakaba 1.126 next B;
6346 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
6347     ## NOTE: This is an "as if in head" code clone, only "-t" differs
6348 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6349 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6350 wakaba 1.46
6351 wakaba 1.52 unless ($self->{confident}) {
6352 wakaba 1.134 if ($token->{attributes}->{charset}) {
6353 wakaba 1.79 !!!cp ('t335');
6354 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6355     ## in the {change_encoding} callback.
6356 wakaba 1.63 $self->{change_encoding}
6357 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
6358 wakaba 1.66
6359     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6360     ->set_user_data (manakai_has_reference =>
6361     $token->{attributes}->{charset}
6362     ->{has_reference});
6363 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
6364     if ($token->{attributes}->{content}->{value}
6365 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6366 wakaba 1.70 [\x09-\x0D\x20]*=
6367 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6368 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
6369 wakaba 1.79 !!!cp ('t336');
6370 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6371     ## in the {change_encoding} callback.
6372 wakaba 1.63 $self->{change_encoding}
6373 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6374 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6375     ->set_user_data (manakai_has_reference =>
6376     $token->{attributes}->{content}
6377     ->{has_reference});
6378 wakaba 1.63 }
6379 wakaba 1.52 }
6380 wakaba 1.66 } else {
6381     if ($token->{attributes}->{charset}) {
6382 wakaba 1.79 !!!cp ('t337');
6383 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6384     ->set_user_data (manakai_has_reference =>
6385     $token->{attributes}->{charset}
6386     ->{has_reference});
6387     }
6388 wakaba 1.68 if ($token->{attributes}->{content}) {
6389 wakaba 1.79 !!!cp ('t338');
6390 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6391     ->set_user_data (manakai_has_reference =>
6392     $token->{attributes}->{content}
6393     ->{has_reference});
6394     }
6395 wakaba 1.52 }
6396 wakaba 1.1
6397 wakaba 1.125 !!!ack ('t338.1');
6398 wakaba 1.52 !!!next-token;
6399 wakaba 1.126 next B;
6400 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
6401 wakaba 1.79 !!!cp ('t341');
6402 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6403 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6404 wakaba 1.126 next B;
6405 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
6406 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
6407 wakaba 1.46
6408 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
6409 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6410 wakaba 1.79 !!!cp ('t342');
6411 wakaba 1.52 ## Ignore the token
6412     } else {
6413     my $body_el = $self->{open_elements}->[1]->[0];
6414     for my $attr_name (keys %{$token->{attributes}}) {
6415     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6416 wakaba 1.79 !!!cp ('t343');
6417 wakaba 1.52 $body_el->set_attribute_ns
6418     (undef, [undef, $attr_name],
6419     $token->{attributes}->{$attr_name}->{value});
6420     }
6421     }
6422     }
6423 wakaba 1.125 !!!nack ('t343.1');
6424 wakaba 1.52 !!!next-token;
6425 wakaba 1.126 next B;
6426 wakaba 1.52 } elsif ({
6427     address => 1, blockquote => 1, center => 1, dir => 1,
6428 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6429     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6430 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6431     pre => 1, listing => 1,
6432 wakaba 1.109 form => 1,
6433     table => 1,
6434     hr => 1,
6435 wakaba 1.52 }->{$token->{tag_name}}) {
6436 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6437     !!!cp ('t350');
6438 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6439 wakaba 1.109 ## Ignore the token
6440 wakaba 1.125 !!!nack ('t350.1');
6441 wakaba 1.109 !!!next-token;
6442 wakaba 1.126 next B;
6443 wakaba 1.109 }
6444    
6445 wakaba 1.52 ## has a p element in scope
6446     INSCOPE: for (reverse @{$self->{open_elements}}) {
6447 wakaba 1.123 if ($_->[1] & P_EL) {
6448 wakaba 1.79 !!!cp ('t344');
6449 wakaba 1.125 !!!back-token; # <form>
6450 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6451     line => $token->{line}, column => $token->{column}};
6452 wakaba 1.126 next B;
6453 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6454 wakaba 1.79 !!!cp ('t345');
6455 wakaba 1.52 last INSCOPE;
6456     }
6457     } # INSCOPE
6458    
6459 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6460 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6461 wakaba 1.125 !!!nack ('t346.1');
6462 wakaba 1.52 !!!next-token;
6463 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6464 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6465     unless (length $token->{data}) {
6466 wakaba 1.79 !!!cp ('t346');
6467 wakaba 1.1 !!!next-token;
6468 wakaba 1.79 } else {
6469     !!!cp ('t349');
6470 wakaba 1.52 }
6471 wakaba 1.79 } else {
6472     !!!cp ('t348');
6473 wakaba 1.52 }
6474 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6475     !!!cp ('t347.1');
6476     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6477    
6478 wakaba 1.125 !!!nack ('t347.2');
6479 wakaba 1.109 !!!next-token;
6480     } elsif ($token->{tag_name} eq 'table') {
6481     !!!cp ('t382');
6482     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6483    
6484     $self->{insertion_mode} = IN_TABLE_IM;
6485    
6486 wakaba 1.125 !!!nack ('t382.1');
6487 wakaba 1.109 !!!next-token;
6488     } elsif ($token->{tag_name} eq 'hr') {
6489     !!!cp ('t386');
6490     pop @{$self->{open_elements}};
6491    
6492 wakaba 1.125 !!!nack ('t386.1');
6493 wakaba 1.109 !!!next-token;
6494 wakaba 1.52 } else {
6495 wakaba 1.125 !!!nack ('t347.1');
6496 wakaba 1.52 !!!next-token;
6497     }
6498 wakaba 1.126 next B;
6499 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6500 wakaba 1.52 ## has a p element in scope
6501     INSCOPE: for (reverse @{$self->{open_elements}}) {
6502 wakaba 1.123 if ($_->[1] & P_EL) {
6503 wakaba 1.79 !!!cp ('t353');
6504 wakaba 1.125 !!!back-token; # <x>
6505 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6506     line => $token->{line}, column => $token->{column}};
6507 wakaba 1.126 next B;
6508 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6509 wakaba 1.79 !!!cp ('t354');
6510 wakaba 1.52 last INSCOPE;
6511     }
6512     } # INSCOPE
6513    
6514     ## Step 1
6515     my $i = -1;
6516     my $node = $self->{open_elements}->[$i];
6517 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6518     dt => {dt => 1, dd => 1},
6519     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6520 wakaba 1.52 LI: {
6521     ## Step 2
6522 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6523 wakaba 1.52 if ($i != -1) {
6524 wakaba 1.79 !!!cp ('t355');
6525 wakaba 1.122 !!!parse-error (type => 'not closed',
6526     value => $self->{open_elements}->[-1]->[0]
6527     ->manakai_local_name,
6528     token => $token);
6529 wakaba 1.79 } else {
6530     !!!cp ('t356');
6531 wakaba 1.52 }
6532     splice @{$self->{open_elements}}, $i;
6533     last LI;
6534 wakaba 1.79 } else {
6535     !!!cp ('t357');
6536 wakaba 1.52 }
6537    
6538     ## Step 3
6539 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6540 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6541 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6542     $node->[1] & SCOPING_EL) and
6543     not ($node->[1] & ADDRESS_EL) and
6544     not ($node->[1] & DIV_EL)) {
6545 wakaba 1.79 !!!cp ('t358');
6546 wakaba 1.52 last LI;
6547     }
6548    
6549 wakaba 1.79 !!!cp ('t359');
6550 wakaba 1.52 ## Step 4
6551     $i--;
6552     $node = $self->{open_elements}->[$i];
6553     redo LI;
6554     } # LI
6555    
6556 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6557 wakaba 1.125 !!!nack ('t359.1');
6558 wakaba 1.52 !!!next-token;
6559 wakaba 1.126 next B;
6560 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6561     ## has a p element in scope
6562     INSCOPE: for (reverse @{$self->{open_elements}}) {
6563 wakaba 1.123 if ($_->[1] & P_EL) {
6564 wakaba 1.79 !!!cp ('t367');
6565 wakaba 1.125 !!!back-token; # <plaintext>
6566 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6567     line => $token->{line}, column => $token->{column}};
6568 wakaba 1.126 next B;
6569 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6570 wakaba 1.79 !!!cp ('t368');
6571 wakaba 1.52 last INSCOPE;
6572 wakaba 1.46 }
6573 wakaba 1.52 } # INSCOPE
6574    
6575 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6576 wakaba 1.52
6577     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6578    
6579 wakaba 1.125 !!!nack ('t368.1');
6580 wakaba 1.52 !!!next-token;
6581 wakaba 1.126 next B;
6582 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6583     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6584     my $node = $active_formatting_elements->[$i];
6585 wakaba 1.123 if ($node->[1] & A_EL) {
6586 wakaba 1.79 !!!cp ('t371');
6587 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6588 wakaba 1.52
6589 wakaba 1.125 !!!back-token; # <a>
6590 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6591     line => $token->{line}, column => $token->{column}};
6592 wakaba 1.113 $formatting_end_tag->($token);
6593 wakaba 1.52
6594     AFE2: for (reverse 0..$#$active_formatting_elements) {
6595     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
6596 wakaba 1.79 !!!cp ('t372');
6597 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
6598     last AFE2;
6599 wakaba 1.1 }
6600 wakaba 1.52 } # AFE2
6601     OE: for (reverse 0..$#{$self->{open_elements}}) {
6602     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
6603 wakaba 1.79 !!!cp ('t373');
6604 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
6605     last OE;
6606 wakaba 1.1 }
6607 wakaba 1.52 } # OE
6608     last AFE;
6609     } elsif ($node->[0] eq '#marker') {
6610 wakaba 1.79 !!!cp ('t374');
6611 wakaba 1.52 last AFE;
6612     }
6613     } # AFE
6614    
6615     $reconstruct_active_formatting_elements->($insert_to_current);
6616 wakaba 1.1
6617 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6618 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6619 wakaba 1.1
6620 wakaba 1.125 !!!nack ('t374.1');
6621 wakaba 1.52 !!!next-token;
6622 wakaba 1.126 next B;
6623 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6624     $reconstruct_active_formatting_elements->($insert_to_current);
6625 wakaba 1.1
6626 wakaba 1.52 ## has a |nobr| element in scope
6627     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6628     my $node = $self->{open_elements}->[$_];
6629 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6630 wakaba 1.79 !!!cp ('t376');
6631 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6632 wakaba 1.125 !!!back-token; # <nobr>
6633 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6634     line => $token->{line}, column => $token->{column}};
6635 wakaba 1.126 next B;
6636 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6637 wakaba 1.79 !!!cp ('t377');
6638 wakaba 1.52 last INSCOPE;
6639     }
6640     } # INSCOPE
6641    
6642 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6643 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6644    
6645 wakaba 1.125 !!!nack ('t377.1');
6646 wakaba 1.52 !!!next-token;
6647 wakaba 1.126 next B;
6648 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6649     ## has a button element in scope
6650     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6651     my $node = $self->{open_elements}->[$_];
6652 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6653 wakaba 1.79 !!!cp ('t378');
6654 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6655 wakaba 1.125 !!!back-token; # <button>
6656 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6657     line => $token->{line}, column => $token->{column}};
6658 wakaba 1.126 next B;
6659 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6660 wakaba 1.79 !!!cp ('t379');
6661 wakaba 1.52 last INSCOPE;
6662     }
6663     } # INSCOPE
6664    
6665     $reconstruct_active_formatting_elements->($insert_to_current);
6666    
6667 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6668 wakaba 1.85
6669     ## TODO: associate with $self->{form_element} if defined
6670    
6671 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6672 wakaba 1.1
6673 wakaba 1.125 !!!nack ('t379.1');
6674 wakaba 1.52 !!!next-token;
6675 wakaba 1.126 next B;
6676 wakaba 1.103 } elsif ({
6677 wakaba 1.109 xmp => 1,
6678     iframe => 1,
6679     noembed => 1,
6680 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
6681 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
6682 wakaba 1.103 }->{$token->{tag_name}}) {
6683 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6684     !!!cp ('t381');
6685     $reconstruct_active_formatting_elements->($insert_to_current);
6686     } else {
6687     !!!cp ('t399');
6688     }
6689     ## NOTE: There is an "as if in body" code clone.
6690 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6691 wakaba 1.126 next B;
6692 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6693 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6694 wakaba 1.52
6695     if (defined $self->{form_element}) {
6696 wakaba 1.79 !!!cp ('t389');
6697 wakaba 1.52 ## Ignore the token
6698 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
6699 wakaba 1.52 !!!next-token;
6700 wakaba 1.126 next B;
6701 wakaba 1.52 } else {
6702 wakaba 1.147 !!!ack ('t391.1');
6703    
6704 wakaba 1.52 my $at = $token->{attributes};
6705     my $form_attrs;
6706     $form_attrs->{action} = $at->{action} if $at->{action};
6707     my $prompt_attr = $at->{prompt};
6708     $at->{name} = {name => 'name', value => 'isindex'};
6709     delete $at->{action};
6710     delete $at->{prompt};
6711     my @tokens = (
6712 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6713 wakaba 1.114 attributes => $form_attrs,
6714     line => $token->{line}, column => $token->{column}},
6715     {type => START_TAG_TOKEN, tag_name => 'hr',
6716     line => $token->{line}, column => $token->{column}},
6717     {type => START_TAG_TOKEN, tag_name => 'p',
6718     line => $token->{line}, column => $token->{column}},
6719     {type => START_TAG_TOKEN, tag_name => 'label',
6720     line => $token->{line}, column => $token->{column}},
6721 wakaba 1.52 );
6722     if ($prompt_attr) {
6723 wakaba 1.79 !!!cp ('t390');
6724 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6725 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6726     };
6727 wakaba 1.1 } else {
6728 wakaba 1.79 !!!cp ('t391');
6729 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6730 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6731 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6732     }; # SHOULD
6733 wakaba 1.52 ## TODO: make this configurable
6734 wakaba 1.1 }
6735 wakaba 1.52 push @tokens,
6736 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6737     line => $token->{line}, column => $token->{column}},
6738 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6739 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6740     line => $token->{line}, column => $token->{column}},
6741     {type => END_TAG_TOKEN, tag_name => 'p',
6742     line => $token->{line}, column => $token->{column}},
6743     {type => START_TAG_TOKEN, tag_name => 'hr',
6744     line => $token->{line}, column => $token->{column}},
6745     {type => END_TAG_TOKEN, tag_name => 'form',
6746     line => $token->{line}, column => $token->{column}};
6747 wakaba 1.52 !!!back-token (@tokens);
6748 wakaba 1.125 !!!next-token;
6749 wakaba 1.126 next B;
6750 wakaba 1.52 }
6751     } elsif ($token->{tag_name} eq 'textarea') {
6752     my $tag_name = $token->{tag_name};
6753     my $el;
6754 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
6755 wakaba 1.52
6756     ## TODO: $self->{form_element} if defined
6757     $self->{content_model} = RCDATA_CONTENT_MODEL;
6758     delete $self->{escape}; # MUST
6759    
6760     $insert->($el);
6761    
6762     my $text = '';
6763 wakaba 1.125 !!!nack ('t392.1');
6764 wakaba 1.52 !!!next-token;
6765 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6766 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6767 wakaba 1.51 unless (length $token->{data}) {
6768 wakaba 1.79 !!!cp ('t392');
6769 wakaba 1.51 !!!next-token;
6770 wakaba 1.79 } else {
6771     !!!cp ('t393');
6772 wakaba 1.51 }
6773 wakaba 1.79 } else {
6774     !!!cp ('t394');
6775 wakaba 1.51 }
6776 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6777 wakaba 1.79 !!!cp ('t395');
6778 wakaba 1.52 $text .= $token->{data};
6779     !!!next-token;
6780     }
6781     if (length $text) {
6782 wakaba 1.79 !!!cp ('t396');
6783 wakaba 1.52 $el->manakai_append_text ($text);
6784     }
6785    
6786     $self->{content_model} = PCDATA_CONTENT_MODEL;
6787 wakaba 1.51
6788 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6789 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6790 wakaba 1.79 !!!cp ('t397');
6791 wakaba 1.52 ## Ignore the token
6792     } else {
6793 wakaba 1.79 !!!cp ('t398');
6794 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
6795 wakaba 1.51 }
6796 wakaba 1.52 !!!next-token;
6797 wakaba 1.126 next B;
6798     } elsif ($token->{tag_name} eq 'math' or
6799     $token->{tag_name} eq 'svg') {
6800     $reconstruct_active_formatting_elements->($insert_to_current);
6801 wakaba 1.131
6802     ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
6803    
6804     ## "adjust foreign attributes" - done in insert-element-f
6805 wakaba 1.126
6806 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
6807 wakaba 1.126
6808     if ($self->{self_closing}) {
6809     pop @{$self->{open_elements}};
6810     !!!ack ('t398.1');
6811     } else {
6812     !!!cp ('t398.2');
6813     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
6814     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
6815     ## mode, "in body" (not "in foreign content") secondary insertion
6816     ## mode, maybe.
6817     }
6818    
6819     !!!next-token;
6820     next B;
6821 wakaba 1.52 } elsif ({
6822     caption => 1, col => 1, colgroup => 1, frame => 1,
6823     frameset => 1, head => 1, option => 1, optgroup => 1,
6824     tbody => 1, td => 1, tfoot => 1, th => 1,
6825     thead => 1, tr => 1,
6826     }->{$token->{tag_name}}) {
6827 wakaba 1.79 !!!cp ('t401');
6828 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
6829 wakaba 1.52 ## Ignore the token
6830 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
6831 wakaba 1.52 !!!next-token;
6832 wakaba 1.126 next B;
6833 wakaba 1.52
6834     ## ISSUE: An issue on HTML5 new elements in the spec.
6835     } else {
6836 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6837     !!!cp ('t384');
6838 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6839 wakaba 1.110 $token->{tag_name} = 'img';
6840     } else {
6841     !!!cp ('t385');
6842     }
6843    
6844     ## NOTE: There is an "as if <br>" code clone.
6845 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6846    
6847 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6848 wakaba 1.109
6849 wakaba 1.110 if ({
6850     applet => 1, marquee => 1, object => 1,
6851     }->{$token->{tag_name}}) {
6852     !!!cp ('t380');
6853     push @$active_formatting_elements, ['#marker', ''];
6854 wakaba 1.125 !!!nack ('t380.1');
6855 wakaba 1.110 } elsif ({
6856     b => 1, big => 1, em => 1, font => 1, i => 1,
6857     s => 1, small => 1, strile => 1,
6858     strong => 1, tt => 1, u => 1,
6859     }->{$token->{tag_name}}) {
6860     !!!cp ('t375');
6861     push @$active_formatting_elements, $self->{open_elements}->[-1];
6862 wakaba 1.125 !!!nack ('t375.1');
6863 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
6864     !!!cp ('t388');
6865     ## TODO: associate with $self->{form_element} if defined
6866     pop @{$self->{open_elements}};
6867 wakaba 1.125 !!!ack ('t388.2');
6868 wakaba 1.110 } elsif ({
6869     area => 1, basefont => 1, bgsound => 1, br => 1,
6870     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6871     #image => 1,
6872     }->{$token->{tag_name}}) {
6873     !!!cp ('t388.1');
6874     pop @{$self->{open_elements}};
6875 wakaba 1.125 !!!ack ('t388.3');
6876 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
6877 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
6878    
6879     if ($self->{insertion_mode} & TABLE_IMS or
6880     $self->{insertion_mode} & BODY_TABLE_IMS or
6881     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6882     !!!cp ('t400.1');
6883     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
6884     } else {
6885     !!!cp ('t400.2');
6886     $self->{insertion_mode} = IN_SELECT_IM;
6887     }
6888 wakaba 1.125 !!!nack ('t400.3');
6889 wakaba 1.110 } else {
6890 wakaba 1.125 !!!nack ('t402');
6891 wakaba 1.109 }
6892 wakaba 1.51
6893 wakaba 1.52 !!!next-token;
6894 wakaba 1.126 next B;
6895 wakaba 1.52 }
6896 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6897 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6898 wakaba 1.107 ## has a |body| element in scope
6899     my $i;
6900 wakaba 1.111 INSCOPE: {
6901     for (reverse @{$self->{open_elements}}) {
6902 wakaba 1.123 if ($_->[1] & BODY_EL) {
6903 wakaba 1.111 !!!cp ('t405');
6904     $i = $_;
6905     last INSCOPE;
6906 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6907 wakaba 1.111 !!!cp ('t405.1');
6908     last;
6909     }
6910 wakaba 1.52 }
6911 wakaba 1.111
6912     !!!parse-error (type => 'start tag not allowed',
6913 wakaba 1.113 value => $token->{tag_name}, token => $token);
6914 wakaba 1.107 ## NOTE: Ignore the token.
6915 wakaba 1.52 !!!next-token;
6916 wakaba 1.126 next B;
6917 wakaba 1.111 } # INSCOPE
6918 wakaba 1.107
6919     for (@{$self->{open_elements}}) {
6920 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
6921 wakaba 1.107 !!!cp ('t403');
6922 wakaba 1.122 !!!parse-error (type => 'not closed',
6923     value => $_->[0]->manakai_local_name,
6924     token => $token);
6925 wakaba 1.107 last;
6926     } else {
6927     !!!cp ('t404');
6928     }
6929     }
6930    
6931     $self->{insertion_mode} = AFTER_BODY_IM;
6932     !!!next-token;
6933 wakaba 1.126 next B;
6934 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6935 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
6936     ## up-to-date, though it has same effect as speced.
6937 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
6938     $self->{open_elements}->[1]->[1] & BODY_EL) {
6939 wakaba 1.52 ## ISSUE: There is an issue in the spec.
6940 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
6941 wakaba 1.79 !!!cp ('t406');
6942 wakaba 1.122 !!!parse-error (type => 'not closed',
6943     value => $self->{open_elements}->[1]->[0]
6944     ->manakai_local_name,
6945     token => $token);
6946 wakaba 1.79 } else {
6947     !!!cp ('t407');
6948 wakaba 1.1 }
6949 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6950 wakaba 1.52 ## reprocess
6951 wakaba 1.126 next B;
6952 wakaba 1.51 } else {
6953 wakaba 1.79 !!!cp ('t408');
6954 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6955 wakaba 1.52 ## Ignore the token
6956     !!!next-token;
6957 wakaba 1.126 next B;
6958 wakaba 1.51 }
6959 wakaba 1.52 } elsif ({
6960     address => 1, blockquote => 1, center => 1, dir => 1,
6961     div => 1, dl => 1, fieldset => 1, listing => 1,
6962     menu => 1, ol => 1, pre => 1, ul => 1,
6963     dd => 1, dt => 1, li => 1,
6964 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6965 wakaba 1.52 }->{$token->{tag_name}}) {
6966     ## has an element in scope
6967     my $i;
6968     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6969     my $node = $self->{open_elements}->[$_];
6970 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6971 wakaba 1.79 !!!cp ('t410');
6972 wakaba 1.52 $i = $_;
6973 wakaba 1.87 last INSCOPE;
6974 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6975 wakaba 1.79 !!!cp ('t411');
6976 wakaba 1.52 last INSCOPE;
6977 wakaba 1.51 }
6978 wakaba 1.52 } # INSCOPE
6979 wakaba 1.89
6980     unless (defined $i) { # has an element in scope
6981     !!!cp ('t413');
6982 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6983 wakaba 1.89 } else {
6984     ## Step 1. generate implied end tags
6985     while ({
6986     dd => ($token->{tag_name} ne 'dd'),
6987     dt => ($token->{tag_name} ne 'dt'),
6988     li => ($token->{tag_name} ne 'li'),
6989     p => 1,
6990 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
6991 wakaba 1.89 !!!cp ('t409');
6992     pop @{$self->{open_elements}};
6993     }
6994    
6995     ## Step 2.
6996 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6997     ne $token->{tag_name}) {
6998 wakaba 1.79 !!!cp ('t412');
6999 wakaba 1.122 !!!parse-error (type => 'not closed',
7000     value => $self->{open_elements}->[-1]->[0]
7001     ->manakai_local_name,
7002     token => $token);
7003 wakaba 1.51 } else {
7004 wakaba 1.89 !!!cp ('t414');
7005 wakaba 1.51 }
7006 wakaba 1.89
7007     ## Step 3.
7008 wakaba 1.52 splice @{$self->{open_elements}}, $i;
7009 wakaba 1.89
7010     ## Step 4.
7011     $clear_up_to_marker->()
7012     if {
7013 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7014 wakaba 1.89 }->{$token->{tag_name}};
7015 wakaba 1.51 }
7016 wakaba 1.52 !!!next-token;
7017 wakaba 1.126 next B;
7018 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
7019 wakaba 1.92 undef $self->{form_element};
7020    
7021 wakaba 1.52 ## has an element in scope
7022 wakaba 1.92 my $i;
7023 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7024     my $node = $self->{open_elements}->[$_];
7025 wakaba 1.123 if ($node->[1] & FORM_EL) {
7026 wakaba 1.79 !!!cp ('t418');
7027 wakaba 1.92 $i = $_;
7028 wakaba 1.52 last INSCOPE;
7029 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7030 wakaba 1.79 !!!cp ('t419');
7031 wakaba 1.52 last INSCOPE;
7032     }
7033     } # INSCOPE
7034 wakaba 1.92
7035     unless (defined $i) { # has an element in scope
7036 wakaba 1.79 !!!cp ('t421');
7037 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7038 wakaba 1.92 } else {
7039     ## Step 1. generate implied end tags
7040 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7041 wakaba 1.92 !!!cp ('t417');
7042     pop @{$self->{open_elements}};
7043     }
7044    
7045     ## Step 2.
7046 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7047     ne $token->{tag_name}) {
7048 wakaba 1.92 !!!cp ('t417.1');
7049 wakaba 1.122 !!!parse-error (type => 'not closed',
7050     value => $self->{open_elements}->[-1]->[0]
7051     ->manakai_local_name,
7052     token => $token);
7053 wakaba 1.92 } else {
7054     !!!cp ('t420');
7055     }
7056    
7057     ## Step 3.
7058     splice @{$self->{open_elements}}, $i;
7059 wakaba 1.52 }
7060    
7061     !!!next-token;
7062 wakaba 1.126 next B;
7063 wakaba 1.52 } elsif ({
7064     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7065     }->{$token->{tag_name}}) {
7066     ## has an element in scope
7067     my $i;
7068     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7069     my $node = $self->{open_elements}->[$_];
7070 wakaba 1.123 if ($node->[1] & HEADING_EL) {
7071 wakaba 1.79 !!!cp ('t423');
7072 wakaba 1.52 $i = $_;
7073     last INSCOPE;
7074 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7075 wakaba 1.79 !!!cp ('t424');
7076 wakaba 1.52 last INSCOPE;
7077 wakaba 1.51 }
7078 wakaba 1.52 } # INSCOPE
7079 wakaba 1.93
7080     unless (defined $i) { # has an element in scope
7081     !!!cp ('t425.1');
7082 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7083 wakaba 1.79 } else {
7084 wakaba 1.93 ## Step 1. generate implied end tags
7085 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7086 wakaba 1.93 !!!cp ('t422');
7087     pop @{$self->{open_elements}};
7088     }
7089    
7090     ## Step 2.
7091 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7092     ne $token->{tag_name}) {
7093 wakaba 1.93 !!!cp ('t425');
7094 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7095 wakaba 1.93 } else {
7096     !!!cp ('t426');
7097     }
7098    
7099     ## Step 3.
7100     splice @{$self->{open_elements}}, $i;
7101 wakaba 1.36 }
7102 wakaba 1.52
7103     !!!next-token;
7104 wakaba 1.126 next B;
7105 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
7106     ## has an element in scope
7107     my $i;
7108     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7109     my $node = $self->{open_elements}->[$_];
7110 wakaba 1.123 if ($node->[1] & P_EL) {
7111 wakaba 1.87 !!!cp ('t410.1');
7112     $i = $_;
7113 wakaba 1.88 last INSCOPE;
7114 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7115 wakaba 1.87 !!!cp ('t411.1');
7116     last INSCOPE;
7117     }
7118     } # INSCOPE
7119 wakaba 1.91
7120     if (defined $i) {
7121 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7122     ne $token->{tag_name}) {
7123 wakaba 1.87 !!!cp ('t412.1');
7124 wakaba 1.122 !!!parse-error (type => 'not closed',
7125     value => $self->{open_elements}->[-1]->[0]
7126     ->manakai_local_name,
7127     token => $token);
7128 wakaba 1.87 } else {
7129 wakaba 1.91 !!!cp ('t414.1');
7130 wakaba 1.87 }
7131 wakaba 1.91
7132 wakaba 1.87 splice @{$self->{open_elements}}, $i;
7133     } else {
7134 wakaba 1.91 !!!cp ('t413.1');
7135 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7136 wakaba 1.91
7137 wakaba 1.87 !!!cp ('t415.1');
7138     ## As if <p>, then reprocess the current token
7139     my $el;
7140 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
7141 wakaba 1.87 $insert->($el);
7142 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
7143 wakaba 1.87 }
7144 wakaba 1.91
7145 wakaba 1.87 !!!next-token;
7146 wakaba 1.126 next B;
7147 wakaba 1.52 } elsif ({
7148     a => 1,
7149     b => 1, big => 1, em => 1, font => 1, i => 1,
7150     nobr => 1, s => 1, small => 1, strile => 1,
7151     strong => 1, tt => 1, u => 1,
7152     }->{$token->{tag_name}}) {
7153 wakaba 1.79 !!!cp ('t427');
7154 wakaba 1.113 $formatting_end_tag->($token);
7155 wakaba 1.126 next B;
7156 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
7157 wakaba 1.79 !!!cp ('t428');
7158 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
7159 wakaba 1.52
7160     ## As if <br>
7161     $reconstruct_active_formatting_elements->($insert_to_current);
7162    
7163     my $el;
7164 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
7165 wakaba 1.52 $insert->($el);
7166    
7167     ## Ignore the token.
7168     !!!next-token;
7169 wakaba 1.126 next B;
7170 wakaba 1.52 } elsif ({
7171     caption => 1, col => 1, colgroup => 1, frame => 1,
7172     frameset => 1, head => 1, option => 1, optgroup => 1,
7173     tbody => 1, td => 1, tfoot => 1, th => 1,
7174     thead => 1, tr => 1,
7175     area => 1, basefont => 1, bgsound => 1,
7176     embed => 1, hr => 1, iframe => 1, image => 1,
7177     img => 1, input => 1, isindex => 1, noembed => 1,
7178     noframes => 1, param => 1, select => 1, spacer => 1,
7179     table => 1, textarea => 1, wbr => 1,
7180     noscript => 0, ## TODO: if scripting is enabled
7181     }->{$token->{tag_name}}) {
7182 wakaba 1.79 !!!cp ('t429');
7183 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7184 wakaba 1.52 ## Ignore the token
7185     !!!next-token;
7186 wakaba 1.126 next B;
7187 wakaba 1.52
7188     ## ISSUE: Issue on HTML5 new elements in spec
7189    
7190     } else {
7191     ## Step 1
7192     my $node_i = -1;
7193     my $node = $self->{open_elements}->[$node_i];
7194 wakaba 1.51
7195 wakaba 1.52 ## Step 2
7196     S2: {
7197 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7198 wakaba 1.52 ## Step 1
7199     ## generate implied end tags
7200 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7201 wakaba 1.79 !!!cp ('t430');
7202 wakaba 1.83 ## ISSUE: Can this case be reached?
7203 wakaba 1.86 pop @{$self->{open_elements}};
7204 wakaba 1.52 }
7205    
7206     ## Step 2
7207 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7208     ne $token->{tag_name}) {
7209 wakaba 1.79 !!!cp ('t431');
7210 wakaba 1.58 ## NOTE: <x><y></x>
7211 wakaba 1.122 !!!parse-error (type => 'not closed',
7212     value => $self->{open_elements}->[-1]->[0]
7213     ->manakai_local_name,
7214     token => $token);
7215 wakaba 1.79 } else {
7216     !!!cp ('t432');
7217 wakaba 1.52 }
7218    
7219     ## Step 3
7220     splice @{$self->{open_elements}}, $node_i;
7221 wakaba 1.51
7222 wakaba 1.1 !!!next-token;
7223 wakaba 1.52 last S2;
7224 wakaba 1.1 } else {
7225 wakaba 1.52 ## Step 3
7226 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
7227 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
7228 wakaba 1.123 ($node->[1] & SPECIAL_EL or
7229     $node->[1] & SCOPING_EL)) {
7230 wakaba 1.79 !!!cp ('t433');
7231 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7232 wakaba 1.52 ## Ignore the token
7233     !!!next-token;
7234     last S2;
7235     }
7236 wakaba 1.79
7237     !!!cp ('t434');
7238 wakaba 1.1 }
7239 wakaba 1.52
7240     ## Step 4
7241     $node_i--;
7242     $node = $self->{open_elements}->[$node_i];
7243    
7244     ## Step 5;
7245     redo S2;
7246     } # S2
7247 wakaba 1.126 next B;
7248 wakaba 1.1 }
7249     }
7250 wakaba 1.126 next B;
7251     } continue { # B
7252     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7253     ## NOTE: The code below is executed in cases where it does not have
7254     ## to be, but it is harmless even in those cases.
7255     ## has an element in scope
7256     INSCOPE: {
7257     for (reverse 0..$#{$self->{open_elements}}) {
7258     my $node = $self->{open_elements}->[$_];
7259     if ($node->[1] & FOREIGN_EL) {
7260     last INSCOPE;
7261     } elsif ($node->[1] & SCOPING_EL) {
7262     last;
7263     }
7264     }
7265    
7266     ## NOTE: No foreign element in scope.
7267     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7268     } # INSCOPE
7269     }
7270 wakaba 1.1 } # B
7271    
7272     ## Stop parsing # MUST
7273    
7274     ## TODO: script stuffs
7275 wakaba 1.3 } # _tree_construct_main
7276    
7277     sub set_inner_html ($$$) {
7278     my $class = shift;
7279     my $node = shift;
7280     my $s = \$_[0];
7281     my $onerror = $_[1];
7282    
7283 wakaba 1.63 ## ISSUE: Should {confident} be true?
7284    
7285 wakaba 1.3 my $nt = $node->node_type;
7286     if ($nt == 9) {
7287     # MUST
7288    
7289     ## Step 1 # MUST
7290     ## TODO: If the document has an active parser, ...
7291     ## ISSUE: There is an issue in the spec.
7292    
7293     ## Step 2 # MUST
7294     my @cn = @{$node->child_nodes};
7295     for (@cn) {
7296     $node->remove_child ($_);
7297     }
7298    
7299     ## Step 3, 4, 5 # MUST
7300     $class->parse_string ($$s => $node, $onerror);
7301     } elsif ($nt == 1) {
7302     ## TODO: If non-html element
7303    
7304     ## NOTE: Most of this code is copied from |parse_string|
7305    
7306     ## Step 1 # MUST
7307 wakaba 1.14 my $this_doc = $node->owner_document;
7308     my $doc = $this_doc->implementation->create_document;
7309 wakaba 1.18 $doc->manakai_is_html (1);
7310 wakaba 1.3 my $p = $class->new;
7311     $p->{document} = $doc;
7312    
7313 wakaba 1.84 ## Step 8 # MUST
7314 wakaba 1.3 my $i = 0;
7315 wakaba 1.121 $p->{line_prev} = $p->{line} = 1;
7316     $p->{column_prev} = $p->{column} = 0;
## Input-stream callback installed on the fragment parser |$p|.
##
## Each call advances the cursor |$i| over the string |$$s|, stores the
## next code point in |$self->{next_char}| (-1 once the input is
## exhausted), pushes the character being left behind onto the
## |prev_char| look-behind list, and maintains |line|/|column| (with
## |line_prev|/|column_prev| holding the position before this call).
##
## |$self| is the parser the callback is invoked with; its |prev_char|
## and |next_char| slots are the ones initialised on |$p| right after
## this closure is installed.
##
## NOTE: All parser state is reached through |$self|, never through the
## enclosing |$p| lexical.  Capturing |$p| here would make the parser
## hold a reference to itself via this closure; unlike the
## |parse_error| closure, this slot is never deleted, so such a loop
## would keep the parser alive forever.
$p->{set_next_char} = sub {
  my $self = shift;

  ## Shift the look-behind window: drop the last entry and put the
  ## character being left behind at the front.
  pop @{$self->{prev_char}};
  unshift @{$self->{prev_char}}, $self->{next_char};

  ## End of input is signalled by -1.
  if ($i >= length $$s) {
    $self->{next_char} = -1;
    return;
  }
  $self->{next_char} = ord substr $$s, $i++, 1;

  ($self->{line_prev}, $self->{column_prev})
      = ($self->{line}, $self->{column});
  $self->{column}++;

  if ($self->{next_char} == 0x000A) { # LF
    $self->{line}++;
    $self->{column} = 0;
    !!!cp ('i1');
  } elsif ($self->{next_char} == 0x000D) { # CR
    ## CR LF is a single newline: swallow the LF if one follows.
    $i++ if substr ($$s, $i, 1) eq "\x0A";
    $self->{next_char} = 0x000A; # LF # MUST
    $self->{line}++;
    $self->{column} = 0;
    !!!cp ('i2');
  } elsif ($self->{next_char} > 0x10FFFF) {
    $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    !!!cp ('i3');
  } elsif ($self->{next_char} == 0x0000) { # NULL
    !!!cp ('i4');
    !!!parse-error (type => 'NULL');
    $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
  } elsif ($self->{next_char} <= 0x0008 or
           (0x000E <= $self->{next_char} and
            $self->{next_char} <= 0x001F) or
           (0x007F <= $self->{next_char} and
            $self->{next_char} <= 0x009F) or
           (0xD800 <= $self->{next_char} and
            $self->{next_char} <= 0xDFFF) or
           ## NOTE(review): Unicode's noncharacter block is
           ## U+FDD0..U+FDEF; this range stops at U+FDDF.  Left
           ## unchanged here -- confirm against the spec revision
           ## being tracked before widening it.
           (0xFDD0 <= $self->{next_char} and
            $self->{next_char} <= 0xFDDF) or
           ## U+nFFFE and U+nFFFF for every plane n = 0..0x10.  The
           ## value is already known to be <= 0x10FFFF at this point,
           ## so testing the low 16 bits selects exactly those 34
           ## code points without building a lookup hash per character.
           ($self->{next_char} & 0xFFFE) == 0xFFFE) {
    !!!cp ('i4.1');
    !!!parse-error (type => 'control char', level => $self->{must_level});
    ## TODO: error type documentation
  }
};
7371 wakaba 1.76 $p->{prev_char} = [-1, -1, -1];
7372     $p->{next_char} = -1;
7373 wakaba 1.3
## Error reporting.  |$ponerror| is the caller-supplied |$onerror|
## handler if there is one; otherwise a default handler that prints a
## warning.  The default prefers the position recorded on the
## offending token, when the token carries a line number, over the
## position passed in |line|/|column|.
my $ponerror = $onerror || sub {
  my %opt = @_;
  my $token = $opt{token};
  my ($line, $column) = (defined $token and defined $token->{line})
      ? ($token->{line}, $token->{column})
      : ($opt{line}, $opt{column});
  warn "Parse error ($opt{type}) at line $line column $column\n";
};

## The parser's own hook supplies the current input position first, so
## that any |line|/|column| given explicitly by the caller overrides it.
## NOTE: This closure captures |$p|, which is why |parse_error| is
## deleted again once parsing has finished.
$p->{parse_error} = sub {
  my @current_position = (line => $p->{line}, column => $p->{column});
  $ponerror->(@current_position, @_);
};
7387    
7388     $p->_initialize_tokenizer;
7389     $p->_initialize_tree_constructor;
7390    
7391     ## Step 2
7392 wakaba 1.71 my $node_ln = $node->manakai_local_name;
7393 wakaba 1.40 $p->{content_model} = {
7394     title => RCDATA_CONTENT_MODEL,
7395     textarea => RCDATA_CONTENT_MODEL,
7396     style => CDATA_CONTENT_MODEL,
7397     script => CDATA_CONTENT_MODEL,
7398     xmp => CDATA_CONTENT_MODEL,
7399     iframe => CDATA_CONTENT_MODEL,
7400     noembed => CDATA_CONTENT_MODEL,
7401     noframes => CDATA_CONTENT_MODEL,
7402     noscript => CDATA_CONTENT_MODEL,
7403     plaintext => PLAINTEXT_CONTENT_MODEL,
7404     }->{$node_ln};
7405     $p->{content_model} = PCDATA_CONTENT_MODEL
7406     unless defined $p->{content_model};
7407     ## ISSUE: What is "the name of the element"? local name?
7408 wakaba 1.3
7409 wakaba 1.123 $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
7410     ## TODO: Foreign element OK?
7411 wakaba 1.3
7412 wakaba 1.84 ## Step 3
7413 wakaba 1.3 my $root = $doc->create_element_ns
7414     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
7415    
7416 wakaba 1.84 ## Step 4 # MUST
7417 wakaba 1.3 $doc->append_child ($root);
7418    
7419 wakaba 1.84 ## Step 5 # MUST
7420 wakaba 1.123 push @{$p->{open_elements}}, [$root, $el_category->{html}];
7421 wakaba 1.3
7422     undef $p->{head_element};
7423    
7424 wakaba 1.84 ## Step 6 # MUST
7425 wakaba 1.3 $p->_reset_insertion_mode;
7426    
7427 wakaba 1.84 ## Step 7 # MUST
7428 wakaba 1.3 my $anode = $node;
7429     AN: while (defined $anode) {
7430     if ($anode->node_type == 1) {
7431     my $nsuri = $anode->namespace_uri;
7432     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
7433 wakaba 1.71 if ($anode->manakai_local_name eq 'form') {
7434 wakaba 1.79 !!!cp ('i5');
7435 wakaba 1.3 $p->{form_element} = $anode;
7436     last AN;
7437     }
7438     }
7439     }
7440     $anode = $anode->parent_node;
7441     } # AN
7442    
7443 wakaba 1.84 ## Step 9 # MUST
7444 wakaba 1.3 {
7445     my $self = $p;
7446     !!!next-token;
7447     }
7448     $p->_tree_construction_main;
7449    
7450 wakaba 1.84 ## Step 10 # MUST
7451 wakaba 1.3 my @cn = @{$node->child_nodes};
7452     for (@cn) {
7453     $node->remove_child ($_);
7454     }
7455     ## ISSUE: mutation events? read-only?
7456    
7457 wakaba 1.84 ## Step 11 # MUST
7458 wakaba 1.3 @cn = @{$root->child_nodes};
7459     for (@cn) {
7460 wakaba 1.14 $this_doc->adopt_node ($_);
7461 wakaba 1.3 $node->append_child ($_);
7462     }
7463 wakaba 1.14 ## ISSUE: mutation events?
7464 wakaba 1.3
7465     $p->_terminate_tree_constructor;
7466 wakaba 1.121
7467     delete $p->{parse_error}; # delete loop
7468 wakaba 1.3 } else {
7469     die "$0: |set_inner_html| is not defined for node of type $nt";
7470     }
7471     } # set_inner_html
7472    
7473     } # tree construction stage
7474 wakaba 1.1
## Exception class derived from |Error| (the module loaded at the top
## of this file with |use Error qw(:try)|).
## NOTE(review): presumably thrown to abandon the current parse and
## start over -- inferred from the class name only; confirm at the
## throw/catch sites.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';
7477    
7478 wakaba 1.1 1;
7479 wakaba 1.149 # $Date: 2008/05/25 07:54:33 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24