/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.174 - (hide annotations) (download) (as text)
Sun Sep 14 06:32:49 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.173: +7 -5 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	14 Sep 2008 06:32:02 -0000
	* HTML.pm.src ($char_onerror): Give the character decoder's |line|
	and |column| higher priority than those set by the
	tokenizer's input handler.
	($self->{read_until}): Exclude U+FFFD (but this might
	not be necessary, since now we do line/column fixup in
	the character decode handle).

2008-09-14  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/Charset/ChangeLog	14 Sep 2008 06:32:40 -0000
	* DecodeHandle.pm: EUCJP class reimplemented using |read|-centric
	model.

2008-09-14  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.174 our $VERSION=do{my @r=(q$Revision: 1.173 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.139 require IO::Handle;
12    
## Namespace URIs referenced throughout this module.
my $HTML_NS  = 'http://www.w3.org/1999/xhtml';
my $MML_NS   = 'http://www.w3.org/1998/Math/MathML';
my $SVG_NS   = 'http://www.w3.org/2000/svg';
my $XLINK_NS = 'http://www.w3.org/1999/xlink';
my $XML_NS   = 'http://www.w3.org/XML/1998/namespace';
my $XMLNS_NS = 'http://www.w3.org/2000/xmlns/';
19    
## Element category flags.  Each constant occupies exactly one bit so
## that categories can be combined and tested with bitwise operators.
sub A_EL ()                    { 1 << 0 }
sub ADDRESS_EL ()              { 1 << 1 }
sub BODY_EL ()                 { 1 << 2 }
sub BUTTON_EL ()               { 1 << 3 }
sub CAPTION_EL ()              { 1 << 4 }
sub DD_EL ()                   { 1 << 5 }
sub DIV_EL ()                  { 1 << 6 }
sub DT_EL ()                   { 1 << 7 }
sub FORM_EL ()                 { 1 << 8 }
sub FORMATTING_EL ()           { 1 << 9 }
sub FRAMESET_EL ()             { 1 << 10 }
sub HEADING_EL ()              { 1 << 11 }
sub HTML_EL ()                 { 1 << 12 }
sub LI_EL ()                   { 1 << 13 }
sub NOBR_EL ()                 { 1 << 14 }
sub OPTION_EL ()               { 1 << 15 }
sub OPTGROUP_EL ()             { 1 << 16 }
sub P_EL ()                    { 1 << 17 }
sub SELECT_EL ()               { 1 << 18 }
sub TABLE_EL ()                { 1 << 19 }
sub TABLE_CELL_EL ()           { 1 << 20 }
sub TABLE_ROW_EL ()            { 1 << 21 }
sub TABLE_ROW_GROUP_EL ()      { 1 << 22 }
sub MISC_SCOPING_EL ()         { 1 << 23 }
sub MISC_SPECIAL_EL ()         { 1 << 24 }
sub FOREIGN_EL ()              { 1 << 25 }
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 26 }
sub MML_AXML_EL ()             { 1 << 27 }
sub RUBY_EL ()                 { 1 << 28 }
sub RUBY_COMPONENT_EL ()       { 1 << 29 }
50 wakaba 1.123
## Mask matching |table|, table row and table row group elements.
sub TABLE_ROWS_EL () { TABLE_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL }
56    
## NOTE: Used in the "generate implied end tags" algorithm.
## NOTE: The implementation of "generate implied end tags" also uses a
## modified version of END_TAG_OPTIONAL_EL in one place (search for the
## function mae).
sub END_TAG_OPTIONAL_EL () {
  DD_EL | DT_EL | LI_EL | P_EL | RUBY_COMPONENT_EL
}
68    
## NOTE: Used in the </body> and EOF algorithms.
sub ALL_END_TAG_OPTIONAL_EL () {
  DD_EL | DT_EL | LI_EL | P_EL |
  BODY_EL | HTML_EL |
  TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
}
82    
## Mask of the element categories grouped under the name "scoping".
sub SCOPING_EL () {
  BUTTON_EL | CAPTION_EL | HTML_EL |
  TABLE_EL | TABLE_CELL_EL |
  MISC_SCOPING_EL
}
91    
## Scoping masks for the table-related variants; each one pairs |html|
## with a single table-related category.
sub TABLE_SCOPING_EL ()      { HTML_EL | TABLE_EL }

sub TABLE_ROWS_SCOPING_EL () { HTML_EL | TABLE_ROW_GROUP_EL }

sub TABLE_ROW_SCOPING_EL ()  { HTML_EL | TABLE_ROW_EL }
106    
## Mask of the element categories grouped under the name "special".
sub SPECIAL_EL () {
  ADDRESS_EL | BODY_EL | DIV_EL |

  DD_EL | DT_EL | LI_EL | P_EL |

  FORM_EL | FRAMESET_EL | HEADING_EL |
  OPTION_EL | OPTGROUP_EL | SELECT_EL |
  TABLE_ROW_EL | TABLE_ROW_GROUP_EL |
  MISC_SPECIAL_EL
}
127    
## Maps the local name of an HTML element to its category flags.
## Entries are grouped by category rather than listed alphabetically.
my $el_category = {
  ## Elements that carry two flags.
  a    => A_EL | FORMATTING_EL,
  nobr => NOBR_EL | FORMATTING_EL,

  ## Elements that have a category flag of their own.
  address  => ADDRESS_EL,
  body     => BODY_EL,
  button   => BUTTON_EL,
  caption  => CAPTION_EL,
  dd       => DD_EL,
  div      => DIV_EL,
  dt       => DT_EL,
  form     => FORM_EL,
  frameset => FRAMESET_EL,
  html     => HTML_EL,
  li       => LI_EL,
  optgroup => OPTGROUP_EL,
  option   => OPTION_EL,
  p        => P_EL,
  ruby     => RUBY_EL,
  select   => SELECT_EL,
  table    => TABLE_EL,
  tr       => TABLE_ROW_EL,

  ## Elements that share a category flag.
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => RUBY_COMPONENT_EL) } qw(rp rt)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  (map { ($_ => FORMATTING_EL) } qw(
    b big em font i s small strike strong tt u
  )),
  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area base basefont bgsound blockquote br center col colgroup
    dir dl embed fieldset frame head hr iframe img input isindex
    link listing menu meta noembed noframes noscript ol param
    plaintext pre script spacer style textarea title ul wbr
  )),
};
215    
## Category flags of foreign (MathML and SVG) elements, keyed first by
## namespace URI and then by local name.
my $el_category_f = {
  $MML_NS => {
    'annotation-xml' => MML_AXML_EL,
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(mi mo mn ms mtext)),
  },
  $SVG_NS => {
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(foreignObject desc title)),
  },
  ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
};
232    
## Maps an all-lowercase SVG attribute name to its mixed-case spelling.
## Every key is simply the lowercased form of its value, so the table
## is generated from the list of mixed-case names.
my $svg_attr_name = {
  map { (lc ($_) => $_) } qw(
    attributeName attributeType
    baseFrequency baseProfile
    calcMode clipPathUnits contentScriptType contentStyleType
    diffuseConstant
    edgeMode externalResourcesRequired
    filterRes filterUnits
    glyphRef gradientTransform gradientUnits
    kernelMatrix kernelUnitLength keyPoints keySplines keyTimes
    lengthAdjust limitingConeAngle
    markerHeight markerUnits markerWidth maskContentUnits maskUnits
    numOctaves
    pathLength patternContentUnits patternTransform patternUnits
    pointsAtX pointsAtY pointsAtZ
    preserveAlpha preserveAspectRatio primitiveUnits
    refX refY repeatCount repeatDur
    requiredExtensions requiredFeatures
    specularConstant specularExponent spreadMethod startOffset
    stdDeviation stitchTiles surfaceScale systemLanguage
    tableValues targetX targetY textLength
    viewBox viewTarget
    xChannelSelector yChannelSelector
    zoomAndPan
  )
};
297    
## Maps a qualified attribute name found on a foreign element to
## [namespace URI, [prefix, local name]].
my $foreign_attr_xname = {
  (map { ("xlink:$_" => [$XLINK_NS, ['xlink', $_]]) }
       qw(actuate arcrole href role show title type)),
  (map { ("xml:$_" => [$XML_NS, ['xml', $_]]) } qw(base lang space)),
  'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
  'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
};
312    
313     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
314    
## Maps each code point in the range 0x80-0x9F to the code point that
## replaces it.  The replacement list is positional: its first item
## belongs to 0x80 and its last item to 0x9F.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 80-87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 88-8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 90-97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 98-9F
  );
  +{ map { (0x80 + $_ => $replacement[$_]) } 0 .. $#replacement };
}; # $c1_entity_char
349 wakaba 1.1
## Parses a string of bytes by opening an in-memory read handle on it
## and delegating to |parse_byte_stream|.
##
##   $parser->parse_byte_string ($charset_name, $bytes, @rest)
##
## $charset_name is passed through to |parse_byte_stream| unchanged.
## $bytes is either the byte string itself or a reference to it; the
## handle is opened on the caller's scalar, so no copy is made here.
## @rest (every argument after $bytes) is forwarded as is.
##
## Returns whatever |parse_byte_stream| returns.  Dies if the in-memory
## handle cannot be opened; without this check the parser would go on
## to read from an unopened handle.
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);
  open my $input, '<', $bytes_ref
      or die "parse_byte_string: cannot open the byte string for reading: $!\n";
  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
356    
## Parses an HTML document read from a byte stream handle.
##
##   $parser_or_class->parse_byte_stream
##       ($charset_name, $byte_stream, $doc, $onerror, $get_wrapper)
##
## A character encoding is chosen along the lines of the HTML5 encoding
## sniffing algorithm, a decode handle is obtained for it, and the
## decoded stream is handed to |parse_char_stream| together with $doc,
## $onerror and $get_wrapper.  When invoked on a class name, a new
## parser object is created first.
##
## $onerror defaults to a handler that |warn|s the error type.
## $get_wrapper receives the decode handle and returns the object that
## is actually parsed; by default the decode handle is used as is.
##
## If |Whatpm::HTML::RestartParser| is thrown while parsing (see
## $self->{change_encoding} below), the document is parsed once more
## with the decode handle selected by that callback.
##
## Returns the value returned by |parse_char_stream|.
sub parse_byte_stream ($$$$;$$) {
  # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
  my $self = ref $_[0] ? shift : shift->new;
  my $charset_name = shift;
  my $byte_stream = $_[0];

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type})\n";
  };
  ## Replaced by |parse_char_stream| once it starts running.
  $self->{parse_error} = $onerror;

  my $get_wrapper = $_[3] || sub ($) {
    return $_[0]; # the decode handle, unchanged
  };

  ## HTML5 encoding sniffing algorithm
  require Message::Charset::Info;
  my $charset;
  my $buffer;
  my ($char_stream, $e_status);

  SNIFFING: {
    ## NOTE: By setting the |allow_fallback| option true when the
    ## |get_decode_handle| method is invoked, we ignore what the HTML5
    ## spec requires, i.e. unsupported encoding should be ignored.
    ## TODO: We should not do this unless the parser is invoked
    ## in the conformance checking mode, in which this behavior
    ## would be useful.

    ## Step 1: encoding supplied by the caller
    if (defined $charset_name) {
      $charset = Message::Charset::Info->get_by_html_name ($charset_name);
          ## TODO: Is this ok?  Transfer protocol's parameter should be
          ## interpreted in its semantics?

      ## ISSUE: Unsupported encoding is not ignored according to the spec.
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1);
      if ($char_stream) {
        $self->{confident} = 1;
        last SNIFFING;
      } else {
        ## TODO: unsupported error
      }
    }

    ## Step 2: read ahead up to 1024 bytes
    my $byte_buffer = '';
    for (1..1024) {
      my $octet = $byte_stream->getc;
      last unless defined $octet;
      $byte_buffer .= $octet;
    } ## TODO: timeout

    ## Step 3: byte order mark.  Signatures are tried in the order
    ## listed; the first one that matches decides the encoding.
    for my $bom ([qr/^\xFE\xFF/, 'utf-16be'],
                 [qr/^\xFF\xFE/, 'utf-16le'],
                 [qr/^\xEF\xBB\xBF/, 'utf-8']) {
      my ($signature, $bom_charset_name) = @$bom;
      next unless $byte_buffer =~ $signature;
      $charset = Message::Charset::Info->get_by_html_name ($bom_charset_name);
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($byte_stream, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      $self->{confident} = 1;
      last SNIFFING;
    }

    ## Step 4
    ## TODO: <meta charset>

    ## Step 5
    ## TODO: from history

    ## Step 6: automatic detection from the bytes read so far
    require Whatpm::Charset::UniversalCharDet;
    $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
        ($byte_buffer);
    if (defined $charset_name) {
      $charset = Message::Charset::Info->get_by_html_name ($charset_name);

      ## ISSUE: Unsupported encoding is not ignored according to the spec.
      require Whatpm::Charset::DecodeHandle;
      $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
          ($byte_stream);
      ($char_stream, $e_status) = $charset->get_decode_handle
          ($buffer, allow_error_reporting => 1,
           allow_fallback => 1, byte_buffer => \$byte_buffer);
      if ($char_stream) {
        $buffer->{buffer} = $byte_buffer;
        !!!parse-error (type => 'sniffing:chardet',
                        text => $charset_name,
                        level => $self->{level}->{info},
                        layer => 'encode',
                        line => 1, column => 1);
        $self->{confident} = 0;
        last SNIFFING;
      }
    }

    ## Step 7: default
    ## TODO: Make this configurable.
    $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
        ## NOTE: We choose |windows-1252| here, since |utf-8| should be
        ## detectable in the step 6.
    require Whatpm::Charset::DecodeHandle;
    $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
        ($byte_stream);
    ($char_stream, $e_status)
        = $charset->get_decode_handle ($buffer,
                                       allow_error_reporting => 1,
                                       allow_fallback => 1,
                                       byte_buffer => \$byte_buffer);
    $buffer->{buffer} = $byte_buffer;
    !!!parse-error (type => 'sniffing:default',
                    text => 'windows-1252',
                    level => $self->{level}->{info},
                    line => 1, column => 1,
                    layer => 'encode');
    $self->{confident} = 0;
  } # SNIFFING

  ## Record the chosen encoding and report what kind of decoder
  ## implementation was obtained for it.
  if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
    $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
    !!!parse-error (type => 'chardecode:fallback',
                    #text => $self->{input_encoding},
                    level => $self->{level}->{uncertain},
                    line => 1, column => 1,
                    layer => 'encode');
  } elsif (not ($e_status &
                Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
    $self->{input_encoding} = $charset->get_iana_name;
    !!!parse-error (type => 'chardecode:no error',
                    text => $self->{input_encoding},
                    level => $self->{level}->{uncertain},
                    line => 1, column => 1,
                    layer => 'encode');
  } else {
    $self->{input_encoding} = $charset->get_iana_name;
  }

  ## Callback implementing the "change the encoding" algorithm.  It
  ## overwrites the $charset_name, $charset, $char_stream and $e_status
  ## variables of this method, which the restart handler below relies on.
  $self->{change_encoding} = sub {
    my $self = shift;
    $charset_name = shift;
    my $token = shift;

    $charset = Message::Charset::Info->get_by_html_name ($charset_name);
    ($char_stream, $e_status) = $charset->get_decode_handle
        ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
         byte_buffer => \ $buffer->{buffer});

    if ($char_stream) { # if supported
      ## "Change the encoding" algorithm:

      ## Step 1
      if ($charset->{category} &
          Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
        $charset = Message::Charset::Info->get_by_html_name ('utf-8');
        ($char_stream, $e_status) = $charset->get_decode_handle
            ($byte_stream,
             byte_buffer => \ $buffer->{buffer});
      }
      $charset_name = $charset->get_iana_name;

      ## Step 2
      if (defined $self->{input_encoding} and
          $self->{input_encoding} eq $charset_name) {
        !!!parse-error (type => 'charset label:matching',
                        text => $charset_name,
                        level => $self->{level}->{info});
        $self->{confident} = 1;
        return;
      }

      !!!parse-error (type => 'charset label detected',
                      text => $self->{input_encoding},
                      value => $charset_name,
                      level => $self->{level}->{warn},
                      token => $token);

      ## Step 3
      # if (can) {
        ## change the encoding on the fly.
        #$self->{confident} = 1;
        #return;
      # }

      ## Step 4
      throw Whatpm::HTML::RestartParser ();
    }
  }; # $self->{change_encoding}

  ## Error handler installed on the (wrapped) decode handle.
  my $char_onerror = sub {
    my (undef, $type, %opt) = @_;
    !!!parse-error (layer => 'encode',
                    line => $self->{line}, column => $self->{column} + 1,
                    %opt, type => $type);
    if ($opt{octets}) {
      ${$opt{octets}} = "\x{FFFD}"; # replacement character
    }
  };

  my $wrapped_char_stream = $get_wrapper->($char_stream);
  $wrapped_char_stream->onerror ($char_onerror);

  my @args = @_; shift @args; # drop the byte stream; keep $doc and later
  my $return;
  try {
    $return = $self->parse_char_stream ($wrapped_char_stream, @args);
  } catch Whatpm::HTML::RestartParser with {
    ## NOTE: Invoked after {change_encoding}.

    if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
      $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
      !!!parse-error (type => 'chardecode:fallback',
                      level => $self->{level}->{uncertain},
                      #text => $self->{input_encoding},
                      line => 1, column => 1,
                      layer => 'encode');
    } elsif (not ($e_status &
                  Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
      $self->{input_encoding} = $charset->get_iana_name;
      !!!parse-error (type => 'chardecode:no error',
                      text => $self->{input_encoding},
                      level => $self->{level}->{uncertain},
                      line => 1, column => 1,
                      layer => 'encode');
    } else {
      $self->{input_encoding} = $charset->get_iana_name;
    }
    $self->{confident} = 1;

    $wrapped_char_stream = $get_wrapper->($char_stream);
    $wrapped_char_stream->onerror ($char_onerror);

    $return = $self->parse_char_stream ($wrapped_char_stream, @args);
  };
  return $return;
} # parse_byte_stream
608 wakaba 1.63
## NOTE: The HTML5 spec says that the encoding layer MUST NOT strip the
## BOM and that the HTML layer MUST ignore it.  However, we do strip the
## BOM in the encoding layer, and the HTML layer does not ignore any
## U+FEFF, because the core part of our HTML parser expects a string of
## characters, not a string of bytes or code units or anything else that
## might contain a BOM.  Therefore, any parser interface that accepts a
## string of bytes, such as |parse_byte_string| in this module, must
## ensure that it strips the BOM and never strips any ZWNBSP.

## Parses a string of characters by wrapping it in a character stream
## handle and delegating to |parse_char_stream|.
##
##   $parser->parse_char_string ($s, $doc, $onerror, $get_wrapper)
##
## $s is either the string itself or a reference to it; a reference to
## the caller's scalar is used, so the string is not copied here.  If
## $get_wrapper is true, it is called with the handle and its return
## value is parsed instead.  $doc, $onerror and $get_wrapper are
## forwarded to |parse_char_stream|, whose return value is returned.
sub parse_char_string ($$$;$$) {
  #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
  my $self = shift;
  my $string_ref = ref $_[0] ? $_[0] : \($_[0]);
  require Whatpm::Charset::DecodeHandle;
  my $char_stream
      = Whatpm::Charset::DecodeHandle::CharString->new ($string_ref);
  my $get_wrapper = $_[3];
  $char_stream = $get_wrapper->($char_stream) if $get_wrapper;
  return $self->parse_char_stream ($char_stream, @_[1..$#_]);
} # parse_char_string
*parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
630 wakaba 1.63
## Parses an HTML document from a character stream handle.
##
##   $parser_or_class->parse_char_stream ($input, $doc, $onerror)
##
## $input must provide |getc| and |manakai_read_until|.  The existing
## children of $doc are removed before parsing.  $onerror defaults to a
## handler that |warn|s the error type together with its line and
## column.  When invoked on a class name, a new parser object is
## created first.
##
## The method installs the input callbacks ({set_next_char} and
## {read_until}) and the {parse_error} callback on the parser, runs the
## tokenizer and tree constructor, and returns $doc.
sub parse_char_stream ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $input = $_[0];
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  $self->{confident} = 1 unless exists $self->{confident};
  $self->{document}->input_encoding ($self->{input_encoding})
      if defined $self->{input_encoding};

  my $i = 0;
  $self->{line_prev} = $self->{line} = 1;
  $self->{column_prev} = $self->{column} = 0;

  ## Reads one character into {next_char} (-1 at the end of the input),
  ## keeping {prev_char} and the line/column counters up to date.
  $self->{set_next_char} = sub {
    my $self = shift;

    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    my $char;
    if (defined $self->{next_next_char}) {
      ## A character read ahead while handling a CR is consumed first.
      $char = $self->{next_next_char};
      delete $self->{next_next_char};
    } else {
      $char = $input->getc;
    }
    unless (defined $char) {
      $self->{next_char} = -1;
      return;
    }
    $self->{next_char} = ord $char;

    ($self->{line_prev}, $self->{column_prev})
        = ($self->{line}, $self->{column});
    $self->{column}++;

    if ($self->{next_char} == 0x000A) { # LF
      !!!cp ('j1');
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} == 0x000D) { # CR
      !!!cp ('j2');
      ## TODO: support for abort/streaming
      ## CR and CR LF are both turned into a single LF; a character
      ## other than LF that follows the CR is kept for the next call.
      my $next = $input->getc;
      if (defined $next and $next ne "\x0A") {
        $self->{next_next_char} = $next;
      }
      $self->{next_char} = 0x000A; # LF # MUST
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} > 0x10FFFF) {
      !!!cp ('j3');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} == 0x0000) { # NULL
      !!!cp ('j4');
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} <= 0x0008 or
             (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
             (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
             (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
             (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
             ## ISSUE: U+FDE0-U+FDEF are not excluded
             {
              0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
              0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
              0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
              0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
              0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
              0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
              0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
              0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
              0x10FFFE => 1, 0x10FFFF => 1,
             }->{$self->{next_char}}) {
      !!!cp ('j5');
      if ($self->{next_char} < 0x10000) {
        !!!parse-error (type => 'control char',
                        text => (sprintf 'U+%04X', $self->{next_char}));
      } else {
        !!!parse-error (type => 'control char',
                        text => (sprintf 'U-%08X', $self->{next_char}));
      }
    }
  };
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  ## Bulk read: delegates to the |manakai_read_until| method of the
  ## input handle, passing the target scalar, a pattern and the offset.
  ## Returns the count reported by that method (always 0 while a
  ## read-ahead character is pending) and advances the column counters
  ## by it.
  $self->{read_until} = sub {
    #my ($scalar, $specials_range, $offset) = @_;
    my $specials_range = $_[1];
    return 0 if defined $self->{next_next_char};
    my $count = $input->manakai_read_until
        ($_[0],
         qr/(?![$specials_range\x{FDD0}-\x{FDDF}\x{FFFD}-\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}])[\x20-\x7E\xA0-\x{D7FF}\x{E000}-\x{10FFFD}]/,
         $_[2]);
    ## NOTE: We need to exclude U+FFFD, otherwise the reported
    ## line/column of an unassigned/illegal code point error would be
    ## wrong.
    if ($count) {
      $self->{column} += $count;
      $self->{column_prev} += $count;
      $self->{prev_char} = [-1, -1, -1];
      $self->{next_char} = -1;
    }
    return $count;
  }; # $self->{read_until}

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
    my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  ## The current line and column are passed ahead of the arguments
  ## supplied by the reporter.
  $self->{parse_error} = sub {
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_char_stream
755 wakaba 1.1
## Creates a new parser object.
##
##   $parser = $class->new
##
## The object starts out with the default error level table and with
## placeholder callbacks: {set_next_char} always reports the end of the
## input (-1), while {parse_error}, {change_encoding} and
## {application_cache_selection} do nothing.
sub new ($) {
  my $class = shift;
  my $self = bless {
    level => {must => 'm',
              should => 's',
              warn => 'w',
              info => 'i',
              uncertain => 'u'},
  }, $class;

  ## The closure is stored in the object itself, so it must not hold a
  ## strong reference to the object; otherwise the two would keep each
  ## other alive and the parser would never be freed.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_next_char} = sub {
    $weak_self->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
782    
## Content model flags: which kinds of markup are recognized in data.
sub CM_ENTITY ()         { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP ()    { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL ()     { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL ()    { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL ()    { CM_FULL_MARKUP | CM_ENTITY }
791    
## Tokenizer state numbers.  The numbers 1 and 12 are no longer in use.

## Data and tags
sub DATA_STATE ()                                     { 0 }
#sub ENTITY_DATA_STATE ()                             { 1 }
sub TAG_OPEN_STATE ()                                 { 2 }
sub CLOSE_TAG_OPEN_STATE ()                           { 3 }
sub TAG_NAME_STATE ()                                 { 4 }

## Attributes
sub BEFORE_ATTRIBUTE_NAME_STATE ()                    { 5 }
sub ATTRIBUTE_NAME_STATE ()                           { 6 }
sub AFTER_ATTRIBUTE_NAME_STATE ()                     { 7 }
sub BEFORE_ATTRIBUTE_VALUE_STATE ()                   { 8 }
sub ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ()            { 9 }
sub ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ()            { 10 }
sub ATTRIBUTE_VALUE_UNQUOTED_STATE ()                 { 11 }
#sub ENTITY_IN_ATTRIBUTE_VALUE_STATE ()               { 12 }

## Markup declarations and comments
sub MARKUP_DECLARATION_OPEN_STATE ()                  { 13 }
sub COMMENT_START_STATE ()                            { 14 }
sub COMMENT_START_DASH_STATE ()                       { 15 }
sub COMMENT_STATE ()                                  { 16 }
sub COMMENT_END_STATE ()                              { 17 }
sub COMMENT_END_DASH_STATE ()                         { 18 }
sub BOGUS_COMMENT_STATE ()                            { 19 }

## DOCTYPE
sub DOCTYPE_STATE ()                                  { 20 }
sub BEFORE_DOCTYPE_NAME_STATE ()                      { 21 }
sub DOCTYPE_NAME_STATE ()                             { 22 }
sub AFTER_DOCTYPE_NAME_STATE ()                       { 23 }
sub BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE ()         { 24 }
sub DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE ()  { 25 }
sub DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE ()  { 26 }
sub AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE ()          { 27 }
sub BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE ()         { 28 }
sub DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE ()  { 29 }
sub DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE ()  { 30 }
sub AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE ()          { 31 }
sub BOGUS_DOCTYPE_STATE ()                            { 32 }

## Later additions
sub AFTER_ATTRIBUTE_VALUE_QUOTED_STATE ()             { 33 }
sub SELF_CLOSING_START_TAG_STATE ()                   { 34 }
sub CDATA_SECTION_STATE ()                            { 35 }
sub MD_HYPHEN_STATE ()             { 36 } # "markup declaration open state" in the spec
sub MD_DOCTYPE_STATE ()            { 37 } # "markup declaration open state" in the spec
sub MD_CDATA_STATE ()              { 38 } # "markup declaration open state" in the spec
sub CDATA_PCDATA_CLOSE_TAG_STATE () { 39 } # "close tag open state" in the spec
sub CDATA_SECTION_MSE1_STATE ()    { 40 } # "CDATA section state" in the spec
sub CDATA_SECTION_MSE2_STATE ()    { 41 } # "CDATA section state" in the spec
sub PUBLIC_STATE ()                { 42 } # "after DOCTYPE name state" in the spec
sub SYSTEM_STATE ()                { 43 } # "after DOCTYPE name state" in the spec
836 wakaba 1.168 ## NOTE: "Entity data state", "entity in attribute value state", and
837     ## "consume a character reference" algorithm are jointly implemented
838     ## using the following six states:
839     sub ENTITY_STATE () { 44 }
840     sub ENTITY_HASH_STATE () { 45 }
841     sub NCR_NUM_STATE () { 46 }
842     sub HEXREF_X_STATE () { 47 }
843     sub HEXREF_HEX_STATE () { 48 }
844     sub ENTITY_NAME_STATE () { 49 }
845 wakaba 1.57
## Token types, used as the |type| member of a token.
sub DOCTYPE_TOKEN      () { 1 }
sub COMMENT_TOKEN      () { 2 }
sub START_TAG_TOKEN    () { 3 }
sub END_TAG_TOKEN      () { 4 }
sub END_OF_FILE_TOKEN  () { 5 }
sub CHARACTER_TOKEN    () { 6 }
852    
## Insertion mode group flags.  Each group occupies its own bit; the
## two lowest bits are left free to tell the members of a group apart.
sub AFTER_HTML_IMS  () { 1 << 2 }
sub HEAD_IMS        () { 1 << 3 }
sub BODY_IMS        () { 1 << 4 }
sub BODY_TABLE_IMS  () { 1 << 5 }
sub TABLE_IMS       () { 1 << 6 }
sub ROW_IMS         () { 1 << 7 }
sub BODY_AFTER_IMS  () { 1 << 8 }
sub FRAME_IMS       () { 1 << 9 }
sub SELECT_IMS      () { 1 << 10 }

## NOTE: The "in foreign content" insertion mode is special, as it is
## used in combination with the secondary insertion mode.  This parser
## stores the two together, bit-or'ed into a single value.
sub IN_FOREIGN_CONTENT_IM () { 1 << 11 }

## NOTE: There are no constants for the "initial" and "before html"
## insertion modes.

## NOTE: The "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { BODY_AFTER_IMS | AFTER_HTML_IMS }

## NOTE: The "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { FRAME_IMS | AFTER_HTML_IMS }

## Insertion modes in the head groups.
sub IN_HEAD_IM           () { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM  () { HEAD_IMS | 1 }
sub AFTER_HEAD_IM        () { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM       () { HEAD_IMS | 3 }

## Insertion modes in the body and table groups.
sub IN_BODY_IM        () { BODY_IMS }
sub IN_CELL_IM        () { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM     () { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM         () { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM  () { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM       () { TABLE_IMS }
sub AFTER_BODY_IM     () { BODY_AFTER_IMS }

## Insertion modes in the frameset and select groups.
sub IN_FRAMESET_IM         () { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM      () { FRAME_IMS | 2 }
sub IN_SELECT_IM           () { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM  () { SELECT_IMS | 2 }

## This insertion mode has no group flag.
sub IN_COLUMN_GROUP_IM () { 2 }
891    
892 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec.
893    
## Puts the tokenizer members of the parser object |$self| into their
## initial condition: data state, PCDATA content model, no current
## token or attribute, no remembered start tag name, no self-closing
## flag, and an empty queue of pending tokens.
##
## The members |state_keyword|, |entity__value|, |entity__match|, and
## |prev_state| are not touched here; they are initialized when used.
## |escape| is not set here either.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{content_model} = PCDATA_CONTENT_MODEL;
  $self->{state} = DATA_STATE; # MUST

  undef $self->{$_}
      for qw(current_token current_attribute last_emitted_start_tag_name);
  delete $self->{self_closing};

  ## Read the first input character (presumably into
  ## |$self->{next_char}|).
  !!!next-input-character;

  $self->{token} = [];
} # _initialize_tokenizer
911    
912     ## A token has:
913 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
914     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
915     ## ->{name} (DOCTYPE_TOKEN)
916     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
917     ## ->{public_identifier} (DOCTYPE_TOKEN)
918     ## ->{system_identifier} (DOCTYPE_TOKEN)
919 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
920 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
921 wakaba 1.66 ## ->{name}
922     ## ->{value}
923     ## ->{has_reference} == 1 or 0
924 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
925 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
926     ## |->{self_closing}| is used to save the value of |$self->{self_closing}|
927     ## while the token is pushed back to the stack.
928    
929 wakaba 1.1 ## Emitted token MUST immediately be handled by the tree construction state.
930    
931     ## Before each step, UA MAY check to see if either one of the scripts in
932     ## "list of scripts that will execute as soon as possible" or the first
933     ## script in the "list of scripts that will execute asynchronously",
934     ## has completed loading. If one has, then it MUST be executed
935     ## and removed from the list.
936    
937 wakaba 1.169 ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
938     ## (This requirement was dropped from HTML5 spec, unfortunately.)
939 wakaba 1.59
940 wakaba 1.1 sub _get_next_token ($) {
941     my $self = shift;
942 wakaba 1.125
943     if ($self->{self_closing}) {
944     !!!parse-error (type => 'nestc', token => $self->{current_token});
945     ## NOTE: The |self_closing| flag is only set by start tag token.
946     ## In addition, when a start tag token is emitted, it is always set to
947     ## |current_token|.
948     delete $self->{self_closing};
949     }
950    
951 wakaba 1.1 if (@{$self->{token}}) {
952 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
953 wakaba 1.1 return shift @{$self->{token}};
954     }
955    
956     A: {
957 wakaba 1.57 if ($self->{state} == DATA_STATE) {
958 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
959 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
960     not $self->{escape}) {
961 wakaba 1.77 !!!cp (1);
962 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
963     ## "entity data state". In this implementation, the tokenizer
964     ## is switched to the |ENTITY_STATE|, which is an implementation
965     ## of the "consume a character reference" algorithm.
966     $self->{entity_additional} = -1;
967 wakaba 1.169 $self->{prev_state} = DATA_STATE;
968 wakaba 1.167 $self->{state} = ENTITY_STATE;
969 wakaba 1.1 !!!next-input-character;
970     redo A;
971     } else {
972 wakaba 1.77 !!!cp (2);
973 wakaba 1.1 #
974     }
975 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
976 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
977 wakaba 1.13 unless ($self->{escape}) {
978 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
979     $self->{prev_char}->[1] == 0x0021 and # !
980     $self->{prev_char}->[2] == 0x003C) { # <
981 wakaba 1.77 !!!cp (3);
982 wakaba 1.13 $self->{escape} = 1;
983 wakaba 1.77 } else {
984     !!!cp (4);
985 wakaba 1.13 }
986 wakaba 1.77 } else {
987     !!!cp (5);
988 wakaba 1.13 }
989     }
990    
991     #
992 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
993 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
994     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
995 wakaba 1.13 not $self->{escape})) {
996 wakaba 1.77 !!!cp (6);
997 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
998 wakaba 1.1 !!!next-input-character;
999     redo A;
1000     } else {
1001 wakaba 1.77 !!!cp (7);
1002 wakaba 1.1 #
1003     }
1004 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1005 wakaba 1.13 if ($self->{escape} and
1006 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
1007 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
1008     $self->{prev_char}->[1] == 0x002D) { # -
1009 wakaba 1.77 !!!cp (8);
1010 wakaba 1.13 delete $self->{escape};
1011 wakaba 1.77 } else {
1012     !!!cp (9);
1013 wakaba 1.13 }
1014 wakaba 1.77 } else {
1015     !!!cp (10);
1016 wakaba 1.13 }
1017    
1018     #
1019 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1020 wakaba 1.77 !!!cp (11);
1021 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
1022     line => $self->{line}, column => $self->{column}});
1023 wakaba 1.1 last A; ## TODO: ok?
1024 wakaba 1.77 } else {
1025     !!!cp (12);
1026 wakaba 1.1 }
1027     # Anything else
1028 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
1029 wakaba 1.112 data => chr $self->{next_char},
1030 wakaba 1.120 line => $self->{line}, column => $self->{column},
1031 wakaba 1.118 };
1032 wakaba 1.172 $self->{read_until}->($token->{data}, q[-!<>&], length $token->{data});
1033 wakaba 1.171
1034 wakaba 1.1 ## Stay in the data state
1035     !!!next-input-character;
1036    
1037     !!!emit ($token);
1038    
1039     redo A;
1040 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
1041 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1042 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
1043 wakaba 1.77 !!!cp (15);
1044 wakaba 1.1 !!!next-input-character;
1045 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1046 wakaba 1.1 redo A;
1047     } else {
1048 wakaba 1.77 !!!cp (16);
1049 wakaba 1.1 ## reconsume
1050 wakaba 1.57 $self->{state} = DATA_STATE;
1051 wakaba 1.1
1052 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1053 wakaba 1.120 line => $self->{line_prev},
1054     column => $self->{column_prev},
1055 wakaba 1.118 });
1056 wakaba 1.1
1057     redo A;
1058     }
1059 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
1060 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
1061 wakaba 1.77 !!!cp (17);
1062 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
1063 wakaba 1.1 !!!next-input-character;
1064     redo A;
1065 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1066 wakaba 1.77 !!!cp (18);
1067 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1068 wakaba 1.1 !!!next-input-character;
1069     redo A;
1070 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1071     $self->{next_char} <= 0x005A) { # A..Z
1072 wakaba 1.77 !!!cp (19);
1073 wakaba 1.1 $self->{current_token}
1074 wakaba 1.55 = {type => START_TAG_TOKEN,
1075 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
1076     line => $self->{line_prev},
1077     column => $self->{column_prev}};
1078 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1079 wakaba 1.1 !!!next-input-character;
1080     redo A;
1081 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1082     $self->{next_char} <= 0x007A) { # a..z
1083 wakaba 1.77 !!!cp (20);
1084 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
1085 wakaba 1.112 tag_name => chr ($self->{next_char}),
1086     line => $self->{line_prev},
1087     column => $self->{column_prev}};
1088 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1089 wakaba 1.1 !!!next-input-character;
1090     redo A;
1091 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1092 wakaba 1.77 !!!cp (21);
1093 wakaba 1.115 !!!parse-error (type => 'empty start tag',
1094     line => $self->{line_prev},
1095     column => $self->{column_prev});
1096 wakaba 1.57 $self->{state} = DATA_STATE;
1097 wakaba 1.1 !!!next-input-character;
1098    
1099 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
1100 wakaba 1.120 line => $self->{line_prev},
1101     column => $self->{column_prev},
1102 wakaba 1.118 });
1103 wakaba 1.1
1104     redo A;
1105 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
1106 wakaba 1.77 !!!cp (22);
1107 wakaba 1.115 !!!parse-error (type => 'pio',
1108     line => $self->{line_prev},
1109     column => $self->{column_prev});
1110 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1111 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1112 wakaba 1.120 line => $self->{line_prev},
1113     column => $self->{column_prev},
1114 wakaba 1.118 };
1115 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1116 wakaba 1.1 redo A;
1117     } else {
1118 wakaba 1.77 !!!cp (23);
1119 wakaba 1.136 !!!parse-error (type => 'bare stago',
1120     line => $self->{line_prev},
1121     column => $self->{column_prev});
1122 wakaba 1.57 $self->{state} = DATA_STATE;
1123 wakaba 1.1 ## reconsume
1124    
1125 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1126 wakaba 1.120 line => $self->{line_prev},
1127     column => $self->{column_prev},
1128 wakaba 1.118 });
1129 wakaba 1.1
1130     redo A;
1131     }
1132     } else {
1133 wakaba 1.40 die "$0: $self->{content_model} in tag open";
1134 wakaba 1.1 }
1135 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1136 wakaba 1.164 ## NOTE: The "close tag open state" in the spec is implemented as
1137     ## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|.
1138    
1139 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1140 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1141 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
1142 wakaba 1.164 $self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE;
1143     $self->{state_keyword} = '';
1144     ## Reconsume.
1145     redo A;
1146 wakaba 1.23 } else {
1147     ## No start tag token has ever been emitted
1148 wakaba 1.164 ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
1149 wakaba 1.77 !!!cp (28);
1150 wakaba 1.57 $self->{state} = DATA_STATE;
1151 wakaba 1.164 ## Reconsume.
1152 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1153 wakaba 1.120 line => $l, column => $c,
1154 wakaba 1.118 });
1155 wakaba 1.1 redo A;
1156     }
1157     }
1158 wakaba 1.164
1159 wakaba 1.76 if (0x0041 <= $self->{next_char} and
1160     $self->{next_char} <= 0x005A) { # A..Z
1161 wakaba 1.77 !!!cp (29);
1162 wakaba 1.112 $self->{current_token}
1163     = {type => END_TAG_TOKEN,
1164     tag_name => chr ($self->{next_char} + 0x0020),
1165     line => $l, column => $c};
1166 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1167 wakaba 1.1 !!!next-input-character;
1168     redo A;
1169 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1170     $self->{next_char} <= 0x007A) { # a..z
1171 wakaba 1.77 !!!cp (30);
1172 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
1173 wakaba 1.112 tag_name => chr ($self->{next_char}),
1174     line => $l, column => $c};
1175 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1176 wakaba 1.1 !!!next-input-character;
1177     redo A;
1178 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1179 wakaba 1.77 !!!cp (31);
1180 wakaba 1.115 !!!parse-error (type => 'empty end tag',
1181     line => $self->{line_prev}, ## "<" in "</>"
1182     column => $self->{column_prev} - 1);
1183 wakaba 1.57 $self->{state} = DATA_STATE;
1184 wakaba 1.1 !!!next-input-character;
1185     redo A;
1186 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1187 wakaba 1.77 !!!cp (32);
1188 wakaba 1.3 !!!parse-error (type => 'bare etago');
1189 wakaba 1.57 $self->{state} = DATA_STATE;
1190 wakaba 1.1 # reconsume
1191    
1192 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1193 wakaba 1.120 line => $l, column => $c,
1194 wakaba 1.118 });
1195 wakaba 1.1
1196     redo A;
1197     } else {
1198 wakaba 1.77 !!!cp (33);
1199 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
1200 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1201 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1202 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
1203     column => $self->{column_prev} - 1,
1204 wakaba 1.118 };
1205 wakaba 1.164 ## NOTE: $self->{next_char} is intentionally left as is.
1206     ## Although the "anything else" case of the spec not explicitly
1207     ## states that the next input character is to be reconsumed,
1208     ## it will be included to the |data| of the comment token
1209     ## generated from the bogus end tag, as defined in the
1210     ## "bogus comment state" entry.
1211     redo A;
1212     }
1213     } elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) {
1214     my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1;
1215     if (length $ch) {
1216     my $CH = $ch;
1217     $ch =~ tr/a-z/A-Z/;
1218     my $nch = chr $self->{next_char};
1219     if ($nch eq $ch or $nch eq $CH) {
1220     !!!cp (24);
1221     ## Stay in the state.
1222     $self->{state_keyword} .= $nch;
1223     !!!next-input-character;
1224     redo A;
1225     } else {
1226     !!!cp (25);
1227     $self->{state} = DATA_STATE;
1228     ## Reconsume.
1229     !!!emit ({type => CHARACTER_TOKEN,
1230     data => '</' . $self->{state_keyword},
1231     line => $self->{line_prev},
1232     column => $self->{column_prev} - 1 - length $self->{state_keyword},
1233     });
1234     redo A;
1235     }
1236     } else { # after "<{tag-name}"
1237     unless ({
1238     0x0009 => 1, # HT
1239     0x000A => 1, # LF
1240     0x000B => 1, # VT
1241     0x000C => 1, # FF
1242     0x0020 => 1, # SP
1243     0x003E => 1, # >
1244     0x002F => 1, # /
1245     -1 => 1, # EOF
1246     }->{$self->{next_char}}) {
1247     !!!cp (26);
1248     ## Reconsume.
1249     $self->{state} = DATA_STATE;
1250     !!!emit ({type => CHARACTER_TOKEN,
1251     data => '</' . $self->{state_keyword},
1252     line => $self->{line_prev},
1253     column => $self->{column_prev} - 1 - length $self->{state_keyword},
1254     });
1255     redo A;
1256     } else {
1257     !!!cp (27);
1258     $self->{current_token}
1259     = {type => END_TAG_TOKEN,
1260     tag_name => $self->{last_emitted_start_tag_name},
1261     line => $self->{line_prev},
1262     column => $self->{column_prev} - 1 - length $self->{state_keyword}};
1263     $self->{state} = TAG_NAME_STATE;
1264     ## Reconsume.
1265     redo A;
1266     }
1267 wakaba 1.1 }
1268 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
1269 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1270     $self->{next_char} == 0x000A or # LF
1271     $self->{next_char} == 0x000B or # VT
1272     $self->{next_char} == 0x000C or # FF
1273     $self->{next_char} == 0x0020) { # SP
1274 wakaba 1.77 !!!cp (34);
1275 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1276 wakaba 1.1 !!!next-input-character;
1277     redo A;
1278 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1279 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1280 wakaba 1.77 !!!cp (35);
1281 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1282 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1283 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1284 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1285     # ## NOTE: This should never be reached.
1286     # !!! cp (36);
1287     # !!! parse-error (type => 'end tag attribute');
1288     #} else {
1289 wakaba 1.77 !!!cp (37);
1290 wakaba 1.78 #}
1291 wakaba 1.1 } else {
1292     die "$0: $self->{current_token}->{type}: Unknown token type";
1293     }
1294 wakaba 1.57 $self->{state} = DATA_STATE;
1295 wakaba 1.1 !!!next-input-character;
1296    
1297     !!!emit ($self->{current_token}); # start tag or end tag
1298    
1299     redo A;
1300 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1301     $self->{next_char} <= 0x005A) { # A..Z
1302 wakaba 1.77 !!!cp (38);
1303 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1304 wakaba 1.1 # start tag or end tag
1305     ## Stay in this state
1306     !!!next-input-character;
1307     redo A;
1308 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1309 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1310 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1311 wakaba 1.77 !!!cp (39);
1312 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1313 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1314 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1315 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1316     # ## NOTE: This state should never be reached.
1317     # !!! cp (40);
1318     # !!! parse-error (type => 'end tag attribute');
1319     #} else {
1320 wakaba 1.77 !!!cp (41);
1321 wakaba 1.78 #}
1322 wakaba 1.1 } else {
1323     die "$0: $self->{current_token}->{type}: Unknown token type";
1324     }
1325 wakaba 1.57 $self->{state} = DATA_STATE;
1326 wakaba 1.1 # reconsume
1327    
1328     !!!emit ($self->{current_token}); # start tag or end tag
1329    
1330     redo A;
1331 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1332 wakaba 1.125 !!!cp (42);
1333     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1334 wakaba 1.1 !!!next-input-character;
1335     redo A;
1336     } else {
1337 wakaba 1.77 !!!cp (44);
1338 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1339 wakaba 1.1 # start tag or end tag
1340     ## Stay in the state
1341     !!!next-input-character;
1342     redo A;
1343     }
1344 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1345 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1346     $self->{next_char} == 0x000A or # LF
1347     $self->{next_char} == 0x000B or # VT
1348     $self->{next_char} == 0x000C or # FF
1349     $self->{next_char} == 0x0020) { # SP
1350 wakaba 1.77 !!!cp (45);
1351 wakaba 1.1 ## Stay in the state
1352     !!!next-input-character;
1353     redo A;
1354 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1355 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1356 wakaba 1.77 !!!cp (46);
1357 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1358 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1359 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1360 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1361 wakaba 1.77 !!!cp (47);
1362 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1363 wakaba 1.77 } else {
1364     !!!cp (48);
1365 wakaba 1.1 }
1366     } else {
1367     die "$0: $self->{current_token}->{type}: Unknown token type";
1368     }
1369 wakaba 1.57 $self->{state} = DATA_STATE;
1370 wakaba 1.1 !!!next-input-character;
1371    
1372     !!!emit ($self->{current_token}); # start tag or end tag
1373    
1374     redo A;
1375 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1376     $self->{next_char} <= 0x005A) { # A..Z
1377 wakaba 1.77 !!!cp (49);
1378 wakaba 1.119 $self->{current_attribute}
1379     = {name => chr ($self->{next_char} + 0x0020),
1380     value => '',
1381     line => $self->{line}, column => $self->{column}};
1382 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1383 wakaba 1.1 !!!next-input-character;
1384     redo A;
1385 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1386 wakaba 1.125 !!!cp (50);
1387     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1388 wakaba 1.1 !!!next-input-character;
1389     redo A;
1390 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1391 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1392 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1393 wakaba 1.77 !!!cp (52);
1394 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1395 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1396 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1397 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1398 wakaba 1.77 !!!cp (53);
1399 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1400 wakaba 1.77 } else {
1401     !!!cp (54);
1402 wakaba 1.1 }
1403     } else {
1404     die "$0: $self->{current_token}->{type}: Unknown token type";
1405     }
1406 wakaba 1.57 $self->{state} = DATA_STATE;
1407 wakaba 1.1 # reconsume
1408    
1409     !!!emit ($self->{current_token}); # start tag or end tag
1410    
1411     redo A;
1412     } else {
1413 wakaba 1.72 if ({
1414     0x0022 => 1, # "
1415     0x0027 => 1, # '
1416     0x003D => 1, # =
1417 wakaba 1.76 }->{$self->{next_char}}) {
1418 wakaba 1.77 !!!cp (55);
1419 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1420 wakaba 1.77 } else {
1421     !!!cp (56);
1422 wakaba 1.72 }
1423 wakaba 1.119 $self->{current_attribute}
1424     = {name => chr ($self->{next_char}),
1425     value => '',
1426     line => $self->{line}, column => $self->{column}};
1427 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1428 wakaba 1.1 !!!next-input-character;
1429     redo A;
1430     }
1431 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1432 wakaba 1.1 my $before_leave = sub {
1433     if (exists $self->{current_token}->{attributes} # start tag or end tag
1434     ->{$self->{current_attribute}->{name}}) { # MUST
1435 wakaba 1.77 !!!cp (57);
1436 wakaba 1.153 !!!parse-error (type => 'duplicate attribute', text => $self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1437 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1438     } else {
1439 wakaba 1.77 !!!cp (58);
1440 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1441     = $self->{current_attribute};
1442     }
1443     }; # $before_leave
1444    
1445 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1446     $self->{next_char} == 0x000A or # LF
1447     $self->{next_char} == 0x000B or # VT
1448     $self->{next_char} == 0x000C or # FF
1449     $self->{next_char} == 0x0020) { # SP
1450 wakaba 1.77 !!!cp (59);
1451 wakaba 1.1 $before_leave->();
1452 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1453 wakaba 1.1 !!!next-input-character;
1454     redo A;
1455 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1456 wakaba 1.77 !!!cp (60);
1457 wakaba 1.1 $before_leave->();
1458 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1459 wakaba 1.1 !!!next-input-character;
1460     redo A;
1461 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1462 wakaba 1.1 $before_leave->();
1463 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1464 wakaba 1.77 !!!cp (61);
1465 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1466 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1467 wakaba 1.77 !!!cp (62);
1468 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1469 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1470 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1471 wakaba 1.1 }
1472     } else {
1473     die "$0: $self->{current_token}->{type}: Unknown token type";
1474     }
1475 wakaba 1.57 $self->{state} = DATA_STATE;
1476 wakaba 1.1 !!!next-input-character;
1477    
1478     !!!emit ($self->{current_token}); # start tag or end tag
1479    
1480     redo A;
1481 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1482     $self->{next_char} <= 0x005A) { # A..Z
1483 wakaba 1.77 !!!cp (63);
1484 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1485 wakaba 1.1 ## Stay in the state
1486     !!!next-input-character;
1487     redo A;
1488 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1489 wakaba 1.125 !!!cp (64);
1490 wakaba 1.1 $before_leave->();
1491 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1492 wakaba 1.1 !!!next-input-character;
1493     redo A;
1494 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1495 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1496 wakaba 1.1 $before_leave->();
1497 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1498 wakaba 1.77 !!!cp (66);
1499 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1500 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1501 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1502 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1503 wakaba 1.77 !!!cp (67);
1504 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1505 wakaba 1.77 } else {
1506 wakaba 1.78 ## NOTE: This state should never be reached.
1507 wakaba 1.77 !!!cp (68);
1508 wakaba 1.1 }
1509     } else {
1510     die "$0: $self->{current_token}->{type}: Unknown token type";
1511     }
1512 wakaba 1.57 $self->{state} = DATA_STATE;
1513 wakaba 1.1 # reconsume
1514    
1515     !!!emit ($self->{current_token}); # start tag or end tag
1516    
1517     redo A;
1518     } else {
1519 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1520     $self->{next_char} == 0x0027) { # '
1521 wakaba 1.77 !!!cp (69);
1522 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1523 wakaba 1.77 } else {
1524     !!!cp (70);
1525 wakaba 1.72 }
1526 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1527 wakaba 1.1 ## Stay in the state
1528     !!!next-input-character;
1529     redo A;
1530     }
1531 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1532 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1533     $self->{next_char} == 0x000A or # LF
1534     $self->{next_char} == 0x000B or # VT
1535     $self->{next_char} == 0x000C or # FF
1536     $self->{next_char} == 0x0020) { # SP
1537 wakaba 1.77 !!!cp (71);
1538 wakaba 1.1 ## Stay in the state
1539     !!!next-input-character;
1540     redo A;
1541 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1542 wakaba 1.77 !!!cp (72);
1543 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1544 wakaba 1.1 !!!next-input-character;
1545     redo A;
1546 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1547 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1548 wakaba 1.77 !!!cp (73);
1549 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1550 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1551 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1552 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1553 wakaba 1.77 !!!cp (74);
1554 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1555 wakaba 1.77 } else {
1556 wakaba 1.78 ## NOTE: This state should never be reached.
1557 wakaba 1.77 !!!cp (75);
1558 wakaba 1.1 }
1559     } else {
1560     die "$0: $self->{current_token}->{type}: Unknown token type";
1561     }
1562 wakaba 1.57 $self->{state} = DATA_STATE;
1563 wakaba 1.1 !!!next-input-character;
1564    
1565     !!!emit ($self->{current_token}); # start tag or end tag
1566    
1567     redo A;
1568 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1569     $self->{next_char} <= 0x005A) { # A..Z
1570 wakaba 1.77 !!!cp (76);
1571 wakaba 1.119 $self->{current_attribute}
1572     = {name => chr ($self->{next_char} + 0x0020),
1573     value => '',
1574     line => $self->{line}, column => $self->{column}};
1575 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1576 wakaba 1.1 !!!next-input-character;
1577     redo A;
1578 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1579 wakaba 1.125 !!!cp (77);
1580     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1581 wakaba 1.1 !!!next-input-character;
1582     redo A;
1583 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1584 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1585 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1586 wakaba 1.77 !!!cp (79);
1587 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1588 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1589 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1590 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1591 wakaba 1.77 !!!cp (80);
1592 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1593 wakaba 1.77 } else {
1594 wakaba 1.78 ## NOTE: This state should never be reached.
1595 wakaba 1.77 !!!cp (81);
1596 wakaba 1.1 }
1597     } else {
1598     die "$0: $self->{current_token}->{type}: Unknown token type";
1599     }
1600 wakaba 1.57 $self->{state} = DATA_STATE;
1601 wakaba 1.1 # reconsume
1602    
1603     !!!emit ($self->{current_token}); # start tag or end tag
1604    
1605     redo A;
1606     } else {
1607 wakaba 1.156 if ($self->{next_char} == 0x0022 or # "
1608     $self->{next_char} == 0x0027) { # '
1609     !!!cp (78);
1610     !!!parse-error (type => 'bad attribute name');
1611     } else {
1612     !!!cp (82);
1613     }
1614 wakaba 1.119 $self->{current_attribute}
1615     = {name => chr ($self->{next_char}),
1616     value => '',
1617     line => $self->{line}, column => $self->{column}};
1618 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1619 wakaba 1.1 !!!next-input-character;
1620     redo A;
1621     }
1622 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1623 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1624     $self->{next_char} == 0x000A or # LF
1625     $self->{next_char} == 0x000B or # VT
1626     $self->{next_char} == 0x000C or # FF
1627     $self->{next_char} == 0x0020) { # SP
1628 wakaba 1.77 !!!cp (83);
1629 wakaba 1.1 ## Stay in the state
1630     !!!next-input-character;
1631     redo A;
1632 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1633 wakaba 1.77 !!!cp (84);
1634 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1635 wakaba 1.1 !!!next-input-character;
1636     redo A;
1637 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1638 wakaba 1.77 !!!cp (85);
1639 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1640 wakaba 1.1 ## reconsume
1641     redo A;
1642 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1643 wakaba 1.77 !!!cp (86);
1644 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1645 wakaba 1.1 !!!next-input-character;
1646     redo A;
1647 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1648 wakaba 1.156 !!!parse-error (type => 'empty unquoted attribute value');
1649 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1650 wakaba 1.77 !!!cp (87);
1651 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1652 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1653 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1654 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1655 wakaba 1.77 !!!cp (88);
1656 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1657 wakaba 1.77 } else {
1658 wakaba 1.78 ## NOTE: This state should never be reached.
1659 wakaba 1.77 !!!cp (89);
1660 wakaba 1.1 }
1661     } else {
1662     die "$0: $self->{current_token}->{type}: Unknown token type";
1663     }
1664 wakaba 1.57 $self->{state} = DATA_STATE;
1665 wakaba 1.1 !!!next-input-character;
1666    
1667     !!!emit ($self->{current_token}); # start tag or end tag
1668    
1669     redo A;
1670 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1671 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1672 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1673 wakaba 1.77 !!!cp (90);
1674 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1675 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1676 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1677 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1678 wakaba 1.77 !!!cp (91);
1679 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1680 wakaba 1.77 } else {
1681 wakaba 1.78 ## NOTE: This state should never be reached.
1682 wakaba 1.77 !!!cp (92);
1683 wakaba 1.1 }
1684     } else {
1685     die "$0: $self->{current_token}->{type}: Unknown token type";
1686     }
1687 wakaba 1.57 $self->{state} = DATA_STATE;
1688 wakaba 1.1 ## reconsume
1689    
1690     !!!emit ($self->{current_token}); # start tag or end tag
1691    
1692     redo A;
1693     } else {
1694 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1695 wakaba 1.77 !!!cp (93);
1696 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1697 wakaba 1.77 } else {
1698     !!!cp (94);
1699 wakaba 1.72 }
1700 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1701 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1702 wakaba 1.1 !!!next-input-character;
1703     redo A;
1704     }
1705 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1706 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1707 wakaba 1.77 !!!cp (95);
1708 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1709 wakaba 1.1 !!!next-input-character;
1710     redo A;
1711 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1712 wakaba 1.77 !!!cp (96);
1713 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
1714     ## "entity in attribute value state". In this implementation, the
1715     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1716     ## implementation of the "consume a character reference" algorithm.
1717 wakaba 1.169 $self->{prev_state} = $self->{state};
1718 wakaba 1.167 $self->{entity_additional} = 0x0022; # "
1719     $self->{state} = ENTITY_STATE;
1720 wakaba 1.1 !!!next-input-character;
1721     redo A;
1722 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1723 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1724 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1725 wakaba 1.77 !!!cp (97);
1726 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1727 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1728 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1729 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1730 wakaba 1.77 !!!cp (98);
1731 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1732 wakaba 1.77 } else {
1733 wakaba 1.78 ## NOTE: This state should never be reached.
1734 wakaba 1.77 !!!cp (99);
1735 wakaba 1.1 }
1736     } else {
1737     die "$0: $self->{current_token}->{type}: Unknown token type";
1738     }
1739 wakaba 1.57 $self->{state} = DATA_STATE;
1740 wakaba 1.1 ## reconsume
1741    
1742     !!!emit ($self->{current_token}); # start tag or end tag
1743    
1744     redo A;
1745     } else {
1746 wakaba 1.77 !!!cp (100);
1747 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1748 wakaba 1.173 $self->{read_until}->($self->{current_attribute}->{value},
1749     q["&],
1750     length $self->{current_attribute}->{value});
1751    
1752 wakaba 1.1 ## Stay in the state
1753     !!!next-input-character;
1754     redo A;
1755     }
1756 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1757 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1758 wakaba 1.77 !!!cp (101);
1759 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1760 wakaba 1.1 !!!next-input-character;
1761     redo A;
1762 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1763 wakaba 1.77 !!!cp (102);
1764 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
1765     ## "entity in attribute value state". In this implementation, the
1766     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1767     ## implementation of the "consume a character reference" algorithm.
1768     $self->{entity_additional} = 0x0027; # '
1769 wakaba 1.169 $self->{prev_state} = $self->{state};
1770 wakaba 1.167 $self->{state} = ENTITY_STATE;
1771 wakaba 1.1 !!!next-input-character;
1772     redo A;
1773 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1774 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1775 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1776 wakaba 1.77 !!!cp (103);
1777 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1778 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1779 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1780 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1781 wakaba 1.77 !!!cp (104);
1782 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1783 wakaba 1.77 } else {
1784 wakaba 1.78 ## NOTE: This state should never be reached.
1785 wakaba 1.77 !!!cp (105);
1786 wakaba 1.1 }
1787     } else {
1788     die "$0: $self->{current_token}->{type}: Unknown token type";
1789     }
1790 wakaba 1.57 $self->{state} = DATA_STATE;
1791 wakaba 1.1 ## reconsume
1792    
1793     !!!emit ($self->{current_token}); # start tag or end tag
1794    
1795     redo A;
1796     } else {
1797 wakaba 1.77 !!!cp (106);
1798 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1799 wakaba 1.173 $self->{read_until}->($self->{current_attribute}->{value},
1800     q['&],
1801     length $self->{current_attribute}->{value});
1802    
1803 wakaba 1.1 ## Stay in the state
1804     !!!next-input-character;
1805     redo A;
1806     }
1807 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1808 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1809     $self->{next_char} == 0x000A or # LF
1810     $self->{next_char} == 0x000B or # VT
1811     $self->{next_char} == 0x000C or # FF
1812     $self->{next_char} == 0x0020) { # SP
1813 wakaba 1.77 !!!cp (107);
1814 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1815 wakaba 1.1 !!!next-input-character;
1816     redo A;
1817 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1818 wakaba 1.77 !!!cp (108);
1819 wakaba 1.167 ## NOTE: In the spec, the tokenizer is switched to the
1820     ## "entity in attribute value state". In this implementation, the
1821     ## tokenizer is switched to the |ENTITY_STATE|, which is an
1822     ## implementation of the "consume a character reference" algorithm.
1823     $self->{entity_additional} = -1;
1824 wakaba 1.169 $self->{prev_state} = $self->{state};
1825 wakaba 1.167 $self->{state} = ENTITY_STATE;
1826 wakaba 1.1 !!!next-input-character;
1827     redo A;
1828 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1829 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1830 wakaba 1.77 !!!cp (109);
1831 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1832 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1833 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1834 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1835 wakaba 1.77 !!!cp (110);
1836 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1837 wakaba 1.77 } else {
1838 wakaba 1.78 ## NOTE: This state should never be reached.
1839 wakaba 1.77 !!!cp (111);
1840 wakaba 1.1 }
1841     } else {
1842     die "$0: $self->{current_token}->{type}: Unknown token type";
1843     }
1844 wakaba 1.57 $self->{state} = DATA_STATE;
1845 wakaba 1.1 !!!next-input-character;
1846    
1847     !!!emit ($self->{current_token}); # start tag or end tag
1848    
1849     redo A;
1850 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1851 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1852 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1853 wakaba 1.77 !!!cp (112);
1854 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1855 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1856 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1857 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1858 wakaba 1.77 !!!cp (113);
1859 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1860 wakaba 1.77 } else {
1861 wakaba 1.78 ## NOTE: This state should never be reached.
1862 wakaba 1.77 !!!cp (114);
1863 wakaba 1.1 }
1864     } else {
1865     die "$0: $self->{current_token}->{type}: Unknown token type";
1866     }
1867 wakaba 1.57 $self->{state} = DATA_STATE;
1868 wakaba 1.1 ## reconsume
1869    
1870     !!!emit ($self->{current_token}); # start tag or end tag
1871    
1872     redo A;
1873     } else {
1874 wakaba 1.72 if ({
1875     0x0022 => 1, # "
1876     0x0027 => 1, # '
1877     0x003D => 1, # =
1878 wakaba 1.76 }->{$self->{next_char}}) {
1879 wakaba 1.77 !!!cp (115);
1880 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1881 wakaba 1.77 } else {
1882     !!!cp (116);
1883 wakaba 1.72 }
1884 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1885 wakaba 1.173 $self->{read_until}->($self->{current_attribute}->{value},
1886     q["'=& >],
1887     length $self->{current_attribute}->{value});
1888    
1889 wakaba 1.1 ## Stay in the state
1890     !!!next-input-character;
1891     redo A;
1892     }
1893 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1894 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1895     $self->{next_char} == 0x000A or # LF
1896     $self->{next_char} == 0x000B or # VT
1897     $self->{next_char} == 0x000C or # FF
1898     $self->{next_char} == 0x0020) { # SP
1899 wakaba 1.77 !!!cp (118);
1900 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1901     !!!next-input-character;
1902     redo A;
1903 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1904 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1905 wakaba 1.77 !!!cp (119);
1906 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1907     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1908     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1909     if ($self->{current_token}->{attributes}) {
1910 wakaba 1.77 !!!cp (120);
1911 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1912 wakaba 1.77 } else {
1913 wakaba 1.78 ## NOTE: This state should never be reached.
1914 wakaba 1.77 !!!cp (121);
1915 wakaba 1.72 }
1916     } else {
1917     die "$0: $self->{current_token}->{type}: Unknown token type";
1918     }
1919     $self->{state} = DATA_STATE;
1920     !!!next-input-character;
1921    
1922     !!!emit ($self->{current_token}); # start tag or end tag
1923    
1924     redo A;
1925 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1926 wakaba 1.125 !!!cp (122);
1927     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1928 wakaba 1.72 !!!next-input-character;
1929 wakaba 1.125 redo A;
1930 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1931     !!!parse-error (type => 'unclosed tag');
1932     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1933     !!!cp (122.3);
1934     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1935     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1936     if ($self->{current_token}->{attributes}) {
1937     !!!cp (122.1);
1938     !!!parse-error (type => 'end tag attribute');
1939     } else {
1940     ## NOTE: This state should never be reached.
1941     !!!cp (122.2);
1942     }
1943     } else {
1944     die "$0: $self->{current_token}->{type}: Unknown token type";
1945     }
1946     $self->{state} = DATA_STATE;
1947     ## Reconsume.
1948     !!!emit ($self->{current_token}); # start tag or end tag
1949     redo A;
1950 wakaba 1.125 } else {
1951     !!!cp ('124.1');
1952     !!!parse-error (type => 'no space between attributes');
1953     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1954     ## reconsume
1955     redo A;
1956     }
1957     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1958     if ($self->{next_char} == 0x003E) { # >
1959     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1960     !!!cp ('124.2');
1961     !!!parse-error (type => 'nestc', token => $self->{current_token});
1962     ## TODO: Different type than slash in start tag
1963     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1964     if ($self->{current_token}->{attributes}) {
1965     !!!cp ('124.4');
1966     !!!parse-error (type => 'end tag attribute');
1967     } else {
1968     !!!cp ('124.5');
1969     }
1970     ## TODO: Test |<title></title/>|
1971 wakaba 1.72 } else {
1972 wakaba 1.125 !!!cp ('124.3');
1973     $self->{self_closing} = 1;
1974 wakaba 1.72 }
1975 wakaba 1.125
1976     $self->{state} = DATA_STATE;
1977     !!!next-input-character;
1978    
1979     !!!emit ($self->{current_token}); # start tag or end tag
1980    
1981 wakaba 1.72 redo A;
1982 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1983     !!!parse-error (type => 'unclosed tag');
1984     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1985     !!!cp (124.7);
1986     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1987     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1988     if ($self->{current_token}->{attributes}) {
1989     !!!cp (124.5);
1990     !!!parse-error (type => 'end tag attribute');
1991     } else {
1992     ## NOTE: This state should never be reached.
1993     !!!cp (124.6);
1994     }
1995     } else {
1996     die "$0: $self->{current_token}->{type}: Unknown token type";
1997     }
1998     $self->{state} = DATA_STATE;
1999     ## Reconsume.
2000     !!!emit ($self->{current_token}); # start tag or end tag
2001     redo A;
2002 wakaba 1.72 } else {
2003 wakaba 1.125 !!!cp ('124.4');
2004     !!!parse-error (type => 'nestc');
2005     ## TODO: This error type is wrong.
2006 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2007 wakaba 1.125 ## Reconsume.
2008 wakaba 1.72 redo A;
2009     }
2010 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
2011 wakaba 1.1 ## (only happen if PCDATA state)
2012 wakaba 1.167
2013     ## NOTE: Unlike the spec's "bogus comment state", this implementation
2014     ## consumes characters on a one-by-one basis.
2015 wakaba 1.1
2016 wakaba 1.167 if ($self->{next_char} == 0x003E) { # >
2017     !!!cp (124);
2018     $self->{state} = DATA_STATE;
2019     !!!next-input-character;
2020 wakaba 1.1
2021 wakaba 1.167 !!!emit ($self->{current_token}); # comment
2022     redo A;
2023     } elsif ($self->{next_char} == -1) {
2024     !!!cp (125);
2025     $self->{state} = DATA_STATE;
2026     ## reconsume
2027 wakaba 1.1
2028 wakaba 1.167 !!!emit ($self->{current_token}); # comment
2029     redo A;
2030     } else {
2031     !!!cp (126);
2032     $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2033 wakaba 1.173 $self->{read_until}->($self->{current_token}->{data},
2034     q[>],
2035     length $self->{current_token}->{data});
2036    
2037 wakaba 1.167 ## Stay in the state.
2038     !!!next-input-character;
2039     redo A;
2040     }
2041 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2042 wakaba 1.1 ## (only happen if PCDATA state)
2043    
2044 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2045 wakaba 1.163 !!!cp (133);
2046     $self->{state} = MD_HYPHEN_STATE;
2047 wakaba 1.1 !!!next-input-character;
2048 wakaba 1.163 redo A;
2049 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
2050     $self->{next_char} == 0x0064) { # d
2051 wakaba 1.163 ## ASCII case-insensitive.
2052     !!!cp (130);
2053     $self->{state} = MD_DOCTYPE_STATE;
2054     $self->{state_keyword} = chr $self->{next_char};
2055 wakaba 1.1 !!!next-input-character;
2056 wakaba 1.163 redo A;
2057 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2058     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
2059     $self->{next_char} == 0x005B) { # [
2060 wakaba 1.163 !!!cp (135.4);
2061     $self->{state} = MD_CDATA_STATE;
2062     $self->{state_keyword} = '[';
2063 wakaba 1.127 !!!next-input-character;
2064 wakaba 1.163 redo A;
2065 wakaba 1.77 } else {
2066     !!!cp (136);
2067 wakaba 1.1 }
2068    
2069 wakaba 1.163 !!!parse-error (type => 'bogus comment',
2070     line => $self->{line_prev},
2071     column => $self->{column_prev} - 1);
2072     ## Reconsume.
2073 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
2074 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2075 wakaba 1.163 line => $self->{line_prev},
2076     column => $self->{column_prev} - 1,
2077 wakaba 1.118 };
2078 wakaba 1.1 redo A;
2079 wakaba 1.163 } elsif ($self->{state} == MD_HYPHEN_STATE) {
2080     if ($self->{next_char} == 0x002D) { # -
2081     !!!cp (127);
2082     $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2083     line => $self->{line_prev},
2084     column => $self->{column_prev} - 2,
2085     };
2086     $self->{state} = COMMENT_START_STATE;
2087     !!!next-input-character;
2088     redo A;
2089     } else {
2090     !!!cp (128);
2091     !!!parse-error (type => 'bogus comment',
2092     line => $self->{line_prev},
2093     column => $self->{column_prev} - 2);
2094     $self->{state} = BOGUS_COMMENT_STATE;
2095     ## Reconsume.
2096     $self->{current_token} = {type => COMMENT_TOKEN,
2097     data => '-',
2098     line => $self->{line_prev},
2099     column => $self->{column_prev} - 2,
2100     };
2101     redo A;
2102     }
2103     } elsif ($self->{state} == MD_DOCTYPE_STATE) {
2104     ## ASCII case-insensitive.
2105     if ($self->{next_char} == [
2106     undef,
2107     0x004F, # O
2108     0x0043, # C
2109     0x0054, # T
2110     0x0059, # Y
2111     0x0050, # P
2112     ]->[length $self->{state_keyword}] or
2113     $self->{next_char} == [
2114     undef,
2115     0x006F, # o
2116     0x0063, # c
2117     0x0074, # t
2118     0x0079, # y
2119     0x0070, # p
2120     ]->[length $self->{state_keyword}]) {
2121     !!!cp (131);
2122     ## Stay in the state.
2123     $self->{state_keyword} .= chr $self->{next_char};
2124     !!!next-input-character;
2125     redo A;
2126     } elsif ((length $self->{state_keyword}) == 6 and
2127     ($self->{next_char} == 0x0045 or # E
2128     $self->{next_char} == 0x0065)) { # e
2129     !!!cp (129);
2130     $self->{state} = DOCTYPE_STATE;
2131     $self->{current_token} = {type => DOCTYPE_TOKEN,
2132     quirks => 1,
2133     line => $self->{line_prev},
2134     column => $self->{column_prev} - 7,
2135     };
2136     !!!next-input-character;
2137     redo A;
2138     } else {
2139     !!!cp (132);
2140     !!!parse-error (type => 'bogus comment',
2141     line => $self->{line_prev},
2142     column => $self->{column_prev} - 1 - length $self->{state_keyword});
2143     $self->{state} = BOGUS_COMMENT_STATE;
2144     ## Reconsume.
2145     $self->{current_token} = {type => COMMENT_TOKEN,
2146     data => $self->{state_keyword},
2147     line => $self->{line_prev},
2148     column => $self->{column_prev} - 1 - length $self->{state_keyword},
2149     };
2150     redo A;
2151     }
2152     } elsif ($self->{state} == MD_CDATA_STATE) {
2153     if ($self->{next_char} == {
2154     '[' => 0x0043, # C
2155     '[C' => 0x0044, # D
2156     '[CD' => 0x0041, # A
2157     '[CDA' => 0x0054, # T
2158     '[CDAT' => 0x0041, # A
2159     }->{$self->{state_keyword}}) {
2160     !!!cp (135.1);
2161     ## Stay in the state.
2162     $self->{state_keyword} .= chr $self->{next_char};
2163     !!!next-input-character;
2164     redo A;
2165     } elsif ($self->{state_keyword} eq '[CDATA' and
2166     $self->{next_char} == 0x005B) { # [
2167     !!!cp (135.2);
2168 wakaba 1.165 $self->{current_token} = {type => CHARACTER_TOKEN,
2169     data => '',
2170     line => $self->{line_prev},
2171     column => $self->{column_prev} - 7};
2172     $self->{state} = CDATA_SECTION_STATE;
2173 wakaba 1.163 !!!next-input-character;
2174     redo A;
2175     } else {
2176     !!!cp (135.3);
2177     !!!parse-error (type => 'bogus comment',
2178     line => $self->{line_prev},
2179     column => $self->{column_prev} - 1 - length $self->{state_keyword});
2180     $self->{state} = BOGUS_COMMENT_STATE;
2181     ## Reconsume.
2182     $self->{current_token} = {type => COMMENT_TOKEN,
2183     data => $self->{state_keyword},
2184     line => $self->{line_prev},
2185     column => $self->{column_prev} - 1 - length $self->{state_keyword},
2186     };
2187     redo A;
2188     }
2189 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
2190 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2191 wakaba 1.77 !!!cp (137);
2192 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
2193 wakaba 1.23 !!!next-input-character;
2194     redo A;
2195 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2196 wakaba 1.77 !!!cp (138);
2197 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2198 wakaba 1.57 $self->{state} = DATA_STATE;
2199 wakaba 1.23 !!!next-input-character;
2200    
2201     !!!emit ($self->{current_token}); # comment
2202    
2203     redo A;
2204 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2205 wakaba 1.77 !!!cp (139);
2206 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2207 wakaba 1.57 $self->{state} = DATA_STATE;
2208 wakaba 1.23 ## reconsume
2209    
2210     !!!emit ($self->{current_token}); # comment
2211    
2212     redo A;
2213     } else {
2214 wakaba 1.77 !!!cp (140);
2215 wakaba 1.23 $self->{current_token}->{data} # comment
2216 wakaba 1.76 .= chr ($self->{next_char});
2217 wakaba 1.57 $self->{state} = COMMENT_STATE;
2218 wakaba 1.23 !!!next-input-character;
2219     redo A;
2220     }
2221 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
2222 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2223 wakaba 1.77 !!!cp (141);
2224 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2225 wakaba 1.23 !!!next-input-character;
2226     redo A;
2227 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2228 wakaba 1.77 !!!cp (142);
2229 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2230 wakaba 1.57 $self->{state} = DATA_STATE;
2231 wakaba 1.23 !!!next-input-character;
2232    
2233     !!!emit ($self->{current_token}); # comment
2234    
2235     redo A;
2236 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2237 wakaba 1.77 !!!cp (143);
2238 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2239 wakaba 1.57 $self->{state} = DATA_STATE;
2240 wakaba 1.23 ## reconsume
2241    
2242     !!!emit ($self->{current_token}); # comment
2243    
2244     redo A;
2245     } else {
2246 wakaba 1.77 !!!cp (144);
2247 wakaba 1.23 $self->{current_token}->{data} # comment
2248 wakaba 1.76 .= '-' . chr ($self->{next_char});
2249 wakaba 1.57 $self->{state} = COMMENT_STATE;
2250 wakaba 1.23 !!!next-input-character;
2251     redo A;
2252     }
2253 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
2254 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2255 wakaba 1.77 !!!cp (145);
2256 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
2257 wakaba 1.1 !!!next-input-character;
2258     redo A;
2259 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2260 wakaba 1.77 !!!cp (146);
2261 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2262 wakaba 1.57 $self->{state} = DATA_STATE;
2263 wakaba 1.1 ## reconsume
2264    
2265     !!!emit ($self->{current_token}); # comment
2266    
2267     redo A;
2268     } else {
2269 wakaba 1.77 !!!cp (147);
2270 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2271 wakaba 1.173 $self->{read_until}->($self->{current_token}->{data},
2272     q[-],
2273     length $self->{current_token}->{data});
2274    
2275 wakaba 1.1 ## Stay in the state
2276     !!!next-input-character;
2277     redo A;
2278     }
2279 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2280 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2281 wakaba 1.77 !!!cp (148);
2282 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2283 wakaba 1.1 !!!next-input-character;
2284     redo A;
2285 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2286 wakaba 1.77 !!!cp (149);
2287 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2288 wakaba 1.57 $self->{state} = DATA_STATE;
2289 wakaba 1.1 ## reconsume
2290    
2291     !!!emit ($self->{current_token}); # comment
2292    
2293     redo A;
2294     } else {
2295 wakaba 1.77 !!!cp (150);
2296 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2297 wakaba 1.57 $self->{state} = COMMENT_STATE;
2298 wakaba 1.1 !!!next-input-character;
2299     redo A;
2300     }
2301 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
2302 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2303 wakaba 1.77 !!!cp (151);
2304 wakaba 1.57 $self->{state} = DATA_STATE;
2305 wakaba 1.1 !!!next-input-character;
2306    
2307     !!!emit ($self->{current_token}); # comment
2308    
2309     redo A;
2310 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
2311 wakaba 1.77 !!!cp (152);
2312 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2313     line => $self->{line_prev},
2314     column => $self->{column_prev});
2315 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
2316     ## Stay in the state
2317     !!!next-input-character;
2318     redo A;
2319 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2320 wakaba 1.77 !!!cp (153);
2321 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2322 wakaba 1.57 $self->{state} = DATA_STATE;
2323 wakaba 1.1 ## reconsume
2324    
2325     !!!emit ($self->{current_token}); # comment
2326    
2327     redo A;
2328     } else {
2329 wakaba 1.77 !!!cp (154);
2330 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2331     line => $self->{line_prev},
2332     column => $self->{column_prev});
2333 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2334 wakaba 1.57 $self->{state} = COMMENT_STATE;
2335 wakaba 1.1 !!!next-input-character;
2336     redo A;
2337     }
2338 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
2339 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2340     $self->{next_char} == 0x000A or # LF
2341     $self->{next_char} == 0x000B or # VT
2342     $self->{next_char} == 0x000C or # FF
2343     $self->{next_char} == 0x0020) { # SP
2344 wakaba 1.77 !!!cp (155);
2345 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2346 wakaba 1.1 !!!next-input-character;
2347     redo A;
2348     } else {
2349 wakaba 1.77 !!!cp (156);
2350 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
2351 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2352 wakaba 1.1 ## reconsume
2353     redo A;
2354     }
2355 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2356 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2357     $self->{next_char} == 0x000A or # LF
2358     $self->{next_char} == 0x000B or # VT
2359     $self->{next_char} == 0x000C or # FF
2360     $self->{next_char} == 0x0020) { # SP
2361 wakaba 1.77 !!!cp (157);
2362 wakaba 1.1 ## Stay in the state
2363     !!!next-input-character;
2364     redo A;
2365 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2366 wakaba 1.77 !!!cp (158);
2367 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2368 wakaba 1.57 $self->{state} = DATA_STATE;
2369 wakaba 1.1 !!!next-input-character;
2370    
2371 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2372 wakaba 1.1
2373     redo A;
2374 wakaba 1.77 } elsif ($self->{next_char} == -1) {
2375     !!!cp (159);
2376 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2377 wakaba 1.57 $self->{state} = DATA_STATE;
2378 wakaba 1.1 ## reconsume
2379    
2380 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2381 wakaba 1.1
2382     redo A;
2383     } else {
2384 wakaba 1.77 !!!cp (160);
2385 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
2386     delete $self->{current_token}->{quirks};
2387 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
2388 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
2389 wakaba 1.1 !!!next-input-character;
2390     redo A;
2391     }
2392 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2393 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
2394 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2395     $self->{next_char} == 0x000A or # LF
2396     $self->{next_char} == 0x000B or # VT
2397     $self->{next_char} == 0x000C or # FF
2398     $self->{next_char} == 0x0020) { # SP
2399 wakaba 1.77 !!!cp (161);
2400 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2401 wakaba 1.1 !!!next-input-character;
2402     redo A;
2403 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2404 wakaba 1.77 !!!cp (162);
2405 wakaba 1.57 $self->{state} = DATA_STATE;
2406 wakaba 1.1 !!!next-input-character;
2407    
2408     !!!emit ($self->{current_token}); # DOCTYPE
2409    
2410     redo A;
2411 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2412 wakaba 1.77 !!!cp (163);
2413 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2414 wakaba 1.57 $self->{state} = DATA_STATE;
2415 wakaba 1.1 ## reconsume
2416    
2417 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2418 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2419 wakaba 1.1
2420     redo A;
2421     } else {
2422 wakaba 1.77 !!!cp (164);
2423 wakaba 1.1 $self->{current_token}->{name}
2424 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
2425 wakaba 1.1 ## Stay in the state
2426     !!!next-input-character;
2427     redo A;
2428     }
2429 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2430 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2431     $self->{next_char} == 0x000A or # LF
2432     $self->{next_char} == 0x000B or # VT
2433     $self->{next_char} == 0x000C or # FF
2434     $self->{next_char} == 0x0020) { # SP
2435 wakaba 1.77 !!!cp (165);
2436 wakaba 1.1 ## Stay in the state
2437     !!!next-input-character;
2438     redo A;
2439 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2440 wakaba 1.77 !!!cp (166);
2441 wakaba 1.57 $self->{state} = DATA_STATE;
2442 wakaba 1.1 !!!next-input-character;
2443    
2444     !!!emit ($self->{current_token}); # DOCTYPE
2445    
2446     redo A;
2447 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2448 wakaba 1.77 !!!cp (167);
2449 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2450 wakaba 1.57 $self->{state} = DATA_STATE;
2451 wakaba 1.1 ## reconsume
2452    
2453 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2454 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2455    
2456     redo A;
2457 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2458     $self->{next_char} == 0x0070) { # p
2459 wakaba 1.166 $self->{state} = PUBLIC_STATE;
2460     $self->{state_keyword} = chr $self->{next_char};
2461 wakaba 1.18 !!!next-input-character;
2462 wakaba 1.166 redo A;
2463 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2464     $self->{next_char} == 0x0073) { # s
2465 wakaba 1.166 $self->{state} = SYSTEM_STATE;
2466     $self->{state_keyword} = chr $self->{next_char};
2467 wakaba 1.18 !!!next-input-character;
2468 wakaba 1.166 redo A;
2469 wakaba 1.18 } else {
2470 wakaba 1.77 !!!cp (180);
2471 wakaba 1.166 !!!parse-error (type => 'string after DOCTYPE name');
2472     $self->{current_token}->{quirks} = 1;
2473    
2474     $self->{state} = BOGUS_DOCTYPE_STATE;
2475 wakaba 1.18 !!!next-input-character;
2476 wakaba 1.166 redo A;
2477 wakaba 1.18 }
2478 wakaba 1.166 } elsif ($self->{state} == PUBLIC_STATE) {
2479     ## ASCII case-insensitive
2480     if ($self->{next_char} == [
2481     undef,
2482     0x0055, # U
2483     0x0042, # B
2484     0x004C, # L
2485     0x0049, # I
2486     ]->[length $self->{state_keyword}] or
2487     $self->{next_char} == [
2488     undef,
2489     0x0075, # u
2490     0x0062, # b
2491     0x006C, # l
2492     0x0069, # i
2493     ]->[length $self->{state_keyword}]) {
2494     !!!cp (175);
2495     ## Stay in the state.
2496     $self->{state_keyword} .= chr $self->{next_char};
2497     !!!next-input-character;
2498     redo A;
2499     } elsif ((length $self->{state_keyword}) == 5 and
2500     ($self->{next_char} == 0x0043 or # C
2501     $self->{next_char} == 0x0063)) { # c
2502     !!!cp (168);
2503     $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2504     !!!next-input-character;
2505     redo A;
2506     } else {
2507     !!!cp (169);
2508     !!!parse-error (type => 'string after DOCTYPE name',
2509     line => $self->{line_prev},
2510     column => $self->{column_prev} + 1 - length $self->{state_keyword});
2511     $self->{current_token}->{quirks} = 1;
2512 wakaba 1.18
2513 wakaba 1.166 $self->{state} = BOGUS_DOCTYPE_STATE;
2514     ## Reconsume.
2515     redo A;
2516     }
2517     } elsif ($self->{state} == SYSTEM_STATE) {
2518     ## ASCII case-insensitive
2519     if ($self->{next_char} == [
2520     undef,
2521     0x0059, # Y
2522     0x0053, # S
2523     0x0054, # T
2524     0x0045, # E
2525     ]->[length $self->{state_keyword}] or
2526     $self->{next_char} == [
2527     undef,
2528     0x0079, # y
2529     0x0073, # s
2530     0x0074, # t
2531     0x0065, # e
2532     ]->[length $self->{state_keyword}]) {
2533     !!!cp (170);
2534     ## Stay in the state.
2535     $self->{state_keyword} .= chr $self->{next_char};
2536     !!!next-input-character;
2537     redo A;
2538     } elsif ((length $self->{state_keyword}) == 5 and
2539     ($self->{next_char} == 0x004D or # M
2540     $self->{next_char} == 0x006D)) { # m
2541     !!!cp (171);
2542     $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2543     !!!next-input-character;
2544     redo A;
2545     } else {
2546     !!!cp (172);
2547     !!!parse-error (type => 'string after DOCTYPE name',
2548     line => $self->{line_prev},
2549     column => $self->{column_prev} + 1 - length $self->{state_keyword});
2550     $self->{current_token}->{quirks} = 1;
2551 wakaba 1.73
2552 wakaba 1.166 $self->{state} = BOGUS_DOCTYPE_STATE;
2553     ## Reconsume.
2554     redo A;
2555     }
} elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
  ## Tokenizer arm for the "before DOCTYPE public identifier" state.
  ## Dispatches on the current input character |$self->{next_char}|
  ## (a code point number, or -1 at end of input):
  ##   - white space:  skipped;
  ##   - |"| or |'|:   starts an empty public identifier and enters the
  ##                   matching quoted-identifier state;
  ##   - |>| or EOF:   parse error; the DOCTYPE token is flagged as
  ##                   quirks and emitted;
  ##   - anything else: parse error; quirks flag set; bogus DOCTYPE.
  ## NOTE: |next_char| is a number, so it is compared with |==|
  ## throughout, as in the other states of this tokenizer.
  if ({
        0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
        #0x000D => 1, # HT, LF, VT, FF, SP, CR
      }->{$self->{next_char}}) {
    !!!cp (181);
    ## Stay in the state
    !!!next-input-character;
    redo A;
  } elsif ($self->{next_char} == 0x0022) { # "
    !!!cp (182);
    $self->{current_token}->{public_identifier} = ''; # DOCTYPE
    $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
    !!!next-input-character;
    redo A;
  } elsif ($self->{next_char} == 0x0027) { # '
    !!!cp (183);
    $self->{current_token}->{public_identifier} = ''; # DOCTYPE
    $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
    !!!next-input-character;
    redo A;
  } elsif ($self->{next_char} == 0x003E) { # >
    !!!cp (184);
    !!!parse-error (type => 'no PUBLIC literal');

    $self->{state} = DATA_STATE;
    !!!next-input-character;

    $self->{current_token}->{quirks} = 1;
    !!!emit ($self->{current_token}); # DOCTYPE

    redo A;
  } elsif ($self->{next_char} == -1) {
    !!!cp (185);
    !!!parse-error (type => 'unclosed DOCTYPE');

    $self->{state} = DATA_STATE;
    ## reconsume

    $self->{current_token}->{quirks} = 1;
    !!!emit ($self->{current_token}); # DOCTYPE

    redo A;
  } else {
    !!!cp (186);
    !!!parse-error (type => 'string after PUBLIC');
    $self->{current_token}->{quirks} = 1;

    $self->{state} = BOGUS_DOCTYPE_STATE;
    !!!next-input-character;
    redo A;
  }
2608 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2609 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2610 wakaba 1.77 !!!cp (187);
2611 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2612 wakaba 1.18 !!!next-input-character;
2613     redo A;
2614 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2615 wakaba 1.77 !!!cp (188);
2616 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2617    
2618     $self->{state} = DATA_STATE;
2619     !!!next-input-character;
2620    
2621 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2622 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2623    
2624     redo A;
2625 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2626 wakaba 1.77 !!!cp (189);
2627 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2628    
2629 wakaba 1.57 $self->{state} = DATA_STATE;
2630 wakaba 1.18 ## reconsume
2631    
2632 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2633 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2634    
2635     redo A;
2636     } else {
2637 wakaba 1.77 !!!cp (190);
2638 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2639 wakaba 1.76 .= chr $self->{next_char};
2640 wakaba 1.173 $self->{read_until}->($self->{current_token}->{public_identifier},
2641     q[">],
2642     length $self->{current_token}->{public_identifier});
2643    
2644 wakaba 1.18 ## Stay in the state
2645     !!!next-input-character;
2646     redo A;
2647     }
2648 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2649 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2650 wakaba 1.77 !!!cp (191);
2651 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2652 wakaba 1.18 !!!next-input-character;
2653     redo A;
2654 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2655 wakaba 1.77 !!!cp (192);
2656 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2657    
2658     $self->{state} = DATA_STATE;
2659     !!!next-input-character;
2660    
2661 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2662 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2663    
2664     redo A;
2665 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2666 wakaba 1.77 !!!cp (193);
2667 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2668    
2669 wakaba 1.57 $self->{state} = DATA_STATE;
2670 wakaba 1.18 ## reconsume
2671    
2672 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2673 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2674    
2675     redo A;
2676     } else {
2677 wakaba 1.77 !!!cp (194);
2678 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2679 wakaba 1.76 .= chr $self->{next_char};
2680 wakaba 1.173 $self->{read_until}->($self->{current_token}->{public_identifier},
2681     q['>],
2682     length $self->{current_token}->{public_identifier});
2683    
2684 wakaba 1.18 ## Stay in the state
2685     !!!next-input-character;
2686     redo A;
2687     }
2688 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2689 wakaba 1.18 if ({
2690     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2691     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2692 wakaba 1.76 }->{$self->{next_char}}) {
2693 wakaba 1.77 !!!cp (195);
2694 wakaba 1.18 ## Stay in the state
2695     !!!next-input-character;
2696     redo A;
2697 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2698 wakaba 1.77 !!!cp (196);
2699 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2700 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2701 wakaba 1.18 !!!next-input-character;
2702     redo A;
2703 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2704 wakaba 1.77 !!!cp (197);
2705 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2706 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2707 wakaba 1.18 !!!next-input-character;
2708     redo A;
2709 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2710 wakaba 1.77 !!!cp (198);
2711 wakaba 1.57 $self->{state} = DATA_STATE;
2712 wakaba 1.18 !!!next-input-character;
2713    
2714     !!!emit ($self->{current_token}); # DOCTYPE
2715    
2716     redo A;
2717 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2718 wakaba 1.77 !!!cp (199);
2719 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2720    
2721 wakaba 1.57 $self->{state} = DATA_STATE;
2722 wakaba 1.26 ## reconsume
2723 wakaba 1.18
2724 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2725 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2726    
2727     redo A;
2728     } else {
2729 wakaba 1.77 !!!cp (200);
2730 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2731 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2732 wakaba 1.73
2733 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2734 wakaba 1.18 !!!next-input-character;
2735     redo A;
2736     }
2737 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2738 wakaba 1.18 if ({
2739     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2740     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2741 wakaba 1.76 }->{$self->{next_char}}) {
2742 wakaba 1.77 !!!cp (201);
2743 wakaba 1.18 ## Stay in the state
2744     !!!next-input-character;
2745     redo A;
2746 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2747 wakaba 1.77 !!!cp (202);
2748 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2749 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2750 wakaba 1.18 !!!next-input-character;
2751     redo A;
2752 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2753 wakaba 1.77 !!!cp (203);
2754 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2755 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2756 wakaba 1.18 !!!next-input-character;
2757     redo A;
2758 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2759 wakaba 1.77 !!!cp (204);
2760 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2761 wakaba 1.57 $self->{state} = DATA_STATE;
2762 wakaba 1.18 !!!next-input-character;
2763    
2764 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2765 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2766    
2767     redo A;
2768 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2769 wakaba 1.77 !!!cp (205);
2770 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2771    
2772 wakaba 1.57 $self->{state} = DATA_STATE;
2773 wakaba 1.26 ## reconsume
2774 wakaba 1.18
2775 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2776 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2777    
2778     redo A;
2779     } else {
2780 wakaba 1.77 !!!cp (206);
2781 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2782 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2783 wakaba 1.73
2784 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2785 wakaba 1.18 !!!next-input-character;
2786     redo A;
2787     }
2788 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2789 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2790 wakaba 1.77 !!!cp (207);
2791 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2792 wakaba 1.18 !!!next-input-character;
2793     redo A;
2794 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2795 wakaba 1.77 !!!cp (208);
2796 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2797 wakaba 1.69
2798     $self->{state} = DATA_STATE;
2799     !!!next-input-character;
2800    
2801 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2802 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2803    
2804     redo A;
2805 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2806 wakaba 1.77 !!!cp (209);
2807 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2808    
2809 wakaba 1.57 $self->{state} = DATA_STATE;
2810 wakaba 1.18 ## reconsume
2811    
2812 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2813 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2814    
2815     redo A;
2816     } else {
2817 wakaba 1.77 !!!cp (210);
2818 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2819 wakaba 1.76 .= chr $self->{next_char};
2820 wakaba 1.173 $self->{read_until}->($self->{current_token}->{system_identifier},
2821     q[">],
2822     length $self->{current_token}->{system_identifier});
2823    
2824 wakaba 1.18 ## Stay in the state
2825     !!!next-input-character;
2826     redo A;
2827     }
2828 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2829 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2830 wakaba 1.77 !!!cp (211);
2831 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2832 wakaba 1.18 !!!next-input-character;
2833     redo A;
2834 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2835 wakaba 1.77 !!!cp (212);
2836 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2837 wakaba 1.69
2838     $self->{state} = DATA_STATE;
2839     !!!next-input-character;
2840    
2841 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2842 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2843    
2844     redo A;
2845 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2846 wakaba 1.77 !!!cp (213);
2847 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2848    
2849 wakaba 1.57 $self->{state} = DATA_STATE;
2850 wakaba 1.18 ## reconsume
2851    
2852 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2853 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2854    
2855     redo A;
2856     } else {
2857 wakaba 1.77 !!!cp (214);
2858 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2859 wakaba 1.76 .= chr $self->{next_char};
2860 wakaba 1.173 $self->{read_until}->($self->{current_token}->{system_identifier},
2861     q['>],
2862     length $self->{current_token}->{system_identifier});
2863    
2864 wakaba 1.18 ## Stay in the state
2865     !!!next-input-character;
2866     redo A;
2867     }
2868 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2869 wakaba 1.18 if ({
2870     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2871     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2872 wakaba 1.76 }->{$self->{next_char}}) {
2873 wakaba 1.77 !!!cp (215);
2874 wakaba 1.18 ## Stay in the state
2875     !!!next-input-character;
2876     redo A;
2877 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2878 wakaba 1.77 !!!cp (216);
2879 wakaba 1.57 $self->{state} = DATA_STATE;
2880 wakaba 1.18 !!!next-input-character;
2881    
2882     !!!emit ($self->{current_token}); # DOCTYPE
2883    
2884     redo A;
2885 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2886 wakaba 1.77 !!!cp (217);
2887 wakaba 1.150 !!!parse-error (type => 'unclosed DOCTYPE');
2888 wakaba 1.57 $self->{state} = DATA_STATE;
2889 wakaba 1.26 ## reconsume
2890 wakaba 1.18
2891 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2892 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2893    
2894     redo A;
2895     } else {
2896 wakaba 1.77 !!!cp (218);
2897 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2898 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2899 wakaba 1.73
2900 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2901 wakaba 1.1 !!!next-input-character;
2902     redo A;
2903     }
2904 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2905 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2906 wakaba 1.77 !!!cp (219);
2907 wakaba 1.57 $self->{state} = DATA_STATE;
2908 wakaba 1.1 !!!next-input-character;
2909    
2910     !!!emit ($self->{current_token}); # DOCTYPE
2911    
2912     redo A;
2913 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2914 wakaba 1.77 !!!cp (220);
2915 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2916 wakaba 1.57 $self->{state} = DATA_STATE;
2917 wakaba 1.1 ## reconsume
2918    
2919     !!!emit ($self->{current_token}); # DOCTYPE
2920    
2921     redo A;
2922     } else {
2923 wakaba 1.77 !!!cp (221);
2924 wakaba 1.173 my $s = '';
2925     $self->{read_until}->($s, q[>], 0);
2926    
2927 wakaba 1.1 ## Stay in the state
2928     !!!next-input-character;
2929     redo A;
2930     }
} elsif ($self->{state} == CDATA_SECTION_STATE) {
  ## NOTE: "CDATA section state" in the spec is jointly implemented
  ## by three states, |CDATA_SECTION_STATE|, |CDATA_SECTION_MSE1_STATE|,
  ## and |CDATA_SECTION_MSE2_STATE|.
  ##
  ## This arm accumulates CDATA section content into the current
  ## character token's |data|:
  ##   - |]|:  might start the "]]>" terminator; go to the MSE1 state;
  ##   - EOF:  the section ends; the accumulated token (if non-empty)
  ##           is emitted and tokenization continues in the data state;
  ##   - else: the character, plus whatever |read_until| appends up to
  ##           the next |]|, is added to |data|.

  if ($self->{next_char} == 0x005D) { # ]
    !!!cp (221.1);
    $self->{state} = CDATA_SECTION_MSE1_STATE;
    !!!next-input-character;
    redo A;
  } elsif ($self->{next_char} == -1) {
    $self->{state} = DATA_STATE;
    ## Reconsume.  The end-of-input marker must not be read past; it is
    ## left in place so that the data state handles it, as every other
    ## EOF branch of this tokenizer does.
    if (length $self->{current_token}->{data}) { # character
      !!!cp (221.2);
      !!!emit ($self->{current_token}); # character
    } else {
      !!!cp (221.3);
      ## No token to emit. $self->{current_token} is discarded.
    }
    redo A;
  } else {
    !!!cp (221.4);
    $self->{current_token}->{data} .= chr $self->{next_char};
    $self->{read_until}->($self->{current_token}->{data},
                          q<]>,
                          length $self->{current_token}->{data});

    ## Stay in the state.
    !!!next-input-character;
    redo A;
  }

  ## ISSUE: "text tokens" in spec.
2965     } elsif ($self->{state} == CDATA_SECTION_MSE1_STATE) {
2966     if ($self->{next_char} == 0x005D) { # ]
2967     !!!cp (221.5);
2968     $self->{state} = CDATA_SECTION_MSE2_STATE;
2969     !!!next-input-character;
2970     redo A;
2971     } else {
2972     !!!cp (221.6);
2973     $self->{current_token}->{data} .= ']';
2974     $self->{state} = CDATA_SECTION_STATE;
2975     ## Reconsume.
2976     redo A;
2977     }
2978     } elsif ($self->{state} == CDATA_SECTION_MSE2_STATE) {
2979     if ($self->{next_char} == 0x003E) { # >
2980     $self->{state} = DATA_STATE;
2981     !!!next-input-character;
2982     if (length $self->{current_token}->{data}) { # character
2983     !!!cp (221.7);
2984     !!!emit ($self->{current_token}); # character
2985 wakaba 1.127 } else {
2986 wakaba 1.165 !!!cp (221.8);
2987     ## No token to emit. $self->{current_token} is discarded.
2988 wakaba 1.127 }
2989 wakaba 1.165 redo A;
2990     } elsif ($self->{next_char} == 0x005D) { # ]
2991     !!!cp (221.9); # character
2992     $self->{current_token}->{data} .= ']'; ## Add first "]" of "]]]".
2993     ## Stay in the state.
2994 wakaba 1.127 !!!next-input-character;
2995 wakaba 1.165 redo A;
2996 wakaba 1.127 } else {
2997 wakaba 1.165 !!!cp (221.11);
2998     $self->{current_token}->{data} .= ']]'; # character
2999     $self->{state} = CDATA_SECTION_STATE;
3000     ## Reconsume.
3001     redo A;
3002 wakaba 1.127 }
3003 wakaba 1.167 } elsif ($self->{state} == ENTITY_STATE) {
3004 wakaba 1.168 if ({
3005     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
3006     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &
3007     $self->{entity_additional} => 1,
3008     }->{$self->{next_char}}) {
3009     !!!cp (1001);
3010     ## Don't consume
3011     ## No error
3012     ## Return nothing.
3013     #
3014     } elsif ($self->{next_char} == 0x0023) { # #
3015 wakaba 1.170 !!!cp (999);
3016 wakaba 1.168 $self->{state} = ENTITY_HASH_STATE;
3017     $self->{state_keyword} = '#';
3018     !!!next-input-character;
3019     redo A;
3020     } elsif ((0x0041 <= $self->{next_char} and
3021     $self->{next_char} <= 0x005A) or # A..Z
3022     (0x0061 <= $self->{next_char} and
3023     $self->{next_char} <= 0x007A)) { # a..z
3024 wakaba 1.170 !!!cp (998);
3025 wakaba 1.168 require Whatpm::_NamedEntityList;
3026     $self->{state} = ENTITY_NAME_STATE;
3027     $self->{state_keyword} = chr $self->{next_char};
3028     $self->{entity__value} = $self->{state_keyword};
3029     $self->{entity__match} = 0;
3030     !!!next-input-character;
3031     redo A;
3032     } else {
3033     !!!cp (1027);
3034     !!!parse-error (type => 'bare ero');
3035     ## Return nothing.
3036     #
3037     }
3038 wakaba 1.20
3039 wakaba 1.168 ## NOTE: No character is consumed by the "consume a character
3040     ## reference" algorithm. In other words, there is an "&" character
3041     ## that does not introduce a character reference, which would be
3042     ## appended to the parent element or the attribute value in later
3043     ## process of the tokenizer.
3044 wakaba 1.112
3045 wakaba 1.169 if ($self->{prev_state} == DATA_STATE) {
3046 wakaba 1.170 !!!cp (997);
3047 wakaba 1.169 $self->{state} = $self->{prev_state};
3048 wakaba 1.168 ## Reconsume.
3049     !!!emit ({type => CHARACTER_TOKEN, data => '&',
3050     line => $self->{line_prev},
3051     column => $self->{column_prev},
3052     });
3053     redo A;
3054 wakaba 1.169 } else {
3055 wakaba 1.170 !!!cp (996);
3056 wakaba 1.169 $self->{current_attribute}->{value} .= '&';
3057     $self->{state} = $self->{prev_state};
3058     ## Reconsume.
3059     redo A;
3060 wakaba 1.168 }
3061     } elsif ($self->{state} == ENTITY_HASH_STATE) {
3062     if ($self->{next_char} == 0x0078 or # x
3063     $self->{next_char} == 0x0058) { # X
3064 wakaba 1.170 !!!cp (995);
3065 wakaba 1.168 $self->{state} = HEXREF_X_STATE;
3066     $self->{state_keyword} .= chr $self->{next_char};
3067     !!!next-input-character;
3068     redo A;
3069     } elsif (0x0030 <= $self->{next_char} and
3070     $self->{next_char} <= 0x0039) { # 0..9
3071 wakaba 1.170 !!!cp (994);
3072 wakaba 1.168 $self->{state} = NCR_NUM_STATE;
3073     $self->{state_keyword} = $self->{next_char} - 0x0030;
3074     !!!next-input-character;
3075     redo A;
3076     } else {
3077     !!!parse-error (type => 'bare nero',
3078     line => $self->{line_prev},
3079     column => $self->{column_prev} - 1);
3080    
3081     ## NOTE: According to the spec algorithm, nothing is returned,
3082     ## and then "&#" is appended to the parent element or the attribute
3083     ## value in the later processing.
3084    
3085 wakaba 1.169 if ($self->{prev_state} == DATA_STATE) {
3086 wakaba 1.170 !!!cp (1019);
3087 wakaba 1.169 $self->{state} = $self->{prev_state};
3088 wakaba 1.168 ## Reconsume.
3089     !!!emit ({type => CHARACTER_TOKEN,
3090     data => '&#',
3091     line => $self->{line_prev},
3092     column => $self->{column_prev} - 1,
3093     });
3094     redo A;
3095 wakaba 1.169 } else {
3096 wakaba 1.170 !!!cp (993);
3097 wakaba 1.169 $self->{current_attribute}->{value} .= '&#';
3098     $self->{state} = $self->{prev_state};
3099     ## Reconsume.
3100     redo A;
3101 wakaba 1.1 }
3102 wakaba 1.168 }
3103     } elsif ($self->{state} == NCR_NUM_STATE) {
3104     if (0x0030 <= $self->{next_char} and
3105     $self->{next_char} <= 0x0039) { # 0..9
3106 wakaba 1.78 !!!cp (1012);
3107 wakaba 1.168 $self->{state_keyword} *= 10;
3108     $self->{state_keyword} += $self->{next_char} - 0x0030;
3109 wakaba 1.1
3110 wakaba 1.168 ## Stay in the state.
3111 wakaba 1.1 !!!next-input-character;
3112 wakaba 1.168 redo A;
3113     } elsif ($self->{next_char} == 0x003B) { # ;
3114 wakaba 1.78 !!!cp (1013);
3115 wakaba 1.1 !!!next-input-character;
3116 wakaba 1.168 #
3117 wakaba 1.1 } else {
3118 wakaba 1.78 !!!cp (1014);
3119 wakaba 1.168 !!!parse-error (type => 'no refc');
3120     ## Reconsume.
3121     #
3122 wakaba 1.1 }
3123    
3124 wakaba 1.168 my $code = $self->{state_keyword};
3125     my $l = $self->{line_prev};
3126     my $c = $self->{column_prev};
3127 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
3128 wakaba 1.78 !!!cp (1015);
3129 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3130     text => (sprintf 'U+%04X', $code),
3131     line => $l, column => $c);
3132 wakaba 1.26 $code = 0xFFFD;
3133     } elsif ($code > 0x10FFFF) {
3134 wakaba 1.78 !!!cp (1016);
3135 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3136     text => (sprintf 'U-%08X', $code),
3137     line => $l, column => $c);
3138 wakaba 1.26 $code = 0xFFFD;
3139     } elsif ($code == 0x000D) {
3140 wakaba 1.78 !!!cp (1017);
3141 wakaba 1.153 !!!parse-error (type => 'CR character reference',
3142     line => $l, column => $c);
3143 wakaba 1.26 $code = 0x000A;
3144 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
3145 wakaba 1.78 !!!cp (1018);
3146 wakaba 1.153 !!!parse-error (type => 'C1 character reference',
3147     text => (sprintf 'U+%04X', $code),
3148     line => $l, column => $c);
3149 wakaba 1.4 $code = $c1_entity_char->{$code};
3150 wakaba 1.1 }
3151 wakaba 1.168
3152 wakaba 1.169 if ($self->{prev_state} == DATA_STATE) {
3153 wakaba 1.170 !!!cp (992);
3154 wakaba 1.169 $self->{state} = $self->{prev_state};
3155 wakaba 1.168 ## Reconsume.
3156 wakaba 1.169 !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3157     line => $l, column => $c,
3158     });
3159 wakaba 1.168 redo A;
3160     } else {
3161 wakaba 1.170 !!!cp (991);
3162 wakaba 1.169 $self->{current_attribute}->{value} .= chr $code;
3163     $self->{current_attribute}->{has_reference} = 1;
3164     $self->{state} = $self->{prev_state};
3165 wakaba 1.168 ## Reconsume.
3166     redo A;
3167     }
3168     } elsif ($self->{state} == HEXREF_X_STATE) {
3169     if ((0x0030 <= $self->{next_char} and $self->{next_char} <= 0x0039) or
3170     (0x0041 <= $self->{next_char} and $self->{next_char} <= 0x0046) or
3171     (0x0061 <= $self->{next_char} and $self->{next_char} <= 0x0066)) {
3172     # 0..9, A..F, a..f
3173 wakaba 1.170 !!!cp (990);
3174 wakaba 1.168 $self->{state} = HEXREF_HEX_STATE;
3175     $self->{state_keyword} = 0;
3176     ## Reconsume.
3177     redo A;
3178     } else {
3179     !!!parse-error (type => 'bare hcro',
3180     line => $self->{line_prev},
3181     column => $self->{column_prev} - 2);
3182    
3183     ## NOTE: According to the spec algorithm, nothing is returned,
3184     ## and then "&#" followed by "X" or "x" is appended to the parent
3185     ## element or the attribute value in the later processing.
3186    
3187 wakaba 1.169 if ($self->{prev_state} == DATA_STATE) {
3188 wakaba 1.170 !!!cp (1005);
3189 wakaba 1.169 $self->{state} = $self->{prev_state};
3190 wakaba 1.168 ## Reconsume.
3191     !!!emit ({type => CHARACTER_TOKEN,
3192     data => '&' . $self->{state_keyword},
3193     line => $self->{line_prev},
3194     column => $self->{column_prev} - length $self->{state_keyword},
3195     });
3196     redo A;
3197 wakaba 1.169 } else {
3198 wakaba 1.170 !!!cp (989);
3199 wakaba 1.169 $self->{current_attribute}->{value} .= '&' . $self->{state_keyword};
3200     $self->{state} = $self->{prev_state};
3201     ## Reconsume.
3202     redo A;
3203 wakaba 1.168 }
3204     }
3205     } elsif ($self->{state} == HEXREF_HEX_STATE) {
3206     if (0x0030 <= $self->{next_char} and $self->{next_char} <= 0x0039) {
3207     # 0..9
3208     !!!cp (1002);
3209     $self->{state_keyword} *= 0x10;
3210     $self->{state_keyword} += $self->{next_char} - 0x0030;
3211     ## Stay in the state.
3212     !!!next-input-character;
3213     redo A;
3214     } elsif (0x0061 <= $self->{next_char} and
3215     $self->{next_char} <= 0x0066) { # a..f
3216     !!!cp (1003);
3217     $self->{state_keyword} *= 0x10;
3218     $self->{state_keyword} += $self->{next_char} - 0x0060 + 9;
3219     ## Stay in the state.
3220     !!!next-input-character;
3221     redo A;
3222     } elsif (0x0041 <= $self->{next_char} and
3223     $self->{next_char} <= 0x0046) { # A..F
3224     !!!cp (1004);
3225     $self->{state_keyword} *= 0x10;
3226     $self->{state_keyword} += $self->{next_char} - 0x0040 + 9;
3227     ## Stay in the state.
3228     !!!next-input-character;
3229     redo A;
3230     } elsif ($self->{next_char} == 0x003B) { # ;
3231     !!!cp (1006);
3232     !!!next-input-character;
3233     #
3234     } else {
3235     !!!cp (1007);
3236     !!!parse-error (type => 'no refc',
3237     line => $self->{line},
3238     column => $self->{column});
3239     ## Reconsume.
3240     #
3241     }
3242    
3243     my $code = $self->{state_keyword};
3244     my $l = $self->{line_prev};
3245     my $c = $self->{column_prev};
3246     if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
3247     !!!cp (1008);
3248     !!!parse-error (type => 'invalid character reference',
3249     text => (sprintf 'U+%04X', $code),
3250     line => $l, column => $c);
3251     $code = 0xFFFD;
3252     } elsif ($code > 0x10FFFF) {
3253     !!!cp (1009);
3254     !!!parse-error (type => 'invalid character reference',
3255     text => (sprintf 'U-%08X', $code),
3256     line => $l, column => $c);
3257     $code = 0xFFFD;
3258     } elsif ($code == 0x000D) {
3259     !!!cp (1010);
3260     !!!parse-error (type => 'CR character reference', line => $l, column => $c);
3261     $code = 0x000A;
3262     } elsif (0x80 <= $code and $code <= 0x9F) {
3263     !!!cp (1011);
3264     !!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c);
3265     $code = $c1_entity_char->{$code};
3266     }
3267    
3268 wakaba 1.169 if ($self->{prev_state} == DATA_STATE) {
3269 wakaba 1.170 !!!cp (988);
3270 wakaba 1.169 $self->{state} = $self->{prev_state};
3271 wakaba 1.168 ## Reconsume.
3272 wakaba 1.169 !!!emit ({type => CHARACTER_TOKEN, data => chr $code,
3273     line => $l, column => $c,
3274     });
3275 wakaba 1.168 redo A;
3276     } else {
3277 wakaba 1.170 !!!cp (987);
3278 wakaba 1.169 $self->{current_attribute}->{value} .= chr $code;
3279     $self->{current_attribute}->{has_reference} = 1;
3280     $self->{state} = $self->{prev_state};
3281 wakaba 1.168 ## Reconsume.
3282     redo A;
3283     }
3284     } elsif ($self->{state} == ENTITY_NAME_STATE) {
3285     if (length $self->{state_keyword} < 30 and
3286     ## NOTE: Some number greater than the maximum length of entity name
3287     ((0x0041 <= $self->{next_char} and # a
3288     $self->{next_char} <= 0x005A) or # x
3289     (0x0061 <= $self->{next_char} and # a
3290     $self->{next_char} <= 0x007A) or # z
3291     (0x0030 <= $self->{next_char} and # 0
3292     $self->{next_char} <= 0x0039) or # 9
3293     $self->{next_char} == 0x003B)) { # ;
3294     our $EntityChar;
3295     $self->{state_keyword} .= chr $self->{next_char};
3296     if (defined $EntityChar->{$self->{state_keyword}}) {
3297     if ($self->{next_char} == 0x003B) { # ;
3298     !!!cp (1020);
3299     $self->{entity__value} = $EntityChar->{$self->{state_keyword}};
3300     $self->{entity__match} = 1;
3301     !!!next-input-character;
3302     #
3303     } else {
3304     !!!cp (1021);
3305     $self->{entity__value} = $EntityChar->{$self->{state_keyword}};
3306     $self->{entity__match} = -1;
3307     ## Stay in the state.
3308     !!!next-input-character;
3309     redo A;
3310     }
3311     } else {
3312     !!!cp (1022);
3313     $self->{entity__value} .= chr $self->{next_char};
3314     $self->{entity__match} *= 2;
3315     ## Stay in the state.
3316 wakaba 1.16 !!!next-input-character;
3317 wakaba 1.168 redo A;
3318     }
3319     }
3320    
3321     my $data;
3322     my $has_ref;
3323     if ($self->{entity__match} > 0) {
3324     !!!cp (1023);
3325     $data = $self->{entity__value};
3326     $has_ref = 1;
3327     #
3328     } elsif ($self->{entity__match} < 0) {
3329     !!!parse-error (type => 'no refc');
3330 wakaba 1.169 if ($self->{prev_state} != DATA_STATE and # in attribute
3331     $self->{entity__match} < -1) {
3332 wakaba 1.168 !!!cp (1024);
3333     $data = '&' . $self->{state_keyword};
3334     #
3335 wakaba 1.37 } else {
3336 wakaba 1.168 !!!cp (1025);
3337     $data = $self->{entity__value};
3338     $has_ref = 1;
3339     #
3340 wakaba 1.16 }
3341 wakaba 1.1 } else {
3342 wakaba 1.168 !!!cp (1026);
3343     !!!parse-error (type => 'bare ero',
3344     line => $self->{line_prev},
3345     column => $self->{column_prev});
3346     $data = '&' . $self->{state_keyword};
3347     #
3348 wakaba 1.1 }
3349 wakaba 1.168
3350     ## NOTE: In these cases, when a character reference is found,
3351     ## it is consumed and a character token is returned, or, otherwise,
3352     ## nothing is consumed and returned, according to the spec algorithm.
3353     ## In this implementation, anything that has been examined by the
3354     ## tokenizer is appended to the parent element or the attribute value
3355     ## as string, either literal string when no character reference or
3356     ## entity-replaced string otherwise, in this stage, since any characters
3357     ## that would not be consumed are appended in the data state or in an
3358     ## appropriate attribute value state anyway.
3359    
3360 wakaba 1.169 if ($self->{prev_state} == DATA_STATE) {
3361 wakaba 1.170 !!!cp (986);
3362 wakaba 1.169 $self->{state} = $self->{prev_state};
3363 wakaba 1.168 ## Reconsume.
3364     !!!emit ({type => CHARACTER_TOKEN,
3365 wakaba 1.169 data => $data,
3366 wakaba 1.168 line => $self->{line_prev},
3367     column => $self->{column_prev} + 1 - length $self->{state_keyword},
3368     });
3369 wakaba 1.167 redo A;
3370 wakaba 1.169 } else {
3371 wakaba 1.170 !!!cp (985);
3372 wakaba 1.169 $self->{current_attribute}->{value} .= $data;
3373     $self->{current_attribute}->{has_reference} = 1 if $has_ref;
3374     $self->{state} = $self->{prev_state};
3375     ## Reconsume.
3376     redo A;
3377 wakaba 1.37 }
3378 wakaba 1.1 } else {
3379 wakaba 1.167 die "$0: $self->{state}: Unknown state";
3380     }
3381     } # A
3382    
3383     die "$0: _get_next_token: unexpected case";
3384     } # _get_next_token
3385 wakaba 1.1
## _initialize_tree_constructor: Prepares |$self->{document}| for the tree
## construction stage.
##
## NOTE: The caller MUST set |$self->{document}| before invoking this method.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  ## Relax DOM error checking while the parser builds the tree.
  $doc->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  $doc->manakai_is_html (1); # MUST

  ## Record the initial source position on the document node.
  $doc->set_user_data (manakai_source_line => 1);
  $doc->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
3396    
## _terminate_tree_constructor: Re-enables strict DOM error checking on
## |$self->{document}| once tree construction is over.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  ## TODO: Turn mutation events on
  return $self->{document}->strict_error_checking (1);
} # _terminate_tree_constructor
3402    
3403     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
3404    
3405 wakaba 1.3 { # tree construction stage
3406     my $token;
3407    
## _construct_tree: Entry point of the tree construction stage.  Fetches
## the first token, resets the per-parse state and then runs the insertion
## mode handlers in order.
sub _construct_tree ($) {
  my $self = shift;

  ## When an interactive UA renders the $self->{document} available
  ## to the user, or when it begins accepting user input, is
  ## not defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is the concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Reset the element pointers and the stack of open elements.
  @{$self}{qw(form_element head_element inner_html_node)} = ();
  $self->{open_elements} = [];

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;

  ## NOTE: The "before head" insertion mode and so on.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->_tree_construction_main;
} # _construct_tree
3436    
## _tree_construction_initial: Implements the "initial" insertion mode.
##
## Processes the file-scoped |$token| (advancing it with |!!!next-token|
## where a token is consumed) until the document's DOCTYPE, or the lack
## of it, has been handled:
##
##   - DOCTYPE token: reports conformance errors, appends a document type
##     node to |$self->{document}|, chooses the compatibility mode
##     (|quirks| / |limited quirks| / unchanged) and consumes the token.
##   - Start tag, end tag or end-of-file token: reports "no DOCTYPE", sets
##     the |quirks| mode and leaves the token for reprocessing.
##   - Character token: leading white space is dropped; any remaining data
##     is handled as the missing-DOCTYPE case above.
##   - Comment token: appended to the document; the mode is not left.
##
## Dies on an unknown token type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{system_identifier}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{public_identifier}) {
        if ($token->{public_identifier} eq 'XSLT-compat') {
          !!!cp ('t1.2');
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else {
        !!!cp ('t3');
        #
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## The public identifier is compared ASCII case-insensitively, so
        ## it is upper-cased once and matched against upper-case literals.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        my $match;
        for (@$prefix) {
          ## NOTE: The public identifier (not the |$prefix| array
          ## reference) has to be tested against each known prefix.
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4\.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4\.01 TRANSITIONAL//]) {
          ## NOTE: For these public identifiers the document is in the
          ## quirks mode only when the system identifier is missing;
          ## with a system identifier it is in the limited quirks mode.
          if (defined $token->{system_identifier}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1\.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1\.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is compared ASCII case-insensitively.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
3643    
## _tree_construction_root_element: Implements the "before html"
## insertion mode.
##
## Processes the file-scoped |$token| until the root |html| element has
## been created, appended to |$self->{document}| and pushed onto
## |$self->{open_elements}|:
##
##   - DOCTYPE token: parse error; the token is ignored.
##   - Comment token: appended to the document.
##   - Character token: leading white space is dropped; remaining data
##     implies the root element and is left for reprocessing.
##   - |html| start tag: the root element is created from the token and
##     the token is consumed.
##   - Any other start tag, end tag or end-of-file token: the root element
##     is implied and the token is left for reprocessing.
##
## |$self->{application_cache_selection}| is invoked exactly once, with
## the |manifest| attribute value of the |html| start tag if there is one
## and with |undef| otherwise.  Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is invoked after
      ## the implied root element is created below, as for the other
      ## token types, so that it runs only once.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## No |html| start tag: create the implied root element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3738    
## _reset_insertion_mode: Sets |$self->{insertion_mode}| according to the
## stack of open elements, walking from the current node towards the
## bottom of the stack until an element that determines the mode is found.
##
## Dies if the bottom of the stack is reached while
## |$self->{inner_html_node}| is not defined.
sub _reset_insertion_mode ($) {
  my ($self) = @_;

  ## Insertion modes selected directly by the local name of the node.
  ## NOTE: |option| and |optgroup| do not set insertion mode to
  ## "in select" by themselves.
  my %mode_for = (
    select => IN_SELECT_IM,
    tr => IN_ROW_IM,
    tbody => IN_TABLE_BODY_IM,
    thead => IN_TABLE_BODY_IM,
    tfoot => IN_TABLE_BODY_IM,
    caption => IN_CAPTION_IM,
    colgroup => IN_COLUMN_GROUP_IM,
    table => IN_TABLE_IM,
    head => IN_BODY_IM, # not in head!
    body => IN_BODY_IM,
    frameset => IN_FRAMESET_IM,
  );

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      die "_reset_insertion_mode: t27"
          unless defined $self->{inner_html_node};
      !!!cp ('t28');
      $node = $self->{inner_html_node};
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] & TABLE_CELL_EL) {
      if (not $last) {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      } else {
        !!!cp ('t28.2');
        #
      }
    } else {
      !!!cp ('t28.4');
      $new_mode = $mode_for{$node->[0]->manakai_local_name};
    }
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return if $new_mode;
    }

    ## Step 15
    if ($node->[1] & HTML_EL) {
      if (defined $self->{head_element}) {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      } else {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      ($self->{insertion_mode} = IN_BODY_IM) and return;
    }

    ## Step 17
    $node = $self->{open_elements}->[--$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3825    
3826     sub _tree_construction_main ($) {
3827     my $self = shift;
3828    
3829 wakaba 1.1 my $active_formatting_elements = [];
3830    
## $reconstruct_active_formatting_elements: Re-opens the entries at the end
## of the list of active formatting elements that are not in the stack of
## open elements.  Each such entry is shallow-cloned, the clone is handed
## to the |$insert| code reference given as the only argument, pushed onto
## |$self->{open_elements}| and stored in place of the original entry.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2
  return if $entry->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($entry->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  ## Rewind to the entry that follows the last marker or the last entry
  ## that is still open.
  S4: {
    ## Step 4
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $entry = $active_formatting_elements->[--$pos];

    ## Step 6
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open;
      OE: for my $open (@{$self->{open_elements}}) {
        if ($entry->[0] eq $open->[0]) {
          !!!cp ('t33');
          $is_open = 1;
          last OE;
        }
      }
      if (not $is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      } else {
        !!!cp ('t34');
        #
      }
    }

    ## Step 7
    $entry = $active_formatting_elements->[++$pos];
  } # S4

  ## Advance towards the end of the list, recreating every entry.
  S7: {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11
    if (not ($clone->[0] eq $active_formatting_elements->[-1]->[0])) {
      !!!cp ('t36');
      ## Step 7'
      $entry = $active_formatting_elements->[++$pos];

      redo S7;
    }

    !!!cp ('t37');
  } # S7
}; # $reconstruct_active_formatting_elements
3910    
## $clear_up_to_marker: Removes the last |#marker| entry of the list of
## active formatting elements together with every entry after it.  The
## list is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      !!!cp ('t38');
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3922    
3923 wakaba 1.96 my $insert;
3924    
## $parse_rcdata: Handles a start tag whose content is CDATA or RCDATA.
## The only argument is the content model flag (|CDATA_CONTENT_MODEL| or
## |RCDATA_CONTENT_MODEL|) to tokenize the element content with.  The
## element is created from the current |$token|, inserted via |$insert|
## and filled with a single Text node holding the collected characters.
## Dies if the flag is neither of the two values and the content is not
## closed by the matching end tag.
my $parse_rcdata = sub ($) {
  my $content_model_flag = shift;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($el);

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4
  my $buffer = '';
  !!!nack ('t40.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    $buffer .= $token->{data};
    !!!next-token;
  }

  ## Step 5
  if (length $buffer) {
    !!!cp ('t41');
    $el->append_child ($self->{document}->create_text_node ($buffer));
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq $start_tag_name) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
  } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#eof', token => $token);
  } else {
    die "$0: $content_model_flag in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3979 wakaba 1.1
## $script_start_tag: Handles a |script| start tag.  The element is
## created from the current |$token|, its content is tokenized as CDATA
## and appended as text, and the element is inserted via |$insert| unless
## |$self->{inner_html_node}| is defined.
my $script_start_tag = sub () {
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Collect the script content.
  my $source_text = '';
  !!!nack ('t45.1');
  !!!next-token;
  until ($token->{type} != CHARACTER_TOKEN) {
    !!!cp ('t45');
    $source_text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $source_text) {
    !!!cp ('t46');
    $script_el->manakai_append_text ($source_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  if ($token->{type} != END_TAG_TOKEN or
      $token->{tag_name} ne 'script') {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  } else {
    !!!cp ('t47');
    ## Ignore the token
  }

  if (not defined $self->{inner_html_node}) {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
4031    
## $open_tables: array reference of [table node, tainted flag] pairs.
4032 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
4033     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
## Initially holds one entry whose node slot is the node of the first
## (root) entry of the stack of open elements; its tainted flag is left
## unset.
4034     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
4035    
## $formatting_end_tag: closure that processes an end tag of a formatting
## element using the adoption agency algorithm (AAA).
##
##   Argument: the end tag token (hash reference; its |tag_name| is read
##   here, and the token itself is attached to every parse error reported).
##   Return value: none (bare |return|).
##
##   Reads and modifies $active_formatting_elements,
##   $self->{open_elements} and $open_tables, and moves DOM nodes around.
##   Entries of both lists are array references: ->[0] is the node (or the
##   string '#marker' in the active formatting list) and ->[1] is a set of
##   category bit flags.
##   Every |return| below is preceded by |!!!next-token|; the FET block is
##   repeated (|redo FET|) until one of those exits is taken.
4036 wakaba 1.1 my $formatting_end_tag = sub {
4037 wakaba 1.113 my $end_tag_token = shift;
4038     my $tag_name = $end_tag_token->{tag_name};
4039 wakaba 1.1
4040 wakaba 1.103 ## NOTE: The adoption agency algorithm (AAA).
4041 wakaba 1.102
4042 wakaba 1.1 FET: {
4043     ## Step 1
## Search the list of active formatting elements from its end, stopping
## at the first '#marker' encountered, for an entry whose local name
## equals the tag name.
4044     my $formatting_element;
4045     my $formatting_element_i_in_active;
4046     AFE: for (reverse 0..$#$active_formatting_elements) {
4047 wakaba 1.123 if ($active_formatting_elements->[$_]->[0] eq '#marker') {
4048     !!!cp ('t52');
4049     last AFE;
4050     } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
4051     eq $tag_name) {
4052 wakaba 1.79 !!!cp ('t51');
4053 wakaba 1.1 $formatting_element = $active_formatting_elements->[$_];
4054     $formatting_element_i_in_active = $_;
4055     last AFE;
4056     }
4057     } # AFE
4058     unless (defined $formatting_element) {
4059 wakaba 1.79 !!!cp ('t53');
4060 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
4061 wakaba 1.1 ## Ignore the token
4062     !!!next-token;
4063     return;
4064     }
4065     ## has an element in scope
## Walk the stack of open elements from its end; $in_scope is cleared
## once a node carrying the SCOPING_EL flag has been passed.
4066     my $in_scope = 1;
4067     my $formatting_element_i_in_open;
4068 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4069     my $node = $self->{open_elements}->[$_];
4070 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
4071     if ($in_scope) {
4072 wakaba 1.79 !!!cp ('t54');
4073 wakaba 1.1 $formatting_element_i_in_open = $_;
4074     last INSCOPE;
4075     } else { # in open elements but not in scope
4076 wakaba 1.79 !!!cp ('t55');
4077 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4078     text => $token->{tag_name},
4079 wakaba 1.113 token => $end_tag_token);
4080 wakaba 1.1 ## Ignore the token
4081     !!!next-token;
4082     return;
4083     }
4084 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
4085 wakaba 1.79 !!!cp ('t56');
4086 wakaba 1.1 $in_scope = 0;
4087     }
4088     } # INSCOPE
## The formatting element is in the active list but not in the stack of
## open elements.
4089     unless (defined $formatting_element_i_in_open) {
4090 wakaba 1.79 !!!cp ('t57');
4091 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4092     text => $token->{tag_name},
4093 wakaba 1.113 token => $end_tag_token);
## NOTE(review): |pop| removes the LAST entry of the list, while
## $formatting_element is the entry at $formatting_element_i_in_active;
## the two coincide only when the match was the last entry - confirm.
4094 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
4095     !!!next-token; ## TODO: ok?
4096     return;
4097     }
## The formatting element is not the current node: report the current
## node as not closed, but continue.
4098 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
4099 wakaba 1.79 !!!cp ('t58');
4100 wakaba 1.122 !!!parse-error (type => 'not closed',
4101 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4102 wakaba 1.122 ->manakai_local_name,
4103 wakaba 1.113 token => $end_tag_token);
4104 wakaba 1.1 }
4105    
4106     ## Step 2
## Scan from the end of the stack up to the formatting element.  Each
## non-formatting node with the SPECIAL_EL or SCOPING_EL flag overwrites
## $furthest_block, so the node finally kept is the qualifying one
## nearest to the formatting element.
4107     my $furthest_block;
4108     my $furthest_block_i_in_open;
4109 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4110     my $node = $self->{open_elements}->[$_];
4111 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
4112 wakaba 1.1 #not $phrasing_category->{$node->[1]} and
4113 wakaba 1.123 ($node->[1] & SPECIAL_EL or
4114     $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
4115 wakaba 1.79 !!!cp ('t59');
4116 wakaba 1.1 $furthest_block = $node;
4117     $furthest_block_i_in_open = $_;
4118     } elsif ($node->[0] eq $formatting_element->[0]) {
4119 wakaba 1.79 !!!cp ('t60');
4120 wakaba 1.1 last OE;
4121     }
4122     } # OE
4123    
4124     ## Step 3
## No furthest block: truncate the stack of open elements from the
## formatting element onward, drop its entry from the active list, and
## finish.
4125     unless (defined $furthest_block) { # MUST
4126 wakaba 1.79 !!!cp ('t61');
4127 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
4128 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
4129     !!!next-token;
4130     return;
4131     }
4132    
4133     ## Step 4
## The common ancestor is the stack entry immediately before the
## formatting element.
4134 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
4135 wakaba 1.1
4136     ## Step 5
## Detach the furthest block from its current parent, if it has one.
4137     my $furthest_block_parent = $furthest_block->[0]->parent_node;
4138     if (defined $furthest_block_parent) {
4139 wakaba 1.79 !!!cp ('t62');
4140 wakaba 1.1 $furthest_block_parent->remove_child ($furthest_block->[0]);
4141     }
4142    
4143     ## Step 6
## The bookmark is kept as the node of the entry preceding the
## formatting element in the active list.
## NOTE(review): when $formatting_element_i_in_active is 0, index -1
## addresses the last entry of the list - confirm this is acceptable.
4144     my $bookmark_prev_el
4145     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
4146     ->[0];
4147    
4148     ## Step 7
## Climb the stack from the furthest block toward the formatting
## element, re-parenting $last_node under each node visited.
4149     my $node = $furthest_block;
4150     my $node_i_in_open = $furthest_block_i_in_open;
4151     my $last_node = $furthest_block;
4152     S7: {
4153     ## Step 1
4154     $node_i_in_open--;
4155 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
4156 wakaba 1.1
4157     ## Step 2
## If $node has no entry in the active formatting list, remove it from
## the stack of open elements and restart S7 with the entry before it.
4158     my $node_i_in_active;
4159     S7S2: {
4160     for (reverse 0..$#$active_formatting_elements) {
4161     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
4162 wakaba 1.79 !!!cp ('t63');
4163 wakaba 1.1 $node_i_in_active = $_;
4164     last S7S2;
4165     }
4166     }
4167 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
4168 wakaba 1.1 redo S7;
4169     } # S7S2
4170    
4171     ## Step 3
4172     last S7 if $node->[0] eq $formatting_element->[0];
4173    
4174     ## Step 4
## Only on the first pass (while $last_node is still the furthest block)
## is the bookmark moved to $node.
4175     if ($last_node->[0] eq $furthest_block->[0]) {
4176 wakaba 1.79 !!!cp ('t64');
4177 wakaba 1.1 $bookmark_prev_el = $node->[0];
4178     }
4179    
4180     ## Step 5
## A node that already has children is replaced, in both lists, by a
## clone made with |clone_node (0)|, keeping the same category flags.
4181     if ($node->[0]->has_child_nodes ()) {
4182 wakaba 1.79 !!!cp ('t65');
4183 wakaba 1.1 my $clone = [$node->[0]->clone_node (0), $node->[1]];
4184     $active_formatting_elements->[$node_i_in_active] = $clone;
4185 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
4186 wakaba 1.1 $node = $clone;
4187     }
4188    
4189     ## Step 6
4190     $node->[0]->append_child ($last_node->[0]);
4191    
4192     ## Step 7
4193     $last_node = $node;
4194    
4195     ## Step 8
4196     redo S7;
4197     } # S7
4198    
4199     ## Step 8
## Attach $last_node.  If the common ancestor carries the TABLE_ROWS_EL
## flag, the node is placed relative to the nearest TABLE_EL entry in the
## stack: before the table in the table's parent when that parent is an
## element (node_type == 1), otherwise in the stack entry just before the
## table; with no table in the stack, in the first stack entry.  The
## current table is then marked tainted.  Otherwise $last_node is simply
## appended to the common ancestor.
## NOTE(review): $next_sibling stays undefined on the latter two paths;
## presumably |insert_before| then appends - confirm.
4200 wakaba 1.123 if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
4201 wakaba 1.102 my $foster_parent_element;
4202     my $next_sibling;
4203 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
4204     if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4205 wakaba 1.102 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4206     if (defined $parent and $parent->node_type == 1) {
4207     !!!cp ('t65.1');
4208     $foster_parent_element = $parent;
4209     $next_sibling = $self->{open_elements}->[$_]->[0];
4210     } else {
4211     !!!cp ('t65.2');
4212     $foster_parent_element
4213     = $self->{open_elements}->[$_ - 1]->[0];
4214     }
4215     last OE;
4216     }
4217     } # OE
4218     $foster_parent_element = $self->{open_elements}->[0]->[0]
4219     unless defined $foster_parent_element;
4220     $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
4221     $open_tables->[-1]->[1] = 1; # tainted
4222     } else {
4223     !!!cp ('t65.3');
4224     $common_ancestor_node->[0]->append_child ($last_node->[0]);
4225     }
4226 wakaba 1.1
4227     ## Step 9
## Clone the formatting element with |clone_node (0)|, keeping its
## category flags.
4228     my $clone = [$formatting_element->[0]->clone_node (0),
4229     $formatting_element->[1]];
4230    
4231     ## Step 10
## The child list is copied into @cn first, and the children are then
## moved from the furthest block into the clone one by one.
4232     my @cn = @{$furthest_block->[0]->child_nodes};
4233     $clone->[0]->append_child ($_) for @cn;
4234    
4235     ## Step 11
4236     $furthest_block->[0]->append_child ($clone->[0]);
4237    
4238     ## Step 12
## In the active formatting list: remove the formatting element's entry
## and insert $clone right after the bookmark entry.  $i holds the
## bookmark's index and is decremented when the removal happens at a
## lower index, which shifts the bookmark down by one.
4239     my $i;
4240     AFE: for (reverse 0..$#$active_formatting_elements) {
4241     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
4242 wakaba 1.79 !!!cp ('t66');
4243 wakaba 1.1 splice @$active_formatting_elements, $_, 1;
4244     $i-- and last AFE if defined $i;
4245     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
4246 wakaba 1.79 !!!cp ('t67');
4247 wakaba 1.1 $i = $_;
4248     }
4249     } # AFE
4250     splice @$active_formatting_elements, $i + 1, 0, $clone;
4251    
4252     ## Step 13
## In the stack of open elements: remove the formatting element's entry
## and place $clone right after the furthest block, with the same index
## bookkeeping as in step 12.
## NOTE(review): unlike step 12, the final |splice| has length 1, so an
## existing entry at $i + 1 (if any) is replaced by $clone rather than
## shifted - confirm that this is intended.
4253     undef $i;
4254 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4255     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
4256 wakaba 1.79 !!!cp ('t68');
4257 wakaba 1.3 splice @{$self->{open_elements}}, $_, 1;
4258 wakaba 1.1 $i-- and last OE if defined $i;
4259 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
4260 wakaba 1.79 !!!cp ('t69');
4261 wakaba 1.1 $i = $_;
4262     }
4263     } # OE
4264 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
4265 wakaba 1.1
4266     ## Step 14
## Run the whole algorithm again; it ends through one of the |return|
## paths above.
4267     redo FET;
4268     } # FET
4269     }; # $formatting_end_tag
4270    
## $insert_to_current: insertion routine that appends the node given as
## the first argument to the current node, i.e. the node of the last
## entry of the stack of open elements.  The same closure is also
## assigned to $insert here.
4271 wakaba 1.96 $insert = my $insert_to_current = sub {
4272 wakaba 1.25 $self->{open_elements}->[-1]->[0]->append_child ($_[0]);
4273 wakaba 1.1 }; # $insert_to_current
4274    
## $insert_to_foster: insertion routine with foster parenting.
##   Argument: the node to insert.
##   If the current node (last entry of the stack of open elements) carries
##   the TABLE_ROWS_EL flag, the node is placed relative to the nearest
##   TABLE_EL entry in the stack and the current table is marked tainted;
##   otherwise the node is appended to the current node.
4275     my $insert_to_foster = sub {
4276 wakaba 1.95 my $child = shift;
4277 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
4278 wakaba 1.95 # MUST
4279     my $foster_parent_element;
4280     my $next_sibling;
## Find the nearest table entry, scanning the stack from its end.
4281 wakaba 1.123 OE: for (reverse 0..$#{$self->{open_elements}}) {
4282     if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4283 wakaba 1.3 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
## A table whose parent is an element node (node_type == 1): insert into
## that parent, just before the table.
4284 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4285 wakaba 1.79 !!!cp ('t70');
4286 wakaba 1.1 $foster_parent_element = $parent;
4287 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4288 wakaba 1.1 } else {
## Otherwise use the stack entry immediately before the table.
4289 wakaba 1.79 !!!cp ('t71');
4290 wakaba 1.1 $foster_parent_element
4291 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
4292 wakaba 1.1 }
4293     last OE;
4294     }
4295     } # OE
## No table in the stack: fall back to the first stack entry.
4296 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
4297 wakaba 1.1 unless defined $foster_parent_element;
## NOTE(review): $next_sibling is undefined unless the first branch above
## was taken; presumably |insert_before| then appends - confirm.
4298     $foster_parent_element->insert_before
4299     ($child, $next_sibling);
4300 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4301     } else {
4302     !!!cp ('t72');
4303     $self->{open_elements}->[-1]->[0]->append_child ($child);
4304     }
4305 wakaba 1.1 }; # $insert_to_foster
4306    
4307 wakaba 1.126 B: while (1) {
4308 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
4309 wakaba 1.79 !!!cp ('t73');
4310 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
4311 wakaba 1.52 ## Ignore the token
4312     ## Stay in the phase
4313     !!!next-token;
4314 wakaba 1.126 next B;
4315 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
4316 wakaba 1.52 $token->{tag_name} eq 'html') {
4317 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4318 wakaba 1.79 !!!cp ('t79');
4319 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4320 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
4321     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
4322 wakaba 1.79 !!!cp ('t80');
4323 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4324 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4325 wakaba 1.79 } else {
4326     !!!cp ('t81');
4327 wakaba 1.52 }
4328    
4329 wakaba 1.84 !!!cp ('t82');
4330 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
4331 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
4332     for my $attr_name (keys %{$token->{attributes}}) {
4333     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4334 wakaba 1.79 !!!cp ('t84');
4335 wakaba 1.52 $top_el->set_attribute_ns
4336     (undef, [undef, $attr_name],
4337     $token->{attributes}->{$attr_name}->{value});
4338     }
4339     }
4340 wakaba 1.125 !!!nack ('t84.1');
4341 wakaba 1.52 !!!next-token;
4342 wakaba 1.126 next B;
4343 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
4344 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
4345 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
4346 wakaba 1.79 !!!cp ('t85');
4347 wakaba 1.52 $self->{document}->append_child ($comment);
4348 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
4349 wakaba 1.79 !!!cp ('t86');
4350 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
4351     } else {
4352 wakaba 1.79 !!!cp ('t87');
4353 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4354     }
4355     !!!next-token;
4356 wakaba 1.126 next B;
4357     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
4358     if ($token->{type} == CHARACTER_TOKEN) {
4359     !!!cp ('t87.1');
4360     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4361     !!!next-token;
4362     next B;
4363     } elsif ($token->{type} == START_TAG_TOKEN) {
4364 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
4365     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
4366 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
4367     ($token->{tag_name} eq 'svg' and
4368     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
4369     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4370     !!!cp ('t87.2');
4371     #
4372     } elsif ({
4373 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4374 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
4375     em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
4376     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
4377     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
4378     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
4379     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
4380     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
4381 wakaba 1.126 }->{$token->{tag_name}}) {
4382     !!!cp ('t87.2');
4383     !!!parse-error (type => 'not closed',
4384 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4385 wakaba 1.126 ->manakai_local_name,
4386     token => $token);
4387    
4388     pop @{$self->{open_elements}}
4389     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4390    
4391 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4392 wakaba 1.126 ## Reprocess.
4393     next B;
4394     } else {
4395 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
4396     my $tag_name = $token->{tag_name};
4397     if ($nsuri eq $SVG_NS) {
4398     $tag_name = {
4399     altglyph => 'altGlyph',
4400     altglyphdef => 'altGlyphDef',
4401     altglyphitem => 'altGlyphItem',
4402     animatecolor => 'animateColor',
4403     animatemotion => 'animateMotion',
4404     animatetransform => 'animateTransform',
4405     clippath => 'clipPath',
4406     feblend => 'feBlend',
4407     fecolormatrix => 'feColorMatrix',
4408     fecomponenttransfer => 'feComponentTransfer',
4409     fecomposite => 'feComposite',
4410     feconvolvematrix => 'feConvolveMatrix',
4411     fediffuselighting => 'feDiffuseLighting',
4412     fedisplacementmap => 'feDisplacementMap',
4413     fedistantlight => 'feDistantLight',
4414     feflood => 'feFlood',
4415     fefunca => 'feFuncA',
4416     fefuncb => 'feFuncB',
4417     fefuncg => 'feFuncG',
4418     fefuncr => 'feFuncR',
4419     fegaussianblur => 'feGaussianBlur',
4420     feimage => 'feImage',
4421     femerge => 'feMerge',
4422     femergenode => 'feMergeNode',
4423     femorphology => 'feMorphology',
4424     feoffset => 'feOffset',
4425     fepointlight => 'fePointLight',
4426     fespecularlighting => 'feSpecularLighting',
4427     fespotlight => 'feSpotLight',
4428     fetile => 'feTile',
4429     feturbulence => 'feTurbulence',
4430     foreignobject => 'foreignObject',
4431     glyphref => 'glyphRef',
4432     lineargradient => 'linearGradient',
4433     radialgradient => 'radialGradient',
4434     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
4435     textpath => 'textPath',
4436     }->{$tag_name} || $tag_name;
4437     }
4438    
4439     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
4440    
4441     ## "adjust foreign attributes" - done in insert-element-f
4442 wakaba 1.126
4443 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
4444 wakaba 1.126
4445     if ($self->{self_closing}) {
4446     pop @{$self->{open_elements}};
4447     !!!ack ('t87.3');
4448     } else {
4449     !!!cp ('t87.4');
4450     }
4451    
4452     !!!next-token;
4453     next B;
4454     }
4455     } elsif ($token->{type} == END_TAG_TOKEN) {
4456     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4457     !!!cp ('t87.5');
4458     #
4459     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4460     !!!cp ('t87.6');
4461 wakaba 1.146 !!!parse-error (type => 'not closed',
4462 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4463 wakaba 1.146 ->manakai_local_name,
4464     token => $token);
4465    
4466     pop @{$self->{open_elements}}
4467     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4468    
4469     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4470     ## Reprocess.
4471     next B;
4472 wakaba 1.126 } else {
4473     die "$0: $token->{type}: Unknown token type";
4474     }
4475     }
4476    
4477     if ($self->{insertion_mode} & HEAD_IMS) {
4478 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4479 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4480 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4481     !!!cp ('t88.2');
4482     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4483     } else {
4484     !!!cp ('t88.1');
4485     ## Ignore the token.
4486     !!!next-token;
4487 wakaba 1.126 next B;
4488 wakaba 1.99 }
4489 wakaba 1.52 unless (length $token->{data}) {
4490 wakaba 1.79 !!!cp ('t88');
4491 wakaba 1.52 !!!next-token;
4492 wakaba 1.126 next B;
4493 wakaba 1.1 }
4494     }
4495 wakaba 1.52
4496 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4497 wakaba 1.79 !!!cp ('t89');
4498 wakaba 1.52 ## As if <head>
4499 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4500 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4501 wakaba 1.123 push @{$self->{open_elements}},
4502     [$self->{head_element}, $el_category->{head}];
4503 wakaba 1.52
4504     ## Reprocess in the "in head" insertion mode...
4505     pop @{$self->{open_elements}};
4506    
4507     ## Reprocess in the "after head" insertion mode...
4508 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4509 wakaba 1.79 !!!cp ('t90');
4510 wakaba 1.52 ## As if </noscript>
4511     pop @{$self->{open_elements}};
4512 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
4513 wakaba 1.1
4514 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
4515     ## As if </head>
4516     pop @{$self->{open_elements}};
4517    
4518     ## Reprocess in the "after head" insertion mode...
4519 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4520 wakaba 1.79 !!!cp ('t91');
4521 wakaba 1.52 pop @{$self->{open_elements}};
4522    
4523     ## Reprocess in the "after head" insertion mode...
4524 wakaba 1.79 } else {
4525     !!!cp ('t92');
4526 wakaba 1.1 }
4527 wakaba 1.52
4528 wakaba 1.123 ## "after head" insertion mode
4529     ## As if <body>
4530     !!!insert-element ('body',, $token);
4531     $self->{insertion_mode} = IN_BODY_IM;
4532     ## reprocess
4533 wakaba 1.126 next B;
4534 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
4535     if ($token->{tag_name} eq 'head') {
4536     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4537     !!!cp ('t93');
4538 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4539 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
4540     ($self->{head_element});
4541     push @{$self->{open_elements}},
4542     [$self->{head_element}, $el_category->{head}];
4543     $self->{insertion_mode} = IN_HEAD_IM;
4544 wakaba 1.125 !!!nack ('t93.1');
4545 wakaba 1.123 !!!next-token;
4546 wakaba 1.126 next B;
4547 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4548 wakaba 1.139 !!!cp ('t93.2');
4549 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
4550     token => $token);
4551 wakaba 1.139 ## Ignore the token
4552     !!!nack ('t93.3');
4553     !!!next-token;
4554     next B;
4555 wakaba 1.125 } else {
4556     !!!cp ('t95');
4557 wakaba 1.153 !!!parse-error (type => 'in head:head',
4558     token => $token); # or in head noscript
4559 wakaba 1.125 ## Ignore the token
4560     !!!nack ('t95.1');
4561     !!!next-token;
4562 wakaba 1.126 next B;
4563 wakaba 1.125 }
4564     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4565 wakaba 1.126 !!!cp ('t96');
4566     ## As if <head>
4567     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4568     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4569     push @{$self->{open_elements}},
4570     [$self->{head_element}, $el_category->{head}];
4571 wakaba 1.52
4572 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
4573     ## Reprocess in the "in head" insertion mode...
4574     } else {
4575     !!!cp ('t97');
4576     }
4577 wakaba 1.52
4578 wakaba 1.49 if ($token->{tag_name} eq 'base') {
4579 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4580 wakaba 1.79 !!!cp ('t98');
4581 wakaba 1.49 ## As if </noscript>
4582     pop @{$self->{open_elements}};
4583 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'base',
4584     token => $token);
4585 wakaba 1.49
4586 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4587 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4588 wakaba 1.79 } else {
4589     !!!cp ('t99');
4590 wakaba 1.49 }
4591    
4592     ## NOTE: There is a "as if in head" code clone.
4593 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4594 wakaba 1.79 !!!cp ('t100');
4595 wakaba 1.153 !!!parse-error (type => 'after head',
4596     text => $token->{tag_name}, token => $token);
4597 wakaba 1.123 push @{$self->{open_elements}},
4598     [$self->{head_element}, $el_category->{head}];
4599 wakaba 1.79 } else {
4600     !!!cp ('t101');
4601 wakaba 1.49 }
4602 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4603 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4604 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4605 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4606 wakaba 1.125 !!!nack ('t101.1');
4607 wakaba 1.49 !!!next-token;
4608 wakaba 1.126 next B;
4609 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
4610 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4611 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4612 wakaba 1.79 !!!cp ('t102');
4613 wakaba 1.153 !!!parse-error (type => 'after head',
4614     text => $token->{tag_name}, token => $token);
4615 wakaba 1.123 push @{$self->{open_elements}},
4616     [$self->{head_element}, $el_category->{head}];
4617 wakaba 1.79 } else {
4618     !!!cp ('t103');
4619 wakaba 1.25 }
4620 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4621 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4622 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4623 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4624 wakaba 1.125 !!!ack ('t103.1');
4625 wakaba 1.1 !!!next-token;
4626 wakaba 1.126 next B;
4627 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4628     ## NOTE: There is a "as if in head" code clone.
4629 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4630 wakaba 1.79 !!!cp ('t104');
4631 wakaba 1.153 !!!parse-error (type => 'after head',
4632     text => $token->{tag_name}, token => $token);
4633 wakaba 1.123 push @{$self->{open_elements}},
4634     [$self->{head_element}, $el_category->{head}];
4635 wakaba 1.79 } else {
4636     !!!cp ('t105');
4637 wakaba 1.34 }
4638 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4639 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4640 wakaba 1.34
4641     unless ($self->{confident}) {
4642 wakaba 1.134 if ($token->{attributes}->{charset}) {
4643 wakaba 1.79 !!!cp ('t106');
4644 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4645     ## in the {change_encoding} callback.
4646 wakaba 1.63 $self->{change_encoding}
4647 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
4648     $token);
4649 wakaba 1.66
4650     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4651     ->set_user_data (manakai_has_reference =>
4652     $token->{attributes}->{charset}
4653     ->{has_reference});
4654 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4655     if ($token->{attributes}->{content}->{value}
4656 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4657 wakaba 1.70 [\x09-\x0D\x20]*=
4658 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4659 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
4660 wakaba 1.79 !!!cp ('t107');
4661 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4662     ## in the {change_encoding} callback.
4663 wakaba 1.63 $self->{change_encoding}
4664 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4665     $token);
4666 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4667     ->set_user_data (manakai_has_reference =>
4668     $token->{attributes}->{content}
4669     ->{has_reference});
4670 wakaba 1.79 } else {
4671     !!!cp ('t108');
4672 wakaba 1.63 }
4673 wakaba 1.34 }
4674 wakaba 1.66 } else {
4675     if ($token->{attributes}->{charset}) {
4676 wakaba 1.79 !!!cp ('t109');
4677 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4678     ->set_user_data (manakai_has_reference =>
4679     $token->{attributes}->{charset}
4680     ->{has_reference});
4681     }
4682 wakaba 1.68 if ($token->{attributes}->{content}) {
4683 wakaba 1.79 !!!cp ('t110');
4684 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4685     ->set_user_data (manakai_has_reference =>
4686     $token->{attributes}->{content}
4687     ->{has_reference});
4688     }
4689 wakaba 1.34 }
4690    
4691 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4692 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4693 wakaba 1.125 !!!ack ('t110.1');
4694 wakaba 1.34 !!!next-token;
4695 wakaba 1.126 next B;
4696 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
4697 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4698 wakaba 1.79 !!!cp ('t111');
4699 wakaba 1.49 ## As if </noscript>
4700     pop @{$self->{open_elements}};
4701 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'title',
4702     token => $token);
4703 wakaba 1.49
4704 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4705 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4706 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4707 wakaba 1.79 !!!cp ('t112');
4708 wakaba 1.153 !!!parse-error (type => 'after head',
4709     text => $token->{tag_name}, token => $token);
4710 wakaba 1.123 push @{$self->{open_elements}},
4711     [$self->{head_element}, $el_category->{head}];
4712 wakaba 1.79 } else {
4713     !!!cp ('t113');
4714 wakaba 1.25 }
4715 wakaba 1.49
4716     ## NOTE: There is a "as if in head" code clone.
4717 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4718     : $self->{open_elements}->[-1]->[0];
4719 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4720 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4721 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4722 wakaba 1.126 next B;
4723 wakaba 1.148 } elsif ($token->{tag_name} eq 'style' or
4724     $token->{tag_name} eq 'noframes') {
4725 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4726 wakaba 1.54 ## insertion mode IN_HEAD_IM)
4727 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4728 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4729 wakaba 1.79 !!!cp ('t114');
4730 wakaba 1.153 !!!parse-error (type => 'after head',
4731     text => $token->{tag_name}, token => $token);
4732 wakaba 1.123 push @{$self->{open_elements}},
4733     [$self->{head_element}, $el_category->{head}];
4734 wakaba 1.79 } else {
4735     !!!cp ('t115');
4736 wakaba 1.25 }
4737 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4738 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4739 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4740 wakaba 1.126 next B;
4741 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
4742 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
4743 wakaba 1.79 !!!cp ('t116');
4744 wakaba 1.25 ## NOTE: and scripting is disabled
4745 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4746 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4747 wakaba 1.125 !!!nack ('t116.1');
4748 wakaba 1.1 !!!next-token;
4749 wakaba 1.126 next B;
4750 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4751 wakaba 1.79 !!!cp ('t117');
4752 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
4753     token => $token);
4754 wakaba 1.1 ## Ignore the token
4755 wakaba 1.125 !!!nack ('t117.1');
4756 wakaba 1.41 !!!next-token;
4757 wakaba 1.126 next B;
4758 wakaba 1.1 } else {
4759 wakaba 1.79 !!!cp ('t118');
4760 wakaba 1.25 #
4761 wakaba 1.1 }
4762 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4763 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4764 wakaba 1.79 !!!cp ('t119');
4765 wakaba 1.49 ## As if </noscript>
4766     pop @{$self->{open_elements}};
4767 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'script',
4768     token => $token);
4769 wakaba 1.49
4770 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4771 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4772 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4773 wakaba 1.79 !!!cp ('t120');
4774 wakaba 1.153 !!!parse-error (type => 'after head',
4775     text => $token->{tag_name}, token => $token);
4776 wakaba 1.123 push @{$self->{open_elements}},
4777     [$self->{head_element}, $el_category->{head}];
4778 wakaba 1.79 } else {
4779     !!!cp ('t121');
4780 wakaba 1.25 }
4781 wakaba 1.49
4782 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4783 wakaba 1.100 $script_start_tag->();
4784     pop @{$self->{open_elements}} # <head>
4785 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4786 wakaba 1.126 next B;
4787 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4788 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4789 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4790 wakaba 1.79 !!!cp ('t122');
4791 wakaba 1.49 ## As if </noscript>
4792     pop @{$self->{open_elements}};
4793 wakaba 1.153 !!!parse-error (type => 'in noscript',
4794     text => $token->{tag_name}, token => $token);
4795 wakaba 1.49
4796     ## Reprocess in the "in head" insertion mode...
4797     ## As if </head>
4798     pop @{$self->{open_elements}};
4799    
4800     ## Reprocess in the "after head" insertion mode...
4801 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4802 wakaba 1.79 !!!cp ('t124');
4803 wakaba 1.49 pop @{$self->{open_elements}};
4804    
4805     ## Reprocess in the "after head" insertion mode...
4806 wakaba 1.79 } else {
4807     !!!cp ('t125');
4808 wakaba 1.49 }
4809    
4810     ## "after head" insertion mode
4811 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4812 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4813 wakaba 1.79 !!!cp ('t126');
4814 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4815     } elsif ($token->{tag_name} eq 'frameset') {
4816 wakaba 1.79 !!!cp ('t127');
4817 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4818     } else {
4819     die "$0: tag name: $self->{tag_name}";
4820     }
4821 wakaba 1.125 !!!nack ('t127.1');
4822 wakaba 1.1 !!!next-token;
4823 wakaba 1.126 next B;
4824 wakaba 1.1 } else {
4825 wakaba 1.79 !!!cp ('t128');
4826 wakaba 1.1 #
4827     }
4828 wakaba 1.49
4829 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4830 wakaba 1.79 !!!cp ('t129');
4831 wakaba 1.49 ## As if </noscript>
4832     pop @{$self->{open_elements}};
4833 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4834     text => $token->{tag_name}, token => $token);
4835 wakaba 1.49
4836     ## Reprocess in the "in head" insertion mode...
4837     ## As if </head>
4838 wakaba 1.25 pop @{$self->{open_elements}};
4839 wakaba 1.49
4840     ## Reprocess in the "after head" insertion mode...
4841 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4842 wakaba 1.79 !!!cp ('t130');
4843 wakaba 1.49 ## As if </head>
4844 wakaba 1.25 pop @{$self->{open_elements}};
4845 wakaba 1.49
4846     ## Reprocess in the "after head" insertion mode...
4847 wakaba 1.79 } else {
4848     !!!cp ('t131');
4849 wakaba 1.49 }
4850    
4851     ## "after head" insertion mode
4852     ## As if <body>
4853 wakaba 1.116 !!!insert-element ('body',, $token);
4854 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4855 wakaba 1.49 ## reprocess
4856 wakaba 1.125 !!!ack-later;
4857 wakaba 1.126 next B;
4858 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4859 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4860 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4861 wakaba 1.79 !!!cp ('t132');
4862 wakaba 1.50 ## As if <head>
4863 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4864 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4865 wakaba 1.123 push @{$self->{open_elements}},
4866     [$self->{head_element}, $el_category->{head}];
4867 wakaba 1.50
4868     ## Reprocess in the "in head" insertion mode...
4869     pop @{$self->{open_elements}};
4870 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4871 wakaba 1.50 !!!next-token;
4872 wakaba 1.126 next B;
4873 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4874 wakaba 1.79 !!!cp ('t133');
4875 wakaba 1.49 ## As if </noscript>
4876     pop @{$self->{open_elements}};
4877 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4878     text => 'head', token => $token);
4879 wakaba 1.49
4880     ## Reprocess in the "in head" insertion mode...
4881 wakaba 1.50 pop @{$self->{open_elements}};
4882 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4883 wakaba 1.50 !!!next-token;
4884 wakaba 1.126 next B;
4885 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4886 wakaba 1.79 !!!cp ('t134');
4887 wakaba 1.49 pop @{$self->{open_elements}};
4888 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4889 wakaba 1.49 !!!next-token;
4890 wakaba 1.126 next B;
4891 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4892     !!!cp ('t134.1');
4893 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
4894     token => $token);
4895 wakaba 1.139 ## Ignore the token
4896     !!!next-token;
4897     next B;
4898 wakaba 1.49 } else {
4899 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4900 wakaba 1.49 }
4901     } elsif ($token->{tag_name} eq 'noscript') {
4902 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4903 wakaba 1.79 !!!cp ('t136');
4904 wakaba 1.49 pop @{$self->{open_elements}};
4905 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4906 wakaba 1.49 !!!next-token;
4907 wakaba 1.126 next B;
4908 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4909     $self->{insertion_mode} == AFTER_HEAD_IM) {
4910 wakaba 1.79 !!!cp ('t137');
4911 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4912     text => 'noscript', token => $token);
4913 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4914     !!!next-token;
4915 wakaba 1.126 next B;
4916 wakaba 1.49 } else {
4917 wakaba 1.79 !!!cp ('t138');
4918 wakaba 1.49 #
4919     }
4920     } elsif ({
4921 wakaba 1.31 body => 1, html => 1,
4922     }->{$token->{tag_name}}) {
4923 wakaba 1.139 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4924     $self->{insertion_mode} == IN_HEAD_IM or
4925     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4926 wakaba 1.79 !!!cp ('t140');
4927 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4928     text => $token->{tag_name}, token => $token);
4929 wakaba 1.49 ## Ignore the token
4930     !!!next-token;
4931 wakaba 1.126 next B;
4932 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4933     !!!cp ('t140.1');
4934 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4935     text => $token->{tag_name}, token => $token);
4936 wakaba 1.139 ## Ignore the token
4937     !!!next-token;
4938     next B;
4939 wakaba 1.79 } else {
4940 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4941 wakaba 1.49 }
4942 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
4943     !!!cp ('t142');
4944 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4945     text => $token->{tag_name}, token => $token);
4946 wakaba 1.139 ## Ignore the token
4947     !!!next-token;
4948     next B;
4949     } elsif ($token->{tag_name} eq 'br') {
4950 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4951 wakaba 1.139 !!!cp ('t142.2');
4952     ## (before head) as if <head>, (in head) as if </head>
4953 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4954 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4955 wakaba 1.139 $self->{insertion_mode} = AFTER_HEAD_IM;
4956    
4957     ## Reprocess in the "after head" insertion mode...
4958     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4959     !!!cp ('t143.2');
4960     ## As if </head>
4961     pop @{$self->{open_elements}};
4962     $self->{insertion_mode} = AFTER_HEAD_IM;
4963    
4964     ## Reprocess in the "after head" insertion mode...
4965     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4966     !!!cp ('t143.3');
4967     ## ISSUE: Two parse errors for <head><noscript></br>
4968 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4969     text => 'br', token => $token);
4970 wakaba 1.139 ## As if </noscript>
4971     pop @{$self->{open_elements}};
4972     $self->{insertion_mode} = IN_HEAD_IM;
4973 wakaba 1.50
4974     ## Reprocess in the "in head" insertion mode...
4975 wakaba 1.139 ## As if </head>
4976     pop @{$self->{open_elements}};
4977     $self->{insertion_mode} = AFTER_HEAD_IM;
4978    
4979     ## Reprocess in the "after head" insertion mode...
4980     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4981     !!!cp ('t143.4');
4982     #
4983 wakaba 1.79 } else {
4984 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4985 wakaba 1.50 }
4986    
4987 wakaba 1.139 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4988 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4989     text => 'br', token => $token);
4990 wakaba 1.139 ## Ignore the token
4991     !!!next-token;
4992     next B;
4993 wakaba 1.25 } else {
4994 wakaba 1.139 !!!cp ('t145');
4995 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4996     text => $token->{tag_name}, token => $token);
4997 wakaba 1.139 ## Ignore the token
4998     !!!next-token;
4999     next B;
5000 wakaba 1.49 }
5001    
5002 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
5003 wakaba 1.79 !!!cp ('t146');
5004 wakaba 1.49 ## As if </noscript>
5005     pop @{$self->{open_elements}};
5006 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
5007     text => $token->{tag_name}, token => $token);
5008 wakaba 1.49
5009     ## Reprocess in the "in head" insertion mode...
5010     ## As if </head>
5011     pop @{$self->{open_elements}};
5012    
5013     ## Reprocess in the "after head" insertion mode...
5014 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
5015 wakaba 1.79 !!!cp ('t147');
5016 wakaba 1.49 ## As if </head>
5017     pop @{$self->{open_elements}};
5018    
5019     ## Reprocess in the "after head" insertion mode...
5020 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
5021 wakaba 1.82 ## ISSUE: This case cannot be reached?
5022 wakaba 1.79 !!!cp ('t148');
5023 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5024     text => $token->{tag_name}, token => $token);
5025 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
5026     !!!next-token;
5027 wakaba 1.126 next B;
5028 wakaba 1.79 } else {
5029     !!!cp ('t149');
5030 wakaba 1.1 }
5031    
5032 wakaba 1.49 ## "after head" insertion mode
5033     ## As if <body>
5034 wakaba 1.116 !!!insert-element ('body',, $token);
5035 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5036 wakaba 1.52 ## reprocess
5037 wakaba 1.126 next B;
5038 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5039     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
5040     !!!cp ('t149.1');
5041    
5042     ## NOTE: As if <head>
5043 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
5044 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
5045     ($self->{head_element});
5046 wakaba 1.123 #push @{$self->{open_elements}},
5047     # [$self->{head_element}, $el_category->{head}];
5048 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
5049     ## NOTE: Reprocess.
5050    
5051     ## NOTE: As if </head>
5052     #pop @{$self->{open_elements}};
5053     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
5054     ## NOTE: Reprocess.
5055    
5056     #
5057     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
5058     !!!cp ('t149.2');
5059    
5060     ## NOTE: As if </head>
5061     pop @{$self->{open_elements}};
5062     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
5063     ## NOTE: Reprocess.
5064    
5065     #
5066     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
5067     !!!cp ('t149.3');
5068    
5069 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
5070 wakaba 1.104
5071     ## As if </noscript>
5072     pop @{$self->{open_elements}};
5073     #$self->{insertion_mode} = IN_HEAD_IM;
5074     ## NOTE: Reprocess.
5075    
5076     ## NOTE: As if </head>
5077     pop @{$self->{open_elements}};
5078     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
5079     ## NOTE: Reprocess.
5080    
5081     #
5082     } else {
5083     !!!cp ('t149.4');
5084     #
5085     }
5086    
5087     ## NOTE: As if <body>
5088 wakaba 1.116 !!!insert-element ('body',, $token);
5089 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
5090     ## NOTE: Reprocess.
5091 wakaba 1.126 next B;
5092 wakaba 1.104 } else {
5093     die "$0: $token->{type}: Unknown token type";
5094     }
5095 wakaba 1.52
5096     ## ISSUE: An issue in the spec.
5097 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
5098 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5099 wakaba 1.79 !!!cp ('t150');
5100 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
5101     $reconstruct_active_formatting_elements->($insert_to_current);
5102    
5103     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5104    
5105     !!!next-token;
5106 wakaba 1.126 next B;
5107 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5108 wakaba 1.52 if ({
5109     caption => 1, col => 1, colgroup => 1, tbody => 1,
5110     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
5111     }->{$token->{tag_name}}) {
5112 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
5113 wakaba 1.52 ## have an element in table scope
5114 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
5115 wakaba 1.52 my $node = $self->{open_elements}->[$_];
5116 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
5117 wakaba 1.79 !!!cp ('t151');
5118 wakaba 1.108
5119     ## Close the cell
5120 wakaba 1.125 !!!back-token; # <x>
5121 wakaba 1.122 $token = {type => END_TAG_TOKEN,
5122     tag_name => $node->[0]->manakai_local_name,
5123 wakaba 1.114 line => $token->{line},
5124     column => $token->{column}};
5125 wakaba 1.126 next B;
5126 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5127 wakaba 1.79 !!!cp ('t152');
5128 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
5129     last;
5130 wakaba 1.52 }
5131 wakaba 1.108 }
5132    
5133     !!!cp ('t153');
5134     !!!parse-error (type => 'start tag not allowed',
5135 wakaba 1.153 text => $token->{tag_name}, token => $token);
5136 wakaba 1.108 ## Ignore the token
5137 wakaba 1.125 !!!nack ('t153.1');
5138 wakaba 1.108 !!!next-token;
5139 wakaba 1.126 next B;
5140 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
5141 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
5142     token => $token);
5143 wakaba 1.52
5144 wakaba 1.108 ## NOTE: As if </caption>.
5145 wakaba 1.52 ## have a table element in table scope
5146     my $i;
5147 wakaba 1.108 INSCOPE: {
5148     for (reverse 0..$#{$self->{open_elements}}) {
5149     my $node = $self->{open_elements}->[$_];
5150 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5151 wakaba 1.108 !!!cp ('t155');
5152     $i = $_;
5153     last INSCOPE;
5154 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5155 wakaba 1.108 !!!cp ('t156');
5156     last;
5157     }
5158 wakaba 1.52 }
5159 wakaba 1.108
5160     !!!cp ('t157');
5161     !!!parse-error (type => 'start tag not allowed',
5162 wakaba 1.153 text => $token->{tag_name}, token => $token);
5163 wakaba 1.108 ## Ignore the token
5164 wakaba 1.125 !!!nack ('t157.1');
5165 wakaba 1.108 !!!next-token;
5166 wakaba 1.126 next B;
5167 wakaba 1.52 } # INSCOPE
5168    
5169     ## generate implied end tags
5170 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5171     & END_TAG_OPTIONAL_EL) {
5172 wakaba 1.79 !!!cp ('t158');
5173 wakaba 1.86 pop @{$self->{open_elements}};
5174 wakaba 1.52 }
5175    
5176 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5177 wakaba 1.79 !!!cp ('t159');
5178 wakaba 1.122 !!!parse-error (type => 'not closed',
5179 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5180 wakaba 1.122 ->manakai_local_name,
5181     token => $token);
5182 wakaba 1.79 } else {
5183     !!!cp ('t160');
5184 wakaba 1.52 }
5185    
5186     splice @{$self->{open_elements}}, $i;
5187    
5188     $clear_up_to_marker->();
5189    
5190 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5191 wakaba 1.52
5192     ## reprocess
5193 wakaba 1.125 !!!ack-later;
5194 wakaba 1.126 next B;
5195 wakaba 1.52 } else {
5196 wakaba 1.79 !!!cp ('t161');
5197 wakaba 1.52 #
5198     }
5199     } else {
5200 wakaba 1.79 !!!cp ('t162');
5201 wakaba 1.52 #
5202     }
5203 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5204 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
5205 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
5206 wakaba 1.43 ## have an element in table scope
5207 wakaba 1.52 my $i;
5208 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5209     my $node = $self->{open_elements}->[$_];
5210 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5211 wakaba 1.79 !!!cp ('t163');
5212 wakaba 1.52 $i = $_;
5213 wakaba 1.43 last INSCOPE;
5214 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5215 wakaba 1.79 !!!cp ('t164');
5216 wakaba 1.43 last INSCOPE;
5217     }
5218     } # INSCOPE
5219 wakaba 1.52 unless (defined $i) {
5220 wakaba 1.79 !!!cp ('t165');
5221 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5222     text => $token->{tag_name},
5223     token => $token);
5224 wakaba 1.43 ## Ignore the token
5225     !!!next-token;
5226 wakaba 1.126 next B;
5227 wakaba 1.43 }
5228    
5229 wakaba 1.52 ## generate implied end tags
5230 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5231     & END_TAG_OPTIONAL_EL) {
5232 wakaba 1.79 !!!cp ('t166');
5233 wakaba 1.86 pop @{$self->{open_elements}};
5234 wakaba 1.52 }
5235 wakaba 1.86
5236 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5237     ne $token->{tag_name}) {
5238 wakaba 1.79 !!!cp ('t167');
5239 wakaba 1.122 !!!parse-error (type => 'not closed',
5240 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5241 wakaba 1.122 ->manakai_local_name,
5242     token => $token);
5243 wakaba 1.79 } else {
5244     !!!cp ('t168');
5245 wakaba 1.52 }
5246    
5247     splice @{$self->{open_elements}}, $i;
5248    
5249     $clear_up_to_marker->();
5250    
5251 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5252 wakaba 1.52
5253     !!!next-token;
5254 wakaba 1.126 next B;
5255 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
5256 wakaba 1.79 !!!cp ('t169');
5257 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5258     text => $token->{tag_name}, token => $token);
5259 wakaba 1.52 ## Ignore the token
5260     !!!next-token;
5261 wakaba 1.126 next B;
5262 wakaba 1.52 } else {
5263 wakaba 1.79 !!!cp ('t170');
5264 wakaba 1.52 #
5265     }
5266     } elsif ($token->{tag_name} eq 'caption') {
5267 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
5268 wakaba 1.43 ## have a table element in table scope
5269     my $i;
5270 wakaba 1.108 INSCOPE: {
5271     for (reverse 0..$#{$self->{open_elements}}) {
5272     my $node = $self->{open_elements}->[$_];
5273 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5274 wakaba 1.108 !!!cp ('t171');
5275     $i = $_;
5276     last INSCOPE;
5277 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5278 wakaba 1.108 !!!cp ('t172');
5279     last;
5280     }
5281 wakaba 1.43 }
5282 wakaba 1.108
5283     !!!cp ('t173');
5284     !!!parse-error (type => 'unmatched end tag',
5285 wakaba 1.153 text => $token->{tag_name}, token => $token);
5286 wakaba 1.108 ## Ignore the token
5287     !!!next-token;
5288 wakaba 1.126 next B;
5289 wakaba 1.43 } # INSCOPE
5290    
5291     ## generate implied end tags
5292 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5293     & END_TAG_OPTIONAL_EL) {
5294 wakaba 1.79 !!!cp ('t174');
5295 wakaba 1.86 pop @{$self->{open_elements}};
5296 wakaba 1.43 }
5297 wakaba 1.52
5298 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5299 wakaba 1.79 !!!cp ('t175');
5300 wakaba 1.122 !!!parse-error (type => 'not closed',
5301 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5302 wakaba 1.122 ->manakai_local_name,
5303     token => $token);
5304 wakaba 1.79 } else {
5305     !!!cp ('t176');
5306 wakaba 1.52 }
5307    
5308     splice @{$self->{open_elements}}, $i;
5309    
5310     $clear_up_to_marker->();
5311    
5312 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5313 wakaba 1.52
5314     !!!next-token;
5315 wakaba 1.126 next B;
5316 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
5317 wakaba 1.79 !!!cp ('t177');
5318 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5319     text => $token->{tag_name}, token => $token);
5320 wakaba 1.52 ## Ignore the token
5321     !!!next-token;
5322 wakaba 1.126 next B;
5323 wakaba 1.52 } else {
5324 wakaba 1.79 !!!cp ('t178');
5325 wakaba 1.52 #
5326     }
5327     } elsif ({
5328     table => 1, tbody => 1, tfoot => 1,
5329     thead => 1, tr => 1,
5330     }->{$token->{tag_name}} and
5331 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
5332 wakaba 1.52 ## have an element in table scope
5333     my $i;
5334     my $tn;
5335 wakaba 1.108 INSCOPE: {
5336     for (reverse 0..$#{$self->{open_elements}}) {
5337     my $node = $self->{open_elements}->[$_];
5338 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5339 wakaba 1.108 !!!cp ('t179');
5340     $i = $_;
5341    
5342     ## Close the cell
5343 wakaba 1.125 !!!back-token; # </x>
5344 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
5345     line => $token->{line},
5346     column => $token->{column}};
5347 wakaba 1.126 next B;
5348 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
5349 wakaba 1.108 !!!cp ('t180');
5350 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
5351 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
5352     ## in scope in the stack of open elements by definition.
5353 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5354 wakaba 1.108 ## ISSUE: Can this be reached?
5355     !!!cp ('t181');
5356     last;
5357     }
5358 wakaba 1.52 }
5359 wakaba 1.108
5360 wakaba 1.79 !!!cp ('t182');
5361 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
5362 wakaba 1.153 text => $token->{tag_name}, token => $token);
5363 wakaba 1.52 ## Ignore the token
5364     !!!next-token;
5365 wakaba 1.126 next B;
5366 wakaba 1.108 } # INSCOPE
5367 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
5368 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5369 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
5370     token => $token);
5371 wakaba 1.52
5372     ## As if </caption>
5373     ## have a table element in table scope
5374     my $i;
5375     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5376     my $node = $self->{open_elements}->[$_];
5377 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5378 wakaba 1.79 !!!cp ('t184');
5379 wakaba 1.52 $i = $_;
5380     last INSCOPE;
5381 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5382 wakaba 1.79 !!!cp ('t185');
5383 wakaba 1.52 last INSCOPE;
5384     }
5385     } # INSCOPE
5386     unless (defined $i) {
5387 wakaba 1.79 !!!cp ('t186');
5388 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5389     text => 'caption', token => $token);
5390 wakaba 1.52 ## Ignore the token
5391     !!!next-token;
5392 wakaba 1.126 next B;
5393 wakaba 1.52 }
5394    
5395     ## generate implied end tags
5396 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5397 wakaba 1.79 !!!cp ('t187');
5398 wakaba 1.86 pop @{$self->{open_elements}};
5399 wakaba 1.52 }
5400    
5401 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5402 wakaba 1.79 !!!cp ('t188');
5403 wakaba 1.122 !!!parse-error (type => 'not closed',
5404 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5405 wakaba 1.122 ->manakai_local_name,
5406     token => $token);
5407 wakaba 1.79 } else {
5408     !!!cp ('t189');
5409 wakaba 1.52 }
5410    
5411     splice @{$self->{open_elements}}, $i;
5412    
5413     $clear_up_to_marker->();
5414    
5415 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5416 wakaba 1.52
5417     ## reprocess
5418 wakaba 1.126 next B;
5419 wakaba 1.52 } elsif ({
5420     body => 1, col => 1, colgroup => 1, html => 1,
5421     }->{$token->{tag_name}}) {
5422 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
5423 wakaba 1.79 !!!cp ('t190');
5424 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5425     text => $token->{tag_name}, token => $token);
5426 wakaba 1.52 ## Ignore the token
5427     !!!next-token;
5428 wakaba 1.126 next B;
5429 wakaba 1.52 } else {
5430 wakaba 1.79 !!!cp ('t191');
5431 wakaba 1.52 #
5432     }
5433     } elsif ({
5434     tbody => 1, tfoot => 1,
5435     thead => 1, tr => 1,
5436     }->{$token->{tag_name}} and
5437 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5438 wakaba 1.79 !!!cp ('t192');
5439 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5440     text => $token->{tag_name}, token => $token);
5441 wakaba 1.52 ## Ignore the token
5442     !!!next-token;
5443 wakaba 1.126 next B;
5444 wakaba 1.52 } else {
5445 wakaba 1.79 !!!cp ('t193');
5446 wakaba 1.52 #
5447     }
5448 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5449     for my $entry (@{$self->{open_elements}}) {
5450 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
5451 wakaba 1.104 !!!cp ('t75');
5452 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5453 wakaba 1.104 last;
5454     }
5455     }
5456    
5457     ## Stop parsing.
5458     last B;
5459 wakaba 1.52 } else {
5460     die "$0: $token->{type}: Unknown token type";
5461     }
5462    
5463     $insert = $insert_to_current;
5464     #
5465 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
5466 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5467 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
5468     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5469     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5470 wakaba 1.52
5471 wakaba 1.95 unless (length $token->{data}) {
5472     !!!cp ('t194');
5473     !!!next-token;
5474 wakaba 1.126 next B;
5475 wakaba 1.95 } else {
5476     !!!cp ('t195');
5477     }
5478     }
5479 wakaba 1.52
5480 wakaba 1.153 !!!parse-error (type => 'in table:#text', token => $token);
5481 wakaba 1.52
5482     ## As if in body, but insert into foster parent element
5483     ## ISSUE: Spec says that "whenever a node would be inserted
5484     ## into the current node" while characters might not
5485     ## result in a new Text node.
5486     $reconstruct_active_formatting_elements->($insert_to_foster);
5487    
5488 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
5489 wakaba 1.52 # MUST
5490     my $foster_parent_element;
5491     my $next_sibling;
5492     my $prev_sibling;
5493     OE: for (reverse 0..$#{$self->{open_elements}}) {
5494 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
5495 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5496     if (defined $parent and $parent->node_type == 1) {
5497 wakaba 1.79 !!!cp ('t196');
5498 wakaba 1.52 $foster_parent_element = $parent;
5499     $next_sibling = $self->{open_elements}->[$_]->[0];
5500     $prev_sibling = $next_sibling->previous_sibling;
5501     } else {
5502 wakaba 1.79 !!!cp ('t197');
5503 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5504     $prev_sibling = $foster_parent_element->last_child;
5505     }
5506     last OE;
5507     }
5508     } # OE
5509     $foster_parent_element = $self->{open_elements}->[0]->[0] and
5510     $prev_sibling = $foster_parent_element->last_child
5511     unless defined $foster_parent_element;
5512     if (defined $prev_sibling and
5513     $prev_sibling->node_type == 3) {
5514 wakaba 1.79 !!!cp ('t198');
5515 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
5516     } else {
5517 wakaba 1.79 !!!cp ('t199');
5518 wakaba 1.52 $foster_parent_element->insert_before
5519     ($self->{document}->create_text_node ($token->{data}),
5520     $next_sibling);
5521     }
5522 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
5523     } else {
5524     !!!cp ('t200');
5525     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5526     }
5527 wakaba 1.52
5528 wakaba 1.95 !!!next-token;
5529 wakaba 1.126 next B;
5530 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5531 wakaba 1.153 if ({
5532     tr => ($self->{insertion_mode} != IN_ROW_IM),
5533     th => 1, td => 1,
5534     }->{$token->{tag_name}}) {
5535     if ($self->{insertion_mode} == IN_TABLE_IM) {
5536     ## Clear back to table context
5537     while (not ($self->{open_elements}->[-1]->[1]
5538     & TABLE_SCOPING_EL)) {
5539     !!!cp ('t201');
5540     pop @{$self->{open_elements}};
5541     }
5542    
5543     !!!insert-element ('tbody',, $token);
5544     $self->{insertion_mode} = IN_TABLE_BODY_IM;
5545     ## reprocess in the "in table body" insertion mode...
5546     }
5547    
5548     if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5549     unless ($token->{tag_name} eq 'tr') {
5550     !!!cp ('t202');
5551     !!!parse-error (type => 'missing start tag:tr', token => $token);
5552     }
5553 wakaba 1.43
5554 wakaba 1.153 ## Clear back to table body context
5555     while (not ($self->{open_elements}->[-1]->[1]
5556     & TABLE_ROWS_SCOPING_EL)) {
5557     !!!cp ('t203');
5558     ## ISSUE: Can this case be reached?
5559     pop @{$self->{open_elements}};
5560     }
5561 wakaba 1.43
5562 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5563 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5564 wakaba 1.79 !!!cp ('t204');
5565 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5566 wakaba 1.125 !!!nack ('t204');
5567 wakaba 1.52 !!!next-token;
5568 wakaba 1.126 next B;
5569 wakaba 1.52 } else {
5570 wakaba 1.79 !!!cp ('t205');
5571 wakaba 1.116 !!!insert-element ('tr',, $token);
5572 wakaba 1.52 ## reprocess in the "in row" insertion mode
5573     }
5574 wakaba 1.79 } else {
5575     !!!cp ('t206');
5576 wakaba 1.52 }
5577    
5578     ## Clear back to table row context
5579 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5580     & TABLE_ROW_SCOPING_EL)) {
5581 wakaba 1.79 !!!cp ('t207');
5582 wakaba 1.52 pop @{$self->{open_elements}};
5583 wakaba 1.43 }
5584 wakaba 1.52
5585 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5586 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
5587 wakaba 1.52
5588     push @$active_formatting_elements, ['#marker', ''];
5589    
5590 wakaba 1.125 !!!nack ('t207.1');
5591 wakaba 1.52 !!!next-token;
5592 wakaba 1.126 next B;
5593 wakaba 1.52 } elsif ({
5594     caption => 1, col => 1, colgroup => 1,
5595     tbody => 1, tfoot => 1, thead => 1,
5596 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5597 wakaba 1.52 }->{$token->{tag_name}}) {
5598 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5599 wakaba 1.52 ## As if </tr>
5600 wakaba 1.43 ## have an element in table scope
5601     my $i;
5602     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5603     my $node = $self->{open_elements}->[$_];
5604 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5605 wakaba 1.79 !!!cp ('t208');
5606 wakaba 1.43 $i = $_;
5607     last INSCOPE;
5608 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5609 wakaba 1.79 !!!cp ('t209');
5610 wakaba 1.43 last INSCOPE;
5611     }
5612     } # INSCOPE
5613 wakaba 1.79 unless (defined $i) {
5614 wakaba 1.125 !!!cp ('t210');
5615 wakaba 1.83 ## TODO: This type is wrong.
5616 wakaba 1.153 !!!parse-error (type => 'unmacthed end tag',
5617     text => $token->{tag_name}, token => $token);
5618 wakaba 1.52 ## Ignore the token
5619 wakaba 1.125 !!!nack ('t210.1');
5620 wakaba 1.52 !!!next-token;
5621 wakaba 1.126 next B;
5622 wakaba 1.43 }
5623    
5624 wakaba 1.52 ## Clear back to table row context
5625 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5626     & TABLE_ROW_SCOPING_EL)) {
5627 wakaba 1.79 !!!cp ('t211');
5628 wakaba 1.83 ## ISSUE: Can this case be reached?
5629 wakaba 1.52 pop @{$self->{open_elements}};
5630 wakaba 1.1 }
5631 wakaba 1.43
5632 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5633 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5634 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5635 wakaba 1.79 !!!cp ('t212');
5636 wakaba 1.52 ## reprocess
5637 wakaba 1.125 !!!ack-later;
5638 wakaba 1.126 next B;
5639 wakaba 1.52 } else {
5640 wakaba 1.79 !!!cp ('t213');
5641 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5642     }
5643 wakaba 1.1 }
5644 wakaba 1.52
5645 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5646 wakaba 1.52 ## have an element in table scope
5647 wakaba 1.43 my $i;
5648     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5649     my $node = $self->{open_elements}->[$_];
5650 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5651 wakaba 1.79 !!!cp ('t214');
5652 wakaba 1.43 $i = $_;
5653     last INSCOPE;
5654 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5655 wakaba 1.79 !!!cp ('t215');
5656 wakaba 1.43 last INSCOPE;
5657     }
5658     } # INSCOPE
5659 wakaba 1.52 unless (defined $i) {
5660 wakaba 1.79 !!!cp ('t216');
5661 wakaba 1.153 ## TODO: This error type is wrong.
5662     !!!parse-error (type => 'unmatched end tag',
5663     text => $token->{tag_name}, token => $token);
5664 wakaba 1.52 ## Ignore the token
5665 wakaba 1.125 !!!nack ('t216.1');
5666 wakaba 1.52 !!!next-token;
5667 wakaba 1.126 next B;
5668 wakaba 1.43 }
5669 wakaba 1.52
5670     ## Clear back to table body context
5671 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5672     & TABLE_ROWS_SCOPING_EL)) {
5673 wakaba 1.79 !!!cp ('t217');
5674 wakaba 1.83 ## ISSUE: Can this state be reached?
5675 wakaba 1.52 pop @{$self->{open_elements}};
5676 wakaba 1.43 }
5677    
5678 wakaba 1.52 ## As if <{current node}>
5679     ## have an element in table scope
5680     ## true by definition
5681 wakaba 1.43
5682 wakaba 1.52 ## Clear back to table body context
5683     ## nop by definition
5684 wakaba 1.43
5685 wakaba 1.52 pop @{$self->{open_elements}};
5686 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5687 wakaba 1.52 ## reprocess in "in table" insertion mode...
5688 wakaba 1.79 } else {
5689     !!!cp ('t218');
5690 wakaba 1.52 }
5691    
5692     if ($token->{tag_name} eq 'col') {
5693     ## Clear back to table context
5694 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5695     & TABLE_SCOPING_EL)) {
5696 wakaba 1.79 !!!cp ('t219');
5697 wakaba 1.83 ## ISSUE: Can this state be reached?
5698 wakaba 1.52 pop @{$self->{open_elements}};
5699     }
5700 wakaba 1.43
5701 wakaba 1.116 !!!insert-element ('colgroup',, $token);
5702 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5703 wakaba 1.52 ## reprocess
5704 wakaba 1.125 !!!ack-later;
5705 wakaba 1.126 next B;
5706 wakaba 1.52 } elsif ({
5707     caption => 1,
5708     colgroup => 1,
5709     tbody => 1, tfoot => 1, thead => 1,
5710     }->{$token->{tag_name}}) {
5711     ## Clear back to table context
5712 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5713     & TABLE_SCOPING_EL)) {
5714 wakaba 1.79 !!!cp ('t220');
5715 wakaba 1.83 ## ISSUE: Can this state be reached?
5716 wakaba 1.52 pop @{$self->{open_elements}};
5717 wakaba 1.1 }
5718 wakaba 1.52
5719     push @$active_formatting_elements, ['#marker', '']
5720     if $token->{tag_name} eq 'caption';
5721    
5722 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5723 wakaba 1.52 $self->{insertion_mode} = {
5724 wakaba 1.54 caption => IN_CAPTION_IM,
5725     colgroup => IN_COLUMN_GROUP_IM,
5726     tbody => IN_TABLE_BODY_IM,
5727     tfoot => IN_TABLE_BODY_IM,
5728     thead => IN_TABLE_BODY_IM,
5729 wakaba 1.52 }->{$token->{tag_name}};
5730 wakaba 1.1 !!!next-token;
5731 wakaba 1.125 !!!nack ('t220.1');
5732 wakaba 1.126 next B;
5733 wakaba 1.52 } else {
5734     die "$0: in table: <>: $token->{tag_name}";
5735 wakaba 1.1 }
5736 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5737 wakaba 1.122 !!!parse-error (type => 'not closed',
5738 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5739 wakaba 1.122 ->manakai_local_name,
5740     token => $token);
5741 wakaba 1.1
5742 wakaba 1.52 ## As if </table>
5743 wakaba 1.1 ## have a table element in table scope
5744     my $i;
5745 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5746     my $node = $self->{open_elements}->[$_];
5747 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5748 wakaba 1.79 !!!cp ('t221');
5749 wakaba 1.1 $i = $_;
5750     last INSCOPE;
5751 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5752 wakaba 1.79 !!!cp ('t222');
5753 wakaba 1.1 last INSCOPE;
5754     }
5755     } # INSCOPE
5756     unless (defined $i) {
5757 wakaba 1.79 !!!cp ('t223');
5758 wakaba 1.83 ## TODO: The following is wrong, maybe.
5759 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
5760     token => $token);
5761 wakaba 1.52 ## Ignore tokens </table><table>
5762 wakaba 1.125 !!!nack ('t223.1');
5763 wakaba 1.1 !!!next-token;
5764 wakaba 1.126 next B;
5765 wakaba 1.1 }
5766    
5767 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
5768 wakaba 1.1 ## generate implied end tags
5769 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5770 wakaba 1.79 !!!cp ('t224');
5771 wakaba 1.86 pop @{$self->{open_elements}};
5772 wakaba 1.1 }
5773    
5774 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5775 wakaba 1.79 !!!cp ('t225');
5776 wakaba 1.122 ## NOTE: |<table><tr><table>|
5777     !!!parse-error (type => 'not closed',
5778 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5779 wakaba 1.122 ->manakai_local_name,
5780     token => $token);
5781 wakaba 1.79 } else {
5782     !!!cp ('t226');
5783 wakaba 1.1 }
5784    
5785 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5786 wakaba 1.95 pop @{$open_tables};
5787 wakaba 1.1
5788 wakaba 1.52 $self->_reset_insertion_mode;
5789 wakaba 1.1
5790 wakaba 1.125 ## reprocess
5791     !!!ack-later;
5792 wakaba 1.126 next B;
5793 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
5794     if (not $open_tables->[-1]->[1]) { # tainted
5795     !!!cp ('t227.8');
5796     ## NOTE: This is a "as if in head" code clone.
5797     $parse_rcdata->(CDATA_CONTENT_MODEL);
5798 wakaba 1.126 next B;
5799 wakaba 1.100 } else {
5800     !!!cp ('t227.7');
5801     #
5802     }
5803     } elsif ($token->{tag_name} eq 'script') {
5804     if (not $open_tables->[-1]->[1]) { # tainted
5805     !!!cp ('t227.6');
5806     ## NOTE: This is a "as if in head" code clone.
5807     $script_start_tag->();
5808 wakaba 1.126 next B;
5809 wakaba 1.100 } else {
5810     !!!cp ('t227.5');
5811     #
5812     }
5813 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5814     if (not $open_tables->[-1]->[1]) { # tainted
5815     if ($token->{attributes}->{type}) { ## TODO: case
5816     my $type = lc $token->{attributes}->{type}->{value};
5817     if ($type eq 'hidden') {
5818     !!!cp ('t227.3');
5819 wakaba 1.153 !!!parse-error (type => 'in table',
5820     text => $token->{tag_name}, token => $token);
5821 wakaba 1.98
5822 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5823 wakaba 1.98
5824     ## TODO: form element pointer
5825    
5826     pop @{$self->{open_elements}};
5827    
5828     !!!next-token;
5829 wakaba 1.125 !!!ack ('t227.2.1');
5830 wakaba 1.126 next B;
5831 wakaba 1.98 } else {
5832     !!!cp ('t227.2');
5833     #
5834     }
5835     } else {
5836     !!!cp ('t227.1');
5837     #
5838     }
5839     } else {
5840     !!!cp ('t227.4');
5841     #
5842     }
5843 wakaba 1.58 } else {
5844 wakaba 1.79 !!!cp ('t227');
5845 wakaba 1.58 #
5846     }
5847 wakaba 1.98
5848 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
5849     token => $token);
5850 wakaba 1.98
5851     $insert = $insert_to_foster;
5852     #
5853 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5854 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5855 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5856 wakaba 1.52 ## have an element in table scope
5857     my $i;
5858     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5859     my $node = $self->{open_elements}->[$_];
5860 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5861 wakaba 1.79 !!!cp ('t228');
5862 wakaba 1.52 $i = $_;
5863     last INSCOPE;
5864 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5865 wakaba 1.79 !!!cp ('t229');
5866 wakaba 1.52 last INSCOPE;
5867     }
5868     } # INSCOPE
5869     unless (defined $i) {
5870 wakaba 1.79 !!!cp ('t230');
5871 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5872     text => $token->{tag_name}, token => $token);
5873 wakaba 1.52 ## Ignore the token
5874 wakaba 1.125 !!!nack ('t230.1');
5875 wakaba 1.42 !!!next-token;
5876 wakaba 1.126 next B;
5877 wakaba 1.79 } else {
5878     !!!cp ('t232');
5879 wakaba 1.42 }
5880    
5881 wakaba 1.52 ## Clear back to table row context
5882 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5883     & TABLE_ROW_SCOPING_EL)) {
5884 wakaba 1.79 !!!cp ('t231');
5885 wakaba 1.83 ## ISSUE: Can this state be reached?
5886 wakaba 1.52 pop @{$self->{open_elements}};
5887     }
5888 wakaba 1.42
5889 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5890 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5891 wakaba 1.52 !!!next-token;
5892 wakaba 1.125 !!!nack ('t231.1');
5893 wakaba 1.126 next B;
5894 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5895 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5896 wakaba 1.52 ## As if </tr>
5897     ## have an element in table scope
5898     my $i;
5899     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5900     my $node = $self->{open_elements}->[$_];
5901 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5902 wakaba 1.79 !!!cp ('t233');
5903 wakaba 1.52 $i = $_;
5904     last INSCOPE;
5905 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5906 wakaba 1.79 !!!cp ('t234');
5907 wakaba 1.52 last INSCOPE;
5908 wakaba 1.42 }
5909 wakaba 1.52 } # INSCOPE
5910     unless (defined $i) {
5911 wakaba 1.79 !!!cp ('t235');
5912 wakaba 1.83 ## TODO: The following is wrong.
5913 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5914     text => $token->{type}, token => $token);
5915 wakaba 1.52 ## Ignore the token
5916 wakaba 1.125 !!!nack ('t236.1');
5917 wakaba 1.52 !!!next-token;
5918 wakaba 1.126 next B;
5919 wakaba 1.42 }
5920 wakaba 1.52
5921     ## Clear back to table row context
5922 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5923     & TABLE_ROW_SCOPING_EL)) {
5924 wakaba 1.79 !!!cp ('t236');
5925 wakaba 1.83 ## ISSUE: Can this state be reached?
5926 wakaba 1.46 pop @{$self->{open_elements}};
5927 wakaba 1.1 }
5928 wakaba 1.46
5929 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5930 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5931 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5932 wakaba 1.1 }
5933    
5934 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5935 wakaba 1.52 ## have an element in table scope
5936     my $i;
5937     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5938     my $node = $self->{open_elements}->[$_];
5939 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5940 wakaba 1.79 !!!cp ('t237');
5941 wakaba 1.52 $i = $_;
5942     last INSCOPE;
5943 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5944 wakaba 1.79 !!!cp ('t238');
5945 wakaba 1.52 last INSCOPE;
5946     }
5947     } # INSCOPE
5948     unless (defined $i) {
5949 wakaba 1.79 !!!cp ('t239');
5950 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5951     text => $token->{tag_name}, token => $token);
5952 wakaba 1.52 ## Ignore the token
5953 wakaba 1.125 !!!nack ('t239.1');
5954 wakaba 1.52 !!!next-token;
5955 wakaba 1.126 next B;
5956 wakaba 1.47 }
5957    
5958     ## Clear back to table body context
5959 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5960     & TABLE_ROWS_SCOPING_EL)) {
5961 wakaba 1.79 !!!cp ('t240');
5962 wakaba 1.47 pop @{$self->{open_elements}};
5963     }
5964    
5965 wakaba 1.52 ## As if <{current node}>
5966     ## have an element in table scope
5967     ## true by definition
5968    
5969     ## Clear back to table body context
5970     ## nop by definition
5971    
5972     pop @{$self->{open_elements}};
5973 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5974 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5975     }
5976    
5977 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5978     ## When you edit the code fragment below, please ensure that
5979     ## the code for <table> in the "in table" insertion mode
5980     ## is synced with it.
5981    
5982 wakaba 1.52 ## have a table element in table scope
5983     my $i;
5984     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5985     my $node = $self->{open_elements}->[$_];
5986 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5987 wakaba 1.79 !!!cp ('t241');
5988 wakaba 1.52 $i = $_;
5989     last INSCOPE;
5990 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5991 wakaba 1.79 !!!cp ('t242');
5992 wakaba 1.52 last INSCOPE;
5993 wakaba 1.47 }
5994 wakaba 1.52 } # INSCOPE
5995     unless (defined $i) {
5996 wakaba 1.79 !!!cp ('t243');
5997 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5998     text => $token->{tag_name}, token => $token);
5999 wakaba 1.52 ## Ignore the token
6000 wakaba 1.125 !!!nack ('t243.1');
6001 wakaba 1.52 !!!next-token;
6002 wakaba 1.126 next B;
6003 wakaba 1.3 }
6004 wakaba 1.52
6005     splice @{$self->{open_elements}}, $i;
6006 wakaba 1.95 pop @{$open_tables};
6007 wakaba 1.1
6008 wakaba 1.52 $self->_reset_insertion_mode;
6009 wakaba 1.47
6010     !!!next-token;
6011 wakaba 1.126 next B;
6012 wakaba 1.47 } elsif ({
6013 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
6014 wakaba 1.52 }->{$token->{tag_name}} and
6015 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
6016 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
6017 wakaba 1.52 ## have an element in table scope
6018     my $i;
6019     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6020     my $node = $self->{open_elements}->[$_];
6021 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6022 wakaba 1.79 !!!cp ('t247');
6023 wakaba 1.52 $i = $_;
6024     last INSCOPE;
6025 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6026 wakaba 1.79 !!!cp ('t248');
6027 wakaba 1.52 last INSCOPE;
6028     }
6029     } # INSCOPE
6030     unless (defined $i) {
6031 wakaba 1.79 !!!cp ('t249');
6032 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6033     text => $token->{tag_name}, token => $token);
6034 wakaba 1.52 ## Ignore the token
6035 wakaba 1.125 !!!nack ('t249.1');
6036 wakaba 1.52 !!!next-token;
6037 wakaba 1.126 next B;
6038 wakaba 1.52 }
6039    
6040 wakaba 1.48 ## As if </tr>
6041     ## have an element in table scope
6042     my $i;
6043     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6044     my $node = $self->{open_elements}->[$_];
6045 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
6046 wakaba 1.79 !!!cp ('t250');
6047 wakaba 1.48 $i = $_;
6048     last INSCOPE;
6049 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6050 wakaba 1.79 !!!cp ('t251');
6051 wakaba 1.48 last INSCOPE;
6052     }
6053     } # INSCOPE
6054 wakaba 1.52 unless (defined $i) {
6055 wakaba 1.79 !!!cp ('t252');
6056 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6057     text => 'tr', token => $token);
6058 wakaba 1.52 ## Ignore the token
6059 wakaba 1.125 !!!nack ('t252.1');
6060 wakaba 1.52 !!!next-token;
6061 wakaba 1.126 next B;
6062 wakaba 1.52 }
6063 wakaba 1.48
6064     ## Clear back to table row context
6065 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
6066     & TABLE_ROW_SCOPING_EL)) {
6067 wakaba 1.79 !!!cp ('t253');
6068 wakaba 1.83 ## ISSUE: Can this case be reached?
6069 wakaba 1.48 pop @{$self->{open_elements}};
6070     }
6071    
6072     pop @{$self->{open_elements}}; # tr
6073 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
6074 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
6075     }
6076    
6077     ## have an element in table scope
6078     my $i;
6079     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6080     my $node = $self->{open_elements}->[$_];
6081 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6082 wakaba 1.79 !!!cp ('t254');
6083 wakaba 1.52 $i = $_;
6084     last INSCOPE;
6085 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
6086 wakaba 1.79 !!!cp ('t255');
6087 wakaba 1.52 last INSCOPE;
6088     }
6089     } # INSCOPE
6090     unless (defined $i) {
6091 wakaba 1.79 !!!cp ('t256');
6092 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6093     text => $token->{tag_name}, token => $token);
6094 wakaba 1.52 ## Ignore the token
6095 wakaba 1.125 !!!nack ('t256.1');
6096 wakaba 1.52 !!!next-token;
6097 wakaba 1.126 next B;
6098 wakaba 1.52 }
6099    
6100     ## Clear back to table body context
6101 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
6102     & TABLE_ROWS_SCOPING_EL)) {
6103 wakaba 1.79 !!!cp ('t257');
6104 wakaba 1.83 ## ISSUE: Can this case be reached?
6105 wakaba 1.52 pop @{$self->{open_elements}};
6106     }
6107    
6108     pop @{$self->{open_elements}};
6109 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
6110 wakaba 1.125 !!!nack ('t257.1');
6111 wakaba 1.52 !!!next-token;
6112 wakaba 1.126 next B;
6113 wakaba 1.52 } elsif ({
6114     body => 1, caption => 1, col => 1, colgroup => 1,
6115     html => 1, td => 1, th => 1,
6116 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
6117     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
6118 wakaba 1.52 }->{$token->{tag_name}}) {
6119 wakaba 1.125 !!!cp ('t258');
6120 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6121     text => $token->{tag_name}, token => $token);
6122 wakaba 1.125 ## Ignore the token
6123     !!!nack ('t258.1');
6124     !!!next-token;
6125 wakaba 1.126 next B;
6126 wakaba 1.58 } else {
6127 wakaba 1.79 !!!cp ('t259');
6128 wakaba 1.153 !!!parse-error (type => 'in table:/',
6129     text => $token->{tag_name}, token => $token);
6130 wakaba 1.52
6131 wakaba 1.58 $insert = $insert_to_foster;
6132     #
6133     }
6134 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6135 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6136 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6137 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6138 wakaba 1.104 !!!cp ('t259.1');
6139 wakaba 1.105 #
6140 wakaba 1.104 } else {
6141     !!!cp ('t259.2');
6142 wakaba 1.105 #
6143 wakaba 1.104 }
6144    
6145     ## Stop parsing
6146     last B;
6147 wakaba 1.58 } else {
6148     die "$0: $token->{type}: Unknown token type";
6149     }
6150 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6151 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6152 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6153     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6154     unless (length $token->{data}) {
6155 wakaba 1.79 !!!cp ('t260');
6156 wakaba 1.52 !!!next-token;
6157 wakaba 1.126 next B;
6158 wakaba 1.52 }
6159     }
6160    
6161 wakaba 1.79 !!!cp ('t261');
6162 wakaba 1.52 #
6163 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6164 wakaba 1.52 if ($token->{tag_name} eq 'col') {
6165 wakaba 1.79 !!!cp ('t262');
6166 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6167 wakaba 1.52 pop @{$self->{open_elements}};
6168 wakaba 1.125 !!!ack ('t262.1');
6169 wakaba 1.52 !!!next-token;
6170 wakaba 1.126 next B;
6171 wakaba 1.52 } else {
6172 wakaba 1.79 !!!cp ('t263');
6173 wakaba 1.52 #
6174     }
6175 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6176 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
6177 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
6178 wakaba 1.79 !!!cp ('t264');
6179 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6180     text => 'colgroup', token => $token);
6181 wakaba 1.52 ## Ignore the token
6182     !!!next-token;
6183 wakaba 1.126 next B;
6184 wakaba 1.52 } else {
6185 wakaba 1.79 !!!cp ('t265');
6186 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
6187 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
6188 wakaba 1.52 !!!next-token;
6189 wakaba 1.126 next B;
6190 wakaba 1.52 }
6191     } elsif ($token->{tag_name} eq 'col') {
6192 wakaba 1.79 !!!cp ('t266');
6193 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6194     text => 'col', token => $token);
6195 wakaba 1.52 ## Ignore the token
6196     !!!next-token;
6197 wakaba 1.126 next B;
6198 wakaba 1.52 } else {
6199 wakaba 1.79 !!!cp ('t267');
6200 wakaba 1.52 #
6201     }
6202 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6203 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6204 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6205     !!!cp ('t270.2');
6206     ## Stop parsing.
6207     last B;
6208     } else {
6209     ## NOTE: As if </colgroup>.
6210     !!!cp ('t270.1');
6211     pop @{$self->{open_elements}}; # colgroup
6212     $self->{insertion_mode} = IN_TABLE_IM;
6213     ## Reprocess.
6214 wakaba 1.126 next B;
6215 wakaba 1.104 }
6216     } else {
6217     die "$0: $token->{type}: Unknown token type";
6218     }
6219 wakaba 1.52
6220     ## As if </colgroup>
6221 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
6222 wakaba 1.79 !!!cp ('t269');
6223 wakaba 1.104 ## TODO: Wrong error type?
6224 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6225     text => 'colgroup', token => $token);
6226 wakaba 1.52 ## Ignore the token
6227 wakaba 1.125 !!!nack ('t269.1');
6228 wakaba 1.52 !!!next-token;
6229 wakaba 1.126 next B;
6230 wakaba 1.52 } else {
6231 wakaba 1.79 !!!cp ('t270');
6232 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
6233 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
6234 wakaba 1.125 !!!ack-later;
6235 wakaba 1.52 ## reprocess
6236 wakaba 1.126 next B;
6237 wakaba 1.52 }
6238 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
6239 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
6240 wakaba 1.79 !!!cp ('t271');
6241 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6242     !!!next-token;
6243 wakaba 1.126 next B;
6244 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
6245 wakaba 1.123 if ($token->{tag_name} eq 'option') {
6246     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6247     !!!cp ('t272');
6248     ## As if </option>
6249     pop @{$self->{open_elements}};
6250     } else {
6251     !!!cp ('t273');
6252     }
6253 wakaba 1.52
6254 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6255 wakaba 1.125 !!!nack ('t273.1');
6256 wakaba 1.123 !!!next-token;
6257 wakaba 1.126 next B;
6258 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
6259     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6260     !!!cp ('t274');
6261     ## As if </option>
6262     pop @{$self->{open_elements}};
6263     } else {
6264     !!!cp ('t275');
6265     }
6266 wakaba 1.52
6267 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6268     !!!cp ('t276');
6269     ## As if </optgroup>
6270     pop @{$self->{open_elements}};
6271     } else {
6272     !!!cp ('t277');
6273     }
6274 wakaba 1.52
6275 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6276 wakaba 1.125 !!!nack ('t277.1');
6277 wakaba 1.123 !!!next-token;
6278 wakaba 1.126 next B;
6279 wakaba 1.146 } elsif ({
6280     select => 1, input => 1, textarea => 1,
6281     }->{$token->{tag_name}} or
6282 wakaba 1.101 ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6283     {
6284     caption => 1, table => 1,
6285     tbody => 1, tfoot => 1, thead => 1,
6286     tr => 1, td => 1, th => 1,
6287     }->{$token->{tag_name}})) {
6288     ## TODO: The type below is not good - <select> is replaced by </select>
6289 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'select',
6290     token => $token);
6291 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
6292     ## as if there were </select> (otherwise).
6293 wakaba 1.123 ## have an element in table scope
6294     my $i;
6295     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6296     my $node = $self->{open_elements}->[$_];
6297     if ($node->[1] & SELECT_EL) {
6298     !!!cp ('t278');
6299     $i = $_;
6300     last INSCOPE;
6301     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6302     !!!cp ('t279');
6303     last INSCOPE;
6304     }
6305     } # INSCOPE
6306     unless (defined $i) {
6307     !!!cp ('t280');
6308 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6309     text => 'select', token => $token);
6310 wakaba 1.123 ## Ignore the token
6311 wakaba 1.125 !!!nack ('t280.1');
6312 wakaba 1.123 !!!next-token;
6313 wakaba 1.126 next B;
6314 wakaba 1.123 }
6315 wakaba 1.52
6316 wakaba 1.123 !!!cp ('t281');
6317     splice @{$self->{open_elements}}, $i;
6318 wakaba 1.52
6319 wakaba 1.123 $self->_reset_insertion_mode;
6320 wakaba 1.47
6321 wakaba 1.101 if ($token->{tag_name} eq 'select') {
6322 wakaba 1.125 !!!nack ('t281.2');
6323 wakaba 1.101 !!!next-token;
6324 wakaba 1.126 next B;
6325 wakaba 1.101 } else {
6326     !!!cp ('t281.1');
6327 wakaba 1.125 !!!ack-later;
6328 wakaba 1.101 ## Reprocess the token.
6329 wakaba 1.126 next B;
6330 wakaba 1.101 }
6331 wakaba 1.58 } else {
6332 wakaba 1.79 !!!cp ('t282');
6333 wakaba 1.153 !!!parse-error (type => 'in select',
6334     text => $token->{tag_name}, token => $token);
6335 wakaba 1.58 ## Ignore the token
6336 wakaba 1.125 !!!nack ('t282.1');
6337 wakaba 1.58 !!!next-token;
6338 wakaba 1.126 next B;
6339 wakaba 1.58 }
6340     } elsif ($token->{type} == END_TAG_TOKEN) {
6341 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
6342     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
6343     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
6344     !!!cp ('t283');
6345     ## As if </option>
6346     splice @{$self->{open_elements}}, -2;
6347     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6348     !!!cp ('t284');
6349     pop @{$self->{open_elements}};
6350     } else {
6351     !!!cp ('t285');
6352 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6353     text => $token->{tag_name}, token => $token);
6354 wakaba 1.123 ## Ignore the token
6355     }
6356 wakaba 1.125 !!!nack ('t285.1');
6357 wakaba 1.123 !!!next-token;
6358 wakaba 1.126 next B;
6359 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
6360     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6361     !!!cp ('t286');
6362     pop @{$self->{open_elements}};
6363     } else {
6364     !!!cp ('t287');
6365 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6366     text => $token->{tag_name}, token => $token);
6367 wakaba 1.123 ## Ignore the token
6368     }
6369 wakaba 1.125 !!!nack ('t287.1');
6370 wakaba 1.123 !!!next-token;
6371 wakaba 1.126 next B;
6372 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
6373     ## have an element in table scope
6374     my $i;
6375     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6376     my $node = $self->{open_elements}->[$_];
6377     if ($node->[1] & SELECT_EL) {
6378     !!!cp ('t288');
6379     $i = $_;
6380     last INSCOPE;
6381     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6382     !!!cp ('t289');
6383     last INSCOPE;
6384     }
6385     } # INSCOPE
6386     unless (defined $i) {
6387     !!!cp ('t290');
6388 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6389     text => $token->{tag_name}, token => $token);
6390 wakaba 1.123 ## Ignore the token
6391 wakaba 1.125 !!!nack ('t290.1');
6392 wakaba 1.123 !!!next-token;
6393 wakaba 1.126 next B;
6394 wakaba 1.123 }
6395 wakaba 1.52
6396 wakaba 1.123 !!!cp ('t291');
6397     splice @{$self->{open_elements}}, $i;
6398 wakaba 1.52
6399 wakaba 1.123 $self->_reset_insertion_mode;
6400 wakaba 1.52
6401 wakaba 1.125 !!!nack ('t291.1');
6402 wakaba 1.123 !!!next-token;
6403 wakaba 1.126 next B;
6404 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6405     {
6406     caption => 1, table => 1, tbody => 1,
6407     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6408     }->{$token->{tag_name}}) {
6409 wakaba 1.83 ## TODO: The following is wrong?
6410 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6411     text => $token->{tag_name}, token => $token);
6412 wakaba 1.52
6413 wakaba 1.123 ## have an element in table scope
6414     my $i;
6415     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6416     my $node = $self->{open_elements}->[$_];
6417     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6418     !!!cp ('t292');
6419     $i = $_;
6420     last INSCOPE;
6421     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6422     !!!cp ('t293');
6423     last INSCOPE;
6424     }
6425     } # INSCOPE
6426     unless (defined $i) {
6427     !!!cp ('t294');
6428     ## Ignore the token
6429 wakaba 1.125 !!!nack ('t294.1');
6430 wakaba 1.123 !!!next-token;
6431 wakaba 1.126 next B;
6432 wakaba 1.123 }
6433 wakaba 1.52
6434 wakaba 1.123 ## As if </select>
6435     ## have an element in table scope
6436     undef $i;
6437     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6438     my $node = $self->{open_elements}->[$_];
6439     if ($node->[1] & SELECT_EL) {
6440     !!!cp ('t295');
6441     $i = $_;
6442     last INSCOPE;
6443     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6444 wakaba 1.83 ## ISSUE: Can this state be reached?
6445 wakaba 1.123 !!!cp ('t296');
6446     last INSCOPE;
6447     }
6448     } # INSCOPE
6449     unless (defined $i) {
6450     !!!cp ('t297');
6451 wakaba 1.83 ## TODO: Is the following error type correct?
6452 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6453     text => 'select', token => $token);
6454 wakaba 1.123 ## Ignore the </select> token
6455 wakaba 1.125 !!!nack ('t297.1');
6456 wakaba 1.123 !!!next-token; ## TODO: ok?
6457 wakaba 1.126 next B;
6458 wakaba 1.123 }
6459 wakaba 1.52
6460 wakaba 1.123 !!!cp ('t298');
6461     splice @{$self->{open_elements}}, $i;
6462 wakaba 1.52
6463 wakaba 1.123 $self->_reset_insertion_mode;
6464 wakaba 1.52
6465 wakaba 1.125 !!!ack-later;
6466 wakaba 1.123 ## reprocess
6467 wakaba 1.126 next B;
6468 wakaba 1.58 } else {
6469 wakaba 1.79 !!!cp ('t299');
6470 wakaba 1.153 !!!parse-error (type => 'in select:/',
6471     text => $token->{tag_name}, token => $token);
6472 wakaba 1.52 ## Ignore the token
6473 wakaba 1.125 !!!nack ('t299.3');
6474 wakaba 1.52 !!!next-token;
6475 wakaba 1.126 next B;
6476 wakaba 1.58 }
6477 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6478 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6479 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6480     !!!cp ('t299.1');
6481 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6482 wakaba 1.104 } else {
6483     !!!cp ('t299.2');
6484     }
6485    
6486     ## Stop parsing.
6487     last B;
6488 wakaba 1.58 } else {
6489     die "$0: $token->{type}: Unknown token type";
6490     }
6491 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6492 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6493 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6494     my $data = $1;
6495     ## As if in body
6496     $reconstruct_active_formatting_elements->($insert_to_current);
6497    
6498     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6499    
6500     unless (length $token->{data}) {
6501 wakaba 1.79 !!!cp ('t300');
6502 wakaba 1.52 !!!next-token;
6503 wakaba 1.126 next B;
6504 wakaba 1.52 }
6505     }
6506    
6507 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6508 wakaba 1.79 !!!cp ('t301');
6509 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6510 wakaba 1.52
6511 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6512 wakaba 1.79 } else {
6513     !!!cp ('t302');
6514 wakaba 1.52 }
6515    
6516     ## "after body" insertion mode
6517 wakaba 1.153 !!!parse-error (type => 'after body:#text', token => $token);
6518 wakaba 1.52
6519 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6520 wakaba 1.52 ## reprocess
6521 wakaba 1.126 next B;
6522 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6523 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6524 wakaba 1.79 !!!cp ('t303');
6525 wakaba 1.153 !!!parse-error (type => 'after html',
6526     text => $token->{tag_name}, token => $token);
6527 wakaba 1.52
6528 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6529 wakaba 1.79 } else {
6530     !!!cp ('t304');
6531 wakaba 1.52 }
6532    
6533     ## "after body" insertion mode
6534 wakaba 1.153 !!!parse-error (type => 'after body',
6535     text => $token->{tag_name}, token => $token);
6536 wakaba 1.52
6537 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6538 wakaba 1.125 !!!ack-later;
6539 wakaba 1.52 ## reprocess
6540 wakaba 1.126 next B;
6541 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6542 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6543 wakaba 1.79 !!!cp ('t305');
6544 wakaba 1.153 !!!parse-error (type => 'after html:/',
6545     text => $token->{tag_name}, token => $token);
6546 wakaba 1.52
6547 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6548 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6549 wakaba 1.79 } else {
6550     !!!cp ('t306');
6551 wakaba 1.52 }
6552    
6553     ## "after body" insertion mode
6554     if ($token->{tag_name} eq 'html') {
6555     if (defined $self->{inner_html_node}) {
6556 wakaba 1.79 !!!cp ('t307');
6557 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6558     text => 'html', token => $token);
6559 wakaba 1.52 ## Ignore the token
6560     !!!next-token;
6561 wakaba 1.126 next B;
6562 wakaba 1.52 } else {
6563 wakaba 1.79 !!!cp ('t308');
6564 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
6565 wakaba 1.52 !!!next-token;
6566 wakaba 1.126 next B;
6567 wakaba 1.52 }
6568     } else {
6569 wakaba 1.79 !!!cp ('t309');
6570 wakaba 1.153 !!!parse-error (type => 'after body:/',
6571     text => $token->{tag_name}, token => $token);
6572 wakaba 1.52
6573 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6574 wakaba 1.52 ## reprocess
6575 wakaba 1.126 next B;
6576 wakaba 1.52 }
6577 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6578     !!!cp ('t309.2');
6579     ## Stop parsing
6580     last B;
6581 wakaba 1.52 } else {
6582     die "$0: $token->{type}: Unknown token type";
6583     }
6584 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
6585 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6586 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6587     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6588    
6589     unless (length $token->{data}) {
6590 wakaba 1.79 !!!cp ('t310');
6591 wakaba 1.52 !!!next-token;
6592 wakaba 1.126 next B;
6593 wakaba 1.52 }
6594     }
6595    
6596     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6597 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6598 wakaba 1.79 !!!cp ('t311');
6599 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
6600 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6601 wakaba 1.79 !!!cp ('t312');
6602 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
6603 wakaba 1.158 } else { # "after after frameset"
6604 wakaba 1.79 !!!cp ('t313');
6605 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6606 wakaba 1.52 }
6607    
6608     ## Ignore the token.
6609     if (length $token->{data}) {
6610 wakaba 1.79 !!!cp ('t314');
6611 wakaba 1.52 ## reprocess the rest of characters
6612     } else {
6613 wakaba 1.79 !!!cp ('t315');
6614 wakaba 1.52 !!!next-token;
6615     }
6616 wakaba 1.126 next B;
6617 wakaba 1.52 }
6618    
6619     die qq[$0: Character "$token->{data}"];
6620 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6621 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6622 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6623 wakaba 1.79 !!!cp ('t318');
6624 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6625 wakaba 1.125 !!!nack ('t318.1');
6626 wakaba 1.52 !!!next-token;
6627 wakaba 1.126 next B;
6628 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
6629 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6630 wakaba 1.79 !!!cp ('t319');
6631 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6632 wakaba 1.52 pop @{$self->{open_elements}};
6633 wakaba 1.125 !!!ack ('t319.1');
6634 wakaba 1.52 !!!next-token;
6635 wakaba 1.126 next B;
6636 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
6637 wakaba 1.79 !!!cp ('t320');
6638 wakaba 1.148 ## NOTE: As if in head.
6639 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6640 wakaba 1.126 next B;
6641 wakaba 1.158
6642     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
6643     ## has no parse error.
6644 wakaba 1.52 } else {
6645 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6646 wakaba 1.79 !!!cp ('t321');
6647 wakaba 1.153 !!!parse-error (type => 'in frameset',
6648     text => $token->{tag_name}, token => $token);
6649 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6650 wakaba 1.79 !!!cp ('t322');
6651 wakaba 1.153 !!!parse-error (type => 'after frameset',
6652     text => $token->{tag_name}, token => $token);
6653 wakaba 1.158 } else { # "after after frameset"
6654     !!!cp ('t322.2');
6655     !!!parse-error (type => 'after after frameset',
6656     text => $token->{tag_name}, token => $token);
6657 wakaba 1.52 }
6658     ## Ignore the token
6659 wakaba 1.125 !!!nack ('t322.1');
6660 wakaba 1.52 !!!next-token;
6661 wakaba 1.126 next B;
6662 wakaba 1.52 }
6663 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6664 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6665 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6666 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6667 wakaba 1.52 @{$self->{open_elements}} == 1) {
6668 wakaba 1.79 !!!cp ('t325');
6669 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6670     text => $token->{tag_name}, token => $token);
6671 wakaba 1.52 ## Ignore the token
6672     !!!next-token;
6673     } else {
6674 wakaba 1.79 !!!cp ('t326');
6675 wakaba 1.52 pop @{$self->{open_elements}};
6676     !!!next-token;
6677     }
6678 wakaba 1.47
6679 wakaba 1.52 if (not defined $self->{inner_html_node} and
6680 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6681 wakaba 1.79 !!!cp ('t327');
6682 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6683 wakaba 1.79 } else {
6684     !!!cp ('t328');
6685 wakaba 1.52 }
6686 wakaba 1.126 next B;
6687 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
6688 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6689 wakaba 1.79 !!!cp ('t329');
6690 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6691 wakaba 1.52 !!!next-token;
6692 wakaba 1.126 next B;
6693 wakaba 1.52 } else {
6694 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6695 wakaba 1.79 !!!cp ('t330');
6696 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
6697     text => $token->{tag_name}, token => $token);
6698 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6699     !!!cp ('t330.1');
6700     !!!parse-error (type => 'after frameset:/',
6701     text => $token->{tag_name}, token => $token);
6702     } else { # "after after frameset"
6703 wakaba 1.79 !!!cp ('t331');
6704 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
6705 wakaba 1.153 text => $token->{tag_name}, token => $token);
6706 wakaba 1.52 }
6707     ## Ignore the token
6708     !!!next-token;
6709 wakaba 1.126 next B;
6710 wakaba 1.52 }
6711 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6712 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6713 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6714     !!!cp ('t331.1');
6715 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6716 wakaba 1.104 } else {
6717     !!!cp ('t331.2');
6718     }
6719    
6720     ## Stop parsing
6721     last B;
6722 wakaba 1.52 } else {
6723     die "$0: $token->{type}: Unknown token type";
6724     }
6725 wakaba 1.47
6726 wakaba 1.52 ## ISSUE: An issue in spec here
6727     } else {
6728     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6729     }
6730 wakaba 1.47
6731 wakaba 1.52 ## "in body" insertion mode
6732 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
6733 wakaba 1.52 if ($token->{tag_name} eq 'script') {
6734 wakaba 1.79 !!!cp ('t332');
6735 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6736 wakaba 1.100 $script_start_tag->();
6737 wakaba 1.126 next B;
6738 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
6739 wakaba 1.79 !!!cp ('t333');
6740 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6741 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6742 wakaba 1.126 next B;
6743 wakaba 1.52 } elsif ({
6744     base => 1, link => 1,
6745     }->{$token->{tag_name}}) {
6746 wakaba 1.79 !!!cp ('t334');
6747 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6748 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6749 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6750 wakaba 1.125 !!!ack ('t334.1');
6751 wakaba 1.52 !!!next-token;
6752 wakaba 1.126 next B;
6753 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
6754     ## NOTE: This is an "as if in head" code clone, only "-t" differs
6755 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6756 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6757 wakaba 1.46
6758 wakaba 1.52 unless ($self->{confident}) {
6759 wakaba 1.134 if ($token->{attributes}->{charset}) {
6760 wakaba 1.79 !!!cp ('t335');
6761 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6762     ## in the {change_encoding} callback.
6763 wakaba 1.63 $self->{change_encoding}
6764 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
6765 wakaba 1.66
6766     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6767     ->set_user_data (manakai_has_reference =>
6768     $token->{attributes}->{charset}
6769     ->{has_reference});
6770 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
6771     if ($token->{attributes}->{content}->{value}
6772 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6773 wakaba 1.70 [\x09-\x0D\x20]*=
6774 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6775 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
6776 wakaba 1.79 !!!cp ('t336');
6777 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6778     ## in the {change_encoding} callback.
6779 wakaba 1.63 $self->{change_encoding}
6780 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6781 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6782     ->set_user_data (manakai_has_reference =>
6783     $token->{attributes}->{content}
6784     ->{has_reference});
6785 wakaba 1.63 }
6786 wakaba 1.52 }
6787 wakaba 1.66 } else {
6788     if ($token->{attributes}->{charset}) {
6789 wakaba 1.79 !!!cp ('t337');
6790 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6791     ->set_user_data (manakai_has_reference =>
6792     $token->{attributes}->{charset}
6793     ->{has_reference});
6794     }
6795 wakaba 1.68 if ($token->{attributes}->{content}) {
6796 wakaba 1.79 !!!cp ('t338');
6797 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6798     ->set_user_data (manakai_has_reference =>
6799     $token->{attributes}->{content}
6800     ->{has_reference});
6801     }
6802 wakaba 1.52 }
6803 wakaba 1.1
6804 wakaba 1.125 !!!ack ('t338.1');
6805 wakaba 1.52 !!!next-token;
6806 wakaba 1.126 next B;
6807 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
6808 wakaba 1.79 !!!cp ('t341');
6809 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6810 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6811 wakaba 1.126 next B;
6812 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
6813 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
6814 wakaba 1.46
6815 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
6816 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6817 wakaba 1.79 !!!cp ('t342');
6818 wakaba 1.52 ## Ignore the token
6819     } else {
6820     my $body_el = $self->{open_elements}->[1]->[0];
6821     for my $attr_name (keys %{$token->{attributes}}) {
6822     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6823 wakaba 1.79 !!!cp ('t343');
6824 wakaba 1.52 $body_el->set_attribute_ns
6825     (undef, [undef, $attr_name],
6826     $token->{attributes}->{$attr_name}->{value});
6827     }
6828     }
6829     }
6830 wakaba 1.125 !!!nack ('t343.1');
6831 wakaba 1.52 !!!next-token;
6832 wakaba 1.126 next B;
6833 wakaba 1.52 } elsif ({
6834     address => 1, blockquote => 1, center => 1, dir => 1,
6835 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6836     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6837 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6838     pre => 1, listing => 1,
6839 wakaba 1.109 form => 1,
6840     table => 1,
6841     hr => 1,
6842 wakaba 1.52 }->{$token->{tag_name}}) {
6843 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6844     !!!cp ('t350');
6845 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6846 wakaba 1.109 ## Ignore the token
6847 wakaba 1.125 !!!nack ('t350.1');
6848 wakaba 1.109 !!!next-token;
6849 wakaba 1.126 next B;
6850 wakaba 1.109 }
6851    
6852 wakaba 1.52 ## has a p element in scope
6853     INSCOPE: for (reverse @{$self->{open_elements}}) {
6854 wakaba 1.123 if ($_->[1] & P_EL) {
6855 wakaba 1.79 !!!cp ('t344');
6856 wakaba 1.125 !!!back-token; # <form>
6857 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6858     line => $token->{line}, column => $token->{column}};
6859 wakaba 1.126 next B;
6860 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6861 wakaba 1.79 !!!cp ('t345');
6862 wakaba 1.52 last INSCOPE;
6863     }
6864     } # INSCOPE
6865    
6866 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6867 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6868 wakaba 1.125 !!!nack ('t346.1');
6869 wakaba 1.52 !!!next-token;
6870 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6871 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6872     unless (length $token->{data}) {
6873 wakaba 1.79 !!!cp ('t346');
6874 wakaba 1.1 !!!next-token;
6875 wakaba 1.79 } else {
6876     !!!cp ('t349');
6877 wakaba 1.52 }
6878 wakaba 1.79 } else {
6879     !!!cp ('t348');
6880 wakaba 1.52 }
6881 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6882     !!!cp ('t347.1');
6883     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6884    
6885 wakaba 1.125 !!!nack ('t347.2');
6886 wakaba 1.109 !!!next-token;
6887     } elsif ($token->{tag_name} eq 'table') {
6888     !!!cp ('t382');
6889     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6890    
6891     $self->{insertion_mode} = IN_TABLE_IM;
6892    
6893 wakaba 1.125 !!!nack ('t382.1');
6894 wakaba 1.109 !!!next-token;
6895     } elsif ($token->{tag_name} eq 'hr') {
6896     !!!cp ('t386');
6897     pop @{$self->{open_elements}};
6898    
6899 wakaba 1.125 !!!nack ('t386.1');
6900 wakaba 1.109 !!!next-token;
6901 wakaba 1.52 } else {
6902 wakaba 1.125 !!!nack ('t347.1');
6903 wakaba 1.52 !!!next-token;
6904     }
6905 wakaba 1.126 next B;
6906 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6907 wakaba 1.52 ## has a p element in scope
6908     INSCOPE: for (reverse @{$self->{open_elements}}) {
6909 wakaba 1.123 if ($_->[1] & P_EL) {
6910 wakaba 1.79 !!!cp ('t353');
6911 wakaba 1.125 !!!back-token; # <x>
6912 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6913     line => $token->{line}, column => $token->{column}};
6914 wakaba 1.126 next B;
6915 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6916 wakaba 1.79 !!!cp ('t354');
6917 wakaba 1.52 last INSCOPE;
6918     }
6919     } # INSCOPE
6920    
6921     ## Step 1
6922     my $i = -1;
6923     my $node = $self->{open_elements}->[$i];
6924 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6925     dt => {dt => 1, dd => 1},
6926     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6927 wakaba 1.52 LI: {
6928     ## Step 2
6929 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6930 wakaba 1.52 if ($i != -1) {
6931 wakaba 1.79 !!!cp ('t355');
6932 wakaba 1.122 !!!parse-error (type => 'not closed',
6933 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
6934 wakaba 1.122 ->manakai_local_name,
6935     token => $token);
6936 wakaba 1.79 } else {
6937     !!!cp ('t356');
6938 wakaba 1.52 }
6939     splice @{$self->{open_elements}}, $i;
6940     last LI;
6941 wakaba 1.79 } else {
6942     !!!cp ('t357');
6943 wakaba 1.52 }
6944    
6945     ## Step 3
6946 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6947 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6948 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6949     $node->[1] & SCOPING_EL) and
6950     not ($node->[1] & ADDRESS_EL) and
6951     not ($node->[1] & DIV_EL)) {
6952 wakaba 1.79 !!!cp ('t358');
6953 wakaba 1.52 last LI;
6954     }
6955    
6956 wakaba 1.79 !!!cp ('t359');
6957 wakaba 1.52 ## Step 4
6958     $i--;
6959     $node = $self->{open_elements}->[$i];
6960     redo LI;
6961     } # LI
6962    
6963 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6964 wakaba 1.125 !!!nack ('t359.1');
6965 wakaba 1.52 !!!next-token;
6966 wakaba 1.126 next B;
6967 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6968     ## has a p element in scope
6969     INSCOPE: for (reverse @{$self->{open_elements}}) {
6970 wakaba 1.123 if ($_->[1] & P_EL) {
6971 wakaba 1.79 !!!cp ('t367');
6972 wakaba 1.125 !!!back-token; # <plaintext>
6973 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6974     line => $token->{line}, column => $token->{column}};
6975 wakaba 1.126 next B;
6976 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6977 wakaba 1.79 !!!cp ('t368');
6978 wakaba 1.52 last INSCOPE;
6979 wakaba 1.46 }
6980 wakaba 1.52 } # INSCOPE
6981    
6982 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6983 wakaba 1.52
6984     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6985    
6986 wakaba 1.125 !!!nack ('t368.1');
6987 wakaba 1.52 !!!next-token;
6988 wakaba 1.126 next B;
6989 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6990     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6991     my $node = $active_formatting_elements->[$i];
6992 wakaba 1.123 if ($node->[1] & A_EL) {
6993 wakaba 1.79 !!!cp ('t371');
6994 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6995 wakaba 1.52
6996 wakaba 1.125 !!!back-token; # <a>
6997 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6998     line => $token->{line}, column => $token->{column}};
6999 wakaba 1.113 $formatting_end_tag->($token);
7000 wakaba 1.52
7001     AFE2: for (reverse 0..$#$active_formatting_elements) {
7002     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
7003 wakaba 1.79 !!!cp ('t372');
7004 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
7005     last AFE2;
7006 wakaba 1.1 }
7007 wakaba 1.52 } # AFE2
7008     OE: for (reverse 0..$#{$self->{open_elements}}) {
7009     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
7010 wakaba 1.79 !!!cp ('t373');
7011 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
7012     last OE;
7013 wakaba 1.1 }
7014 wakaba 1.52 } # OE
7015     last AFE;
7016     } elsif ($node->[0] eq '#marker') {
7017 wakaba 1.79 !!!cp ('t374');
7018 wakaba 1.52 last AFE;
7019     }
7020     } # AFE
7021    
7022     $reconstruct_active_formatting_elements->($insert_to_current);
7023 wakaba 1.1
7024 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7025 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
7026 wakaba 1.1
7027 wakaba 1.125 !!!nack ('t374.1');
7028 wakaba 1.52 !!!next-token;
7029 wakaba 1.126 next B;
7030 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
7031     $reconstruct_active_formatting_elements->($insert_to_current);
7032 wakaba 1.1
7033 wakaba 1.52 ## has a |nobr| element in scope
7034     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7035     my $node = $self->{open_elements}->[$_];
7036 wakaba 1.123 if ($node->[1] & NOBR_EL) {
7037 wakaba 1.79 !!!cp ('t376');
7038 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
7039 wakaba 1.125 !!!back-token; # <nobr>
7040 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
7041     line => $token->{line}, column => $token->{column}};
7042 wakaba 1.126 next B;
7043 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7044 wakaba 1.79 !!!cp ('t377');
7045 wakaba 1.52 last INSCOPE;
7046     }
7047     } # INSCOPE
7048    
7049 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7050 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
7051    
7052 wakaba 1.125 !!!nack ('t377.1');
7053 wakaba 1.52 !!!next-token;
7054 wakaba 1.126 next B;
7055 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
7056     ## has a button element in scope
7057     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7058     my $node = $self->{open_elements}->[$_];
7059 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
7060 wakaba 1.79 !!!cp ('t378');
7061 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
7062 wakaba 1.125 !!!back-token; # <button>
7063 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
7064     line => $token->{line}, column => $token->{column}};
7065 wakaba 1.126 next B;
7066 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7067 wakaba 1.79 !!!cp ('t379');
7068 wakaba 1.52 last INSCOPE;
7069     }
7070     } # INSCOPE
7071    
7072     $reconstruct_active_formatting_elements->($insert_to_current);
7073    
7074 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7075 wakaba 1.85
7076     ## TODO: associate with $self->{form_element} if defined
7077    
7078 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
7079 wakaba 1.1
7080 wakaba 1.125 !!!nack ('t379.1');
7081 wakaba 1.52 !!!next-token;
7082 wakaba 1.126 next B;
7083 wakaba 1.103 } elsif ({
7084 wakaba 1.109 xmp => 1,
7085     iframe => 1,
7086     noembed => 1,
7087 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
7088 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
7089 wakaba 1.103 }->{$token->{tag_name}}) {
7090 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
7091     !!!cp ('t381');
7092     $reconstruct_active_formatting_elements->($insert_to_current);
7093     } else {
7094     !!!cp ('t399');
7095     }
7096     ## NOTE: There is an "as if in body" code clone.
7097 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
7098 wakaba 1.126 next B;
7099 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
7100 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
7101 wakaba 1.52
7102     if (defined $self->{form_element}) {
7103 wakaba 1.79 !!!cp ('t389');
7104 wakaba 1.52 ## Ignore the token
7105 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
7106 wakaba 1.52 !!!next-token;
7107 wakaba 1.126 next B;
7108 wakaba 1.52 } else {
7109 wakaba 1.147 !!!ack ('t391.1');
7110    
7111 wakaba 1.52 my $at = $token->{attributes};
7112     my $form_attrs;
7113     $form_attrs->{action} = $at->{action} if $at->{action};
7114     my $prompt_attr = $at->{prompt};
7115     $at->{name} = {name => 'name', value => 'isindex'};
7116     delete $at->{action};
7117     delete $at->{prompt};
7118     my @tokens = (
7119 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
7120 wakaba 1.114 attributes => $form_attrs,
7121     line => $token->{line}, column => $token->{column}},
7122     {type => START_TAG_TOKEN, tag_name => 'hr',
7123     line => $token->{line}, column => $token->{column}},
7124     {type => START_TAG_TOKEN, tag_name => 'p',
7125     line => $token->{line}, column => $token->{column}},
7126     {type => START_TAG_TOKEN, tag_name => 'label',
7127     line => $token->{line}, column => $token->{column}},
7128 wakaba 1.52 );
7129     if ($prompt_attr) {
7130 wakaba 1.79 !!!cp ('t390');
7131 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
7132 wakaba 1.118 #line => $token->{line}, column => $token->{column},
7133     };
7134 wakaba 1.1 } else {
7135 wakaba 1.79 !!!cp ('t391');
7136 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
7137 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
7138 wakaba 1.118 #line => $token->{line}, column => $token->{column},
7139     }; # SHOULD
7140 wakaba 1.52 ## TODO: make this configurable
7141 wakaba 1.1 }
7142 wakaba 1.52 push @tokens,
7143 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
7144     line => $token->{line}, column => $token->{column}},
7145 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
7146 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
7147     line => $token->{line}, column => $token->{column}},
7148     {type => END_TAG_TOKEN, tag_name => 'p',
7149     line => $token->{line}, column => $token->{column}},
7150     {type => START_TAG_TOKEN, tag_name => 'hr',
7151     line => $token->{line}, column => $token->{column}},
7152     {type => END_TAG_TOKEN, tag_name => 'form',
7153     line => $token->{line}, column => $token->{column}};
7154 wakaba 1.52 !!!back-token (@tokens);
7155 wakaba 1.125 !!!next-token;
7156 wakaba 1.126 next B;
7157 wakaba 1.52 }
7158     } elsif ($token->{tag_name} eq 'textarea') {
7159     my $tag_name = $token->{tag_name};
7160     my $el;
7161 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
7162 wakaba 1.52
7163     ## TODO: $self->{form_element} if defined
7164     $self->{content_model} = RCDATA_CONTENT_MODEL;
7165     delete $self->{escape}; # MUST
7166    
7167     $insert->($el);
7168    
7169     my $text = '';
7170 wakaba 1.125 !!!nack ('t392.1');
7171 wakaba 1.52 !!!next-token;
7172 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
7173 wakaba 1.52 $token->{data} =~ s/^\x0A//;
7174 wakaba 1.51 unless (length $token->{data}) {
7175 wakaba 1.79 !!!cp ('t392');
7176 wakaba 1.51 !!!next-token;
7177 wakaba 1.79 } else {
7178     !!!cp ('t393');
7179 wakaba 1.51 }
7180 wakaba 1.79 } else {
7181     !!!cp ('t394');
7182 wakaba 1.51 }
7183 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
7184 wakaba 1.79 !!!cp ('t395');
7185 wakaba 1.52 $text .= $token->{data};
7186     !!!next-token;
7187     }
7188     if (length $text) {
7189 wakaba 1.79 !!!cp ('t396');
7190 wakaba 1.52 $el->manakai_append_text ($text);
7191     }
7192    
7193     $self->{content_model} = PCDATA_CONTENT_MODEL;
7194 wakaba 1.51
7195 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
7196 wakaba 1.52 $token->{tag_name} eq $tag_name) {
7197 wakaba 1.79 !!!cp ('t397');
7198 wakaba 1.52 ## Ignore the token
7199     } else {
7200 wakaba 1.79 !!!cp ('t398');
7201 wakaba 1.153 !!!parse-error (type => 'in RCDATA:#eof', token => $token);
7202 wakaba 1.51 }
7203 wakaba 1.52 !!!next-token;
7204 wakaba 1.126 next B;
7205 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
7206     $token->{tag_name} eq 'rp') {
7207     ## has a |ruby| element in scope
7208     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7209     my $node = $self->{open_elements}->[$_];
7210     if ($node->[1] & RUBY_EL) {
7211     !!!cp ('t398.1');
7212     ## generate implied end tags
7213     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7214     !!!cp ('t398.2');
7215     pop @{$self->{open_elements}};
7216     }
7217     unless ($self->{open_elements}->[-1]->[1] & RUBY_EL) {
7218     !!!cp ('t398.3');
7219     !!!parse-error (type => 'not closed',
7220 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7221 wakaba 1.151 ->manakai_local_name,
7222     token => $token);
7223     pop @{$self->{open_elements}}
7224     while not $self->{open_elements}->[-1]->[1] & RUBY_EL;
7225     }
7226     last INSCOPE;
7227     } elsif ($node->[1] & SCOPING_EL) {
7228     !!!cp ('t398.4');
7229     last INSCOPE;
7230     }
7231     } # INSCOPE
7232    
7233     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7234    
7235     !!!nack ('t398.5');
7236     !!!next-token;
7237     redo B;
7238 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
7239     $token->{tag_name} eq 'svg') {
7240     $reconstruct_active_formatting_elements->($insert_to_current);
7241 wakaba 1.131
7242 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
7243    
7244 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
7245    
7246     ## "adjust foreign attributes" - done in insert-element-f
7247 wakaba 1.126
7248 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
7249 wakaba 1.126
7250     if ($self->{self_closing}) {
7251     pop @{$self->{open_elements}};
7252     !!!ack ('t398.1');
7253     } else {
7254     !!!cp ('t398.2');
7255     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
7256     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
7257     ## mode, "in body" (not "in foreign content") secondary insertion
7258     ## mode, maybe.
7259     }
7260    
7261     !!!next-token;
7262     next B;
7263 wakaba 1.52 } elsif ({
7264     caption => 1, col => 1, colgroup => 1, frame => 1,
7265     frameset => 1, head => 1, option => 1, optgroup => 1,
7266     tbody => 1, td => 1, tfoot => 1, th => 1,
7267     thead => 1, tr => 1,
7268     }->{$token->{tag_name}}) {
7269 wakaba 1.79 !!!cp ('t401');
7270 wakaba 1.153 !!!parse-error (type => 'in body',
7271     text => $token->{tag_name}, token => $token);
7272 wakaba 1.52 ## Ignore the token
7273 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
7274 wakaba 1.52 !!!next-token;
7275 wakaba 1.126 next B;
7276 wakaba 1.52
7277     ## ISSUE: An issue on HTML5 new elements in the spec.
7278     } else {
7279 wakaba 1.110 if ($token->{tag_name} eq 'image') {
7280     !!!cp ('t384');
7281 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
7282 wakaba 1.110 $token->{tag_name} = 'img';
7283     } else {
7284     !!!cp ('t385');
7285     }
7286    
7287     ## NOTE: There is an "as if <br>" code clone.
7288 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
7289    
7290 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7291 wakaba 1.109
7292 wakaba 1.110 if ({ ## elements that push a marker onto the active formatting list
7293     applet => 1, marquee => 1, object => 1,
7294     }->{$token->{tag_name}}) {
7295     !!!cp ('t380');
7296     push @$active_formatting_elements, ['#marker', ''];
7297 wakaba 1.125 !!!nack ('t380.1');
7298 wakaba 1.110 } elsif ({ ## formatting elements: the inserted element joins the active formatting list
7299     b => 1, big => 1, em => 1, font => 1, i => 1,
7300     s => 1, small => 1, strike => 1,
7301     strong => 1, tt => 1, u => 1,
7302     }->{$token->{tag_name}}) {
7303     !!!cp ('t375');
7304     push @$active_formatting_elements, $self->{open_elements}->[-1];
7305 wakaba 1.125 !!!nack ('t375.1');
7306 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
7307     !!!cp ('t388');
7308     ## TODO: associate with $self->{form_element} if defined
7309     pop @{$self->{open_elements}};
7310 wakaba 1.125 !!!ack ('t388.2');
7311 wakaba 1.110 } elsif ({ ## elements popped from the stack right after insertion
7312     area => 1, basefont => 1, bgsound => 1, br => 1,
7313     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
7314     #image => 1,
7315     }->{$token->{tag_name}}) {
7316     !!!cp ('t388.1');
7317     pop @{$self->{open_elements}};
7318 wakaba 1.125 !!!ack ('t388.3');
7319 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
7320 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
7321    
7322     if ($self->{insertion_mode} & TABLE_IMS or
7323     $self->{insertion_mode} & BODY_TABLE_IMS or
7324     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
7325     !!!cp ('t400.1');
7326     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
7327     } else {
7328     !!!cp ('t400.2');
7329     $self->{insertion_mode} = IN_SELECT_IM;
7330     }
7331 wakaba 1.125 !!!nack ('t400.3');
7332 wakaba 1.110 } else {
7333 wakaba 1.125 !!!nack ('t402');
7334 wakaba 1.109 }
7335 wakaba 1.51
7336 wakaba 1.52 !!!next-token;
7337 wakaba 1.126 next B;
7338 wakaba 1.52 }
7339 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
7340 wakaba 1.52 if ($token->{tag_name} eq 'body') {
7341 wakaba 1.107 ## has a |body| element in scope
7342     my $i;
7343 wakaba 1.111 INSCOPE: {
7344     for (reverse @{$self->{open_elements}}) {
7345 wakaba 1.123 if ($_->[1] & BODY_EL) {
7346 wakaba 1.111 !!!cp ('t405');
7347     $i = $_;
7348     last INSCOPE;
7349 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
7350 wakaba 1.111 !!!cp ('t405.1');
7351     last;
7352     }
7353 wakaba 1.52 }
7354 wakaba 1.111
7355     !!!parse-error (type => 'start tag not allowed',
7356 wakaba 1.153 text => $token->{tag_name}, token => $token);
7357 wakaba 1.107 ## NOTE: Ignore the token.
7358 wakaba 1.52 !!!next-token;
7359 wakaba 1.126 next B;
7360 wakaba 1.111 } # INSCOPE
7361 wakaba 1.107
7362     for (@{$self->{open_elements}}) {
7363 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
7364 wakaba 1.107 !!!cp ('t403');
7365 wakaba 1.122 !!!parse-error (type => 'not closed',
7366 wakaba 1.153 text => $_->[0]->manakai_local_name,
7367 wakaba 1.122 token => $token);
7368 wakaba 1.107 last;
7369     } else {
7370     !!!cp ('t404');
7371     }
7372     }
7373    
7374     $self->{insertion_mode} = AFTER_BODY_IM;
7375     !!!next-token;
7376 wakaba 1.126 next B;
7377 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
7378 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
7379     ## up-to-date, though it has same effect as speced.
7380 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
7381     $self->{open_elements}->[1]->[1] & BODY_EL) {
7382 wakaba 1.52 ## ISSUE: There is an issue in the spec.
7383 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
7384 wakaba 1.79 !!!cp ('t406');
7385 wakaba 1.122 !!!parse-error (type => 'not closed',
7386 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
7387 wakaba 1.122 ->manakai_local_name,
7388     token => $token);
7389 wakaba 1.79 } else {
7390     !!!cp ('t407');
7391 wakaba 1.1 }
7392 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
7393 wakaba 1.52 ## reprocess
7394 wakaba 1.126 next B;
7395 wakaba 1.51 } else {
7396 wakaba 1.79 !!!cp ('t408');
7397 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7398     text => $token->{tag_name}, token => $token);
7399 wakaba 1.52 ## Ignore the token
7400     !!!next-token;
7401 wakaba 1.126 next B;
7402 wakaba 1.51 }
7403 wakaba 1.52 } elsif ({
7404     address => 1, blockquote => 1, center => 1, dir => 1,
7405     div => 1, dl => 1, fieldset => 1, listing => 1,
7406     menu => 1, ol => 1, pre => 1, ul => 1,
7407     dd => 1, dt => 1, li => 1,
7408 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7409 wakaba 1.52 }->{$token->{tag_name}}) {
7410     ## has an element in scope
7411     my $i;
7412     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7413     my $node = $self->{open_elements}->[$_];
7414 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7415 wakaba 1.79 !!!cp ('t410');
7416 wakaba 1.52 $i = $_;
7417 wakaba 1.87 last INSCOPE;
7418 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7419 wakaba 1.79 !!!cp ('t411');
7420 wakaba 1.52 last INSCOPE;
7421 wakaba 1.51 }
7422 wakaba 1.52 } # INSCOPE
7423 wakaba 1.89
7424     unless (defined $i) { # has an element in scope
7425     !!!cp ('t413');
7426 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7427     text => $token->{tag_name}, token => $token);
7428 wakaba 1.157 ## NOTE: Ignore the token.
7429 wakaba 1.89 } else {
7430     ## Step 1. generate implied end tags
7431     while ({
7432 wakaba 1.151 ## END_TAG_OPTIONAL_EL
7433 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
7434     dt => ($token->{tag_name} ne 'dt'),
7435     li => ($token->{tag_name} ne 'li'),
7436     p => 1,
7437 wakaba 1.151 rt => 1,
7438     rp => 1,
7439 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
7440 wakaba 1.89 !!!cp ('t409');
7441     pop @{$self->{open_elements}};
7442     }
7443    
7444     ## Step 2.
7445 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7446     ne $token->{tag_name}) {
7447 wakaba 1.79 !!!cp ('t412');
7448 wakaba 1.122 !!!parse-error (type => 'not closed',
7449 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7450 wakaba 1.122 ->manakai_local_name,
7451     token => $token);
7452 wakaba 1.51 } else {
7453 wakaba 1.89 !!!cp ('t414');
7454 wakaba 1.51 }
7455 wakaba 1.89
7456     ## Step 3.
7457 wakaba 1.52 splice @{$self->{open_elements}}, $i;
7458 wakaba 1.89
7459     ## Step 4.
7460     $clear_up_to_marker->()
7461     if {
7462 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7463 wakaba 1.89 }->{$token->{tag_name}};
7464 wakaba 1.51 }
7465 wakaba 1.52 !!!next-token;
7466 wakaba 1.126 next B;
7467 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
7468 wakaba 1.92 undef $self->{form_element};
7469    
7470 wakaba 1.52 ## has an element in scope
7471 wakaba 1.92 my $i;
7472 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7473     my $node = $self->{open_elements}->[$_];
7474 wakaba 1.123 if ($node->[1] & FORM_EL) {
7475 wakaba 1.79 !!!cp ('t418');
7476 wakaba 1.92 $i = $_;
7477 wakaba 1.52 last INSCOPE;
7478 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7479 wakaba 1.79 !!!cp ('t419');
7480 wakaba 1.52 last INSCOPE;
7481     }
7482     } # INSCOPE
7483 wakaba 1.92
7484     unless (defined $i) { # has an element in scope
7485 wakaba 1.79 !!!cp ('t421');
7486 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7487     text => $token->{tag_name}, token => $token);
7488 wakaba 1.157 ## NOTE: Ignore the token.
7489 wakaba 1.92 } else {
7490     ## Step 1. generate implied end tags
7491 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7492 wakaba 1.92 !!!cp ('t417');
7493     pop @{$self->{open_elements}};
7494     }
7495    
7496     ## Step 2.
7497 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7498     ne $token->{tag_name}) {
7499 wakaba 1.92 !!!cp ('t417.1');
7500 wakaba 1.122 !!!parse-error (type => 'not closed',
7501 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7502 wakaba 1.122 ->manakai_local_name,
7503     token => $token);
7504 wakaba 1.92 } else {
7505     !!!cp ('t420');
7506     }
7507    
7508     ## Step 3.
7509     splice @{$self->{open_elements}}, $i;
7510 wakaba 1.52 }
7511    
7512     !!!next-token;
7513 wakaba 1.126 next B;
7514 wakaba 1.52 } elsif ({
7515     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7516     }->{$token->{tag_name}}) {
7517     ## has an element in scope
7518     my $i;
7519     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7520     my $node = $self->{open_elements}->[$_];
7521 wakaba 1.123 if ($node->[1] & HEADING_EL) {
7522 wakaba 1.79 !!!cp ('t423');
7523 wakaba 1.52 $i = $_;
7524     last INSCOPE;
7525 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7526 wakaba 1.79 !!!cp ('t424');
7527 wakaba 1.52 last INSCOPE;
7528 wakaba 1.51 }
7529 wakaba 1.52 } # INSCOPE
7530 wakaba 1.93
7531     unless (defined $i) { # has an element in scope
7532     !!!cp ('t425.1');
7533 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7534     text => $token->{tag_name}, token => $token);
7535 wakaba 1.157 ## NOTE: Ignore the token.
7536 wakaba 1.79 } else {
7537 wakaba 1.93 ## Step 1. generate implied end tags
7538 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7539 wakaba 1.93 !!!cp ('t422');
7540     pop @{$self->{open_elements}};
7541     }
7542    
7543     ## Step 2.
7544 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7545     ne $token->{tag_name}) {
7546 wakaba 1.93 !!!cp ('t425');
7547 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7548     text => $token->{tag_name}, token => $token);
7549 wakaba 1.93 } else {
7550     !!!cp ('t426');
7551     }
7552    
7553     ## Step 3.
7554     splice @{$self->{open_elements}}, $i;
7555 wakaba 1.36 }
7556 wakaba 1.52
7557     !!!next-token;
7558 wakaba 1.126 next B;
7559 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
7560     ## has an element in scope
7561     my $i;
7562     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7563     my $node = $self->{open_elements}->[$_];
7564 wakaba 1.123 if ($node->[1] & P_EL) {
7565 wakaba 1.87 !!!cp ('t410.1');
7566     $i = $_;
7567 wakaba 1.88 last INSCOPE;
7568 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7569 wakaba 1.87 !!!cp ('t411.1');
7570     last INSCOPE;
7571     }
7572     } # INSCOPE
7573 wakaba 1.91
7574     if (defined $i) {
7575 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7576     ne $token->{tag_name}) {
7577 wakaba 1.87 !!!cp ('t412.1');
7578 wakaba 1.122 !!!parse-error (type => 'not closed',
7579 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7580 wakaba 1.122 ->manakai_local_name,
7581     token => $token);
7582 wakaba 1.87 } else {
7583 wakaba 1.91 !!!cp ('t414.1');
7584 wakaba 1.87 }
7585 wakaba 1.91
7586 wakaba 1.87 splice @{$self->{open_elements}}, $i;
7587     } else {
7588 wakaba 1.91 !!!cp ('t413.1');
7589 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7590     text => $token->{tag_name}, token => $token);
7591 wakaba 1.91
7592 wakaba 1.87 !!!cp ('t415.1');
7593     ## As if <p>, then reprocess the current token
7594     my $el;
7595 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
7596 wakaba 1.87 $insert->($el);
7597 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
7598 wakaba 1.87 }
7599 wakaba 1.91
7600 wakaba 1.87 !!!next-token;
7601 wakaba 1.126 next B;
7602 wakaba 1.52 } elsif ({
7603     a => 1,
7604     b => 1, big => 1, em => 1, font => 1, i => 1,
7605     nobr => 1, s => 1, small => 1, strile => 1,
7606     strong => 1, tt => 1, u => 1,
7607     }->{$token->{tag_name}}) {
7608 wakaba 1.79 !!!cp ('t427');
7609 wakaba 1.113 $formatting_end_tag->($token);
7610 wakaba 1.126 next B;
7611 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
7612 wakaba 1.79 !!!cp ('t428');
7613 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7614     text => 'br', token => $token);
7615 wakaba 1.52
7616     ## As if <br>
7617     $reconstruct_active_formatting_elements->($insert_to_current);
7618    
7619     my $el;
7620 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
7621 wakaba 1.52 $insert->($el);
7622    
7623     ## Ignore the token.
7624     !!!next-token;
7625 wakaba 1.126 next B;
7626 wakaba 1.52 } elsif ({
7627     caption => 1, col => 1, colgroup => 1, frame => 1,
7628     frameset => 1, head => 1, option => 1, optgroup => 1,
7629     tbody => 1, td => 1, tfoot => 1, th => 1,
7630     thead => 1, tr => 1,
7631     area => 1, basefont => 1, bgsound => 1,
7632     embed => 1, hr => 1, iframe => 1, image => 1,
7633     img => 1, input => 1, isindex => 1, noembed => 1,
7634     noframes => 1, param => 1, select => 1, spacer => 1,
7635     table => 1, textarea => 1, wbr => 1,
7636     noscript => 0, ## TODO: if scripting is enabled
7637     }->{$token->{tag_name}}) {
7638 wakaba 1.79 !!!cp ('t429');
7639 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7640     text => $token->{tag_name}, token => $token);
7641 wakaba 1.52 ## Ignore the token
7642     !!!next-token;
7643 wakaba 1.126 next B;
7644 wakaba 1.52
7645     ## ISSUE: Issue on HTML5 new elements in spec
7646    
7647     } else {
7648     ## Step 1
7649     my $node_i = -1;
7650     my $node = $self->{open_elements}->[$node_i];
7651 wakaba 1.51
7652 wakaba 1.52 ## Step 2
7653     S2: {
7654 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7655 wakaba 1.52 ## Step 1
7656     ## generate implied end tags
7657 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7658 wakaba 1.79 !!!cp ('t430');
7659 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
7660     ## ISSUE: <ruby><rt></rt> will also take this code path,
7661     ## which seems wrong.
7662 wakaba 1.86 pop @{$self->{open_elements}};
7663 wakaba 1.151 $node_i++;
7664 wakaba 1.52 }
7665    
7666     ## Step 2
7667 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7668     ne $token->{tag_name}) {
7669 wakaba 1.79 !!!cp ('t431');
7670 wakaba 1.58 ## NOTE: <x><y></x>
7671 wakaba 1.122 !!!parse-error (type => 'not closed',
7672 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7673 wakaba 1.122 ->manakai_local_name,
7674     token => $token);
7675 wakaba 1.79 } else {
7676     !!!cp ('t432');
7677 wakaba 1.52 }
7678    
7679     ## Step 3
7680 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
7681 wakaba 1.51
7682 wakaba 1.1 !!!next-token;
7683 wakaba 1.52 last S2;
7684 wakaba 1.1 } else {
7685 wakaba 1.52 ## Step 3
7686 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
7687 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
7688 wakaba 1.123 ($node->[1] & SPECIAL_EL or
7689     $node->[1] & SCOPING_EL)) {
7690 wakaba 1.79 !!!cp ('t433');
7691 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7692     text => $token->{tag_name}, token => $token);
7693 wakaba 1.52 ## Ignore the token
7694     !!!next-token;
7695     last S2;
7696     }
7697 wakaba 1.79
7698     !!!cp ('t434');
7699 wakaba 1.1 }
7700 wakaba 1.52
7701     ## Step 4
7702     $node_i--;
7703     $node = $self->{open_elements}->[$node_i];
7704    
7705     ## Step 5;
7706     redo S2;
7707     } # S2
7708 wakaba 1.126 next B;
7709 wakaba 1.1 }
7710     }
7711 wakaba 1.126 next B;
7712     } continue { # B
7713     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7714     ## NOTE: The code below is executed in cases where it does not have
7715     ## to be, but it is harmless even in those cases.
7716     ## has an element in scope
7717     INSCOPE: {
7718     for (reverse 0..$#{$self->{open_elements}}) {
7719     my $node = $self->{open_elements}->[$_];
7720     if ($node->[1] & FOREIGN_EL) {
7721     last INSCOPE;
7722     } elsif ($node->[1] & SCOPING_EL) {
7723     last;
7724     }
7725     }
7726    
7727     ## NOTE: No foreign element in scope.
7728     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7729     } # INSCOPE
7730     }
7731 wakaba 1.1 } # B
7732    
7733     ## Stop parsing # MUST
7734    
7735     ## TODO: script stuffs
7736 wakaba 1.3 } # _tree_construct_main
7737    
## Implements the |innerHTML| setter: replaces the children of |$node|
## with the result of parsing a character string as HTML.
##
## Invoked as a class method:
##
##   Whatpm::HTML->set_inner_html ($node, $string, $onerror, $get_wrapper);
##
##   $node        - Target node.  Node type 9 (document) and node type 1
##                  (element) are handled; any other node type |die|s.
##   $string      - Character string to parse.  It is accessed through a
##                  reference to the caller's argument, not copied.
##   $onerror     - Optional code reference called for each parse error.
##                  In the element case it receives |line| and |column|
##                  followed by the arguments supplied by the parser; if
##                  omitted, a default handler |warn|s the error type and
##                  position.  In the document case it is passed to
##                  |parse_char_string| unchanged.
##   $get_wrapper - Optional code reference.  Only forwarded to
##                  |parse_char_string| in the document case (see the TODO
##                  in the element case).
##
## NOTE: The prototype does not count |$get_wrapper|.  Perl ignores
## prototypes for method calls, so this is harmless for the documented
## calling convention.
sub set_inner_html ($$$;$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Snapshot the child list first, since it is modified while iterating.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($$s => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper

    ## Step 1 # MUST
    ## A temporary document and parser are used; the parsed nodes are
    ## moved into |$node| at the end (Step 11).
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    my $i = 0; # Index of the next character of |$$s| to be read.
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    $p->{set_next_char} = sub {
      my $self = shift;

      ## Shift the current character into the history of previous
      ## characters.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      ## End of input is represented by -1.
      if ($i >= length $$s) {
        $self->{next_char} = -1;
        return;
      }
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR LF and a lone CR are both reported as a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_char} <= 0x0008 or
               (0x000E <= $self->{next_char} and
                $self->{next_char} <= 0x001F) or
               (0x007F <= $self->{next_char} and
                $self->{next_char} <= 0x009F) or
               (0xD800 <= $self->{next_char} and
                $self->{next_char} <= 0xDFFF) or
               (0xFDD0 <= $self->{next_char} and
                $self->{next_char} <= 0xFDDF) or
               {
                 0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
                 0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
                 0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
                 0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
                 0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
                 0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
                 0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
                 0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
                 0x10FFFE => 1, 0x10FFFF => 1,
               }->{$self->{next_char}}) {
        ## The character is reported as an error but is passed through
        ## unchanged.
        !!!cp ('i4.1');
        if ($self->{next_char} < 0x10000) {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U+%04X', $self->{next_char}));
        } else {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U-%08X', $self->{next_char}));
        }
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    $p->{read_until} = sub {
      ## TODO: ...
      return 0;
    }; # $p->{read_until};

    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      ## The position recorded in the token, if any, takes precedence
      ## over the current input position.
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model depends on the context element's
    ## local name; PCDATA is the default.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns ($HTML_NS, [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## Use the nearest HTML |form| element, starting from |$node| itself
    ## and walking up through its ancestors, as the form element.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## The |!!!next-token| macro operates on a variable named |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break the reference cycles: both closures are stored in |$p| and
    ## capture |$p|, so neither |$p| nor the temporary document could be
    ## freed while they remain.
    delete $p->{set_next_char}; # delete loop
    delete $p->{parse_error}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
7946    
7947     } # tree construction stage
7948 wakaba 1.1
## |Whatpm::HTML::RestartParser| is declared as a subclass of |Error|.
## NOTE(review): Presumably thrown to request that parsing be restarted;
## confirm against the code that throws and catches it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';

## True value returned to |require|.
1;
7953 wakaba 1.174 # $Date: 2008/09/14 03:59:08 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24