/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.164 - (hide annotations) (download) (as text)
Sat Sep 13 06:33:39 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.163: +78 -57 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	13 Sep 2008 06:33:32 -0000
	* HTML.pm.src: |CLOSE_TAG_OPEN_STATE| is broken into
	itself and new |CDATA_PCDATA_CLOSE_TAG_STATE| so that
	no longer does the tokenizer have to push back next input
	characters in those states.

2008-09-13  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.164 our $VERSION=do{my @r=(q$Revision: 1.163 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.139 require IO::Handle;
12    
## Namespace URIs (HTML, MathML, SVG, XLink, XML and XMLNS).
my ($HTML_NS, $MML_NS, $SVG_NS, $XLINK_NS, $XML_NS, $XMLNS_NS) = (
  'http://www.w3.org/1999/xhtml',
  'http://www.w3.org/1998/Math/MathML',
  'http://www.w3.org/2000/svg',
  'http://www.w3.org/1999/xlink',
  'http://www.w3.org/XML/1998/namespace',
  'http://www.w3.org/2000/xmlns/',
);
19    
## Element category flags.  Every constant occupies exactly one bit, so
## categories can be combined with |\|| and tested with |&|.
sub A_EL () { 1 << 0 }
sub ADDRESS_EL () { 1 << 1 }
sub BODY_EL () { 1 << 2 }
sub BUTTON_EL () { 1 << 3 }
sub CAPTION_EL () { 1 << 4 }
sub DD_EL () { 1 << 5 }
sub DIV_EL () { 1 << 6 }
sub DT_EL () { 1 << 7 }
sub FORM_EL () { 1 << 8 }
sub FORMATTING_EL () { 1 << 9 }
sub FRAMESET_EL () { 1 << 10 }
sub HEADING_EL () { 1 << 11 }
sub HTML_EL () { 1 << 12 }
sub LI_EL () { 1 << 13 }
sub NOBR_EL () { 1 << 14 }
sub OPTION_EL () { 1 << 15 }
sub OPTGROUP_EL () { 1 << 16 }
sub P_EL () { 1 << 17 }
sub SELECT_EL () { 1 << 18 }
sub TABLE_EL () { 1 << 19 }
sub TABLE_CELL_EL () { 1 << 20 }
sub TABLE_ROW_EL () { 1 << 21 }
sub TABLE_ROW_GROUP_EL () { 1 << 22 }
sub MISC_SCOPING_EL () { 1 << 23 }
sub MISC_SPECIAL_EL () { 1 << 24 }
sub FOREIGN_EL () { 1 << 25 }
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 26 }
sub MML_AXML_EL () { 1 << 27 }
sub RUBY_EL () { 1 << 28 }
sub RUBY_COMPONENT_EL () { 1 << 29 }
50 wakaba 1.123
## Union of the TABLE_EL, TABLE_ROW_EL and TABLE_ROW_GROUP_EL bits.
sub TABLE_ROWS_EL () { TABLE_ROW_GROUP_EL | TABLE_ROW_EL | TABLE_EL }
56    
## NOTE: Used in the "generate implied end tags" algorithm.
## NOTE: There is a place where a modified version of
## END_TAG_OPTIONAL_EL is used in the "generate implied end tags"
## implementation (search for the function mae).
sub END_TAG_OPTIONAL_EL () {
  (DD_EL | DT_EL | LI_EL | P_EL) | RUBY_COMPONENT_EL
}
68    
## NOTE: Used in the </body> and EOF algorithms.
sub ALL_END_TAG_OPTIONAL_EL () {
  ## The first group is also part of END_TAG_OPTIONAL_EL; the second
  ## group is specific to this mask.
  (DD_EL | DT_EL | LI_EL | P_EL) |
  (BODY_EL | HTML_EL | TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL)
}
82    
## Union of the category bits BUTTON_EL, CAPTION_EL, HTML_EL, TABLE_EL,
## TABLE_CELL_EL and MISC_SCOPING_EL.
sub SCOPING_EL () {
  (BUTTON_EL | CAPTION_EL | HTML_EL) |
  (TABLE_EL | TABLE_CELL_EL) |
  MISC_SCOPING_EL
}
91    
## Union of the HTML_EL and TABLE_EL bits.
sub TABLE_SCOPING_EL () { TABLE_EL | HTML_EL }
96    
## Union of the HTML_EL and TABLE_ROW_GROUP_EL bits.
sub TABLE_ROWS_SCOPING_EL () { TABLE_ROW_GROUP_EL | HTML_EL }
101    
## Union of the HTML_EL and TABLE_ROW_EL bits.
sub TABLE_ROW_SCOPING_EL () { TABLE_ROW_EL | HTML_EL }
106    
## Union of the category bits listed below.  The DD_EL/DT_EL/LI_EL/P_EL
## group is the same group that appears in END_TAG_OPTIONAL_EL.
sub SPECIAL_EL () {
  (ADDRESS_EL | BODY_EL | DIV_EL) |
  (DD_EL | DT_EL | LI_EL | P_EL) |
  (FORM_EL | FRAMESET_EL | HEADING_EL) |
  (OPTION_EL | OPTGROUP_EL | SELECT_EL) |
  (TABLE_ROW_EL | TABLE_ROW_GROUP_EL) |
  MISC_SPECIAL_EL
}
127    
## Maps an HTML element local name to its category bit mask.  Entries
## are grouped by category instead of being listed alphabetically.
my $el_category = {
  ## Elements whose category bit is used by exactly one element name.
  address => ADDRESS_EL,
  body => BODY_EL,
  button => BUTTON_EL,
  caption => CAPTION_EL,
  dd => DD_EL,
  div => DIV_EL,
  dt => DT_EL,
  form => FORM_EL,
  frameset => FRAMESET_EL,
  html => HTML_EL,
  li => LI_EL,
  optgroup => OPTGROUP_EL,
  option => OPTION_EL,
  p => P_EL,
  ruby => RUBY_EL,
  select => SELECT_EL,
  table => TABLE_EL,
  'tr' => TABLE_ROW_EL,

  ## Formatting elements; |a| and |nobr| carry an additional bit.
  a => FORMATTING_EL | A_EL,
  nobr => FORMATTING_EL | NOBR_EL,
  (map { ($_ => FORMATTING_EL) }
       qw(b big em font i s small strike strong tt u)),

  ## Categories shared by a few element names.
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => RUBY_COMPONENT_EL) } qw(rp rt)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),

  ## Everything else that has a category at all.
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area base basefont bgsound blockquote br center col colgroup
    dir dl embed fieldset frame head hr iframe img input isindex
    link listing menu meta noembed noframes noscript ol param
    plaintext pre script spacer style textarea title ul wbr
  )),
};
215    
## Maps a namespace URI and then an element local name to the category
## bit mask of a MathML or SVG element.
my $el_category_f = {
  $MML_NS => {
    'annotation-xml' => MML_AXML_EL,
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(mi mo mn ms mtext)),
  },
  $SVG_NS => {
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(foreignObject desc title)),
  },
  ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
};
232    
## Maps an all-lowercase SVG attribute name to its mixed-case spelling.
## Every key is simply the lowercased form of its value, so only the
## mixed-case spellings are listed.
my $svg_attr_name = {
  map { (lc ($_), $_) } qw(
    attributeName attributeType baseFrequency baseProfile calcMode
    clipPathUnits contentScriptType contentStyleType diffuseConstant
    edgeMode externalResourcesRequired filterRes filterUnits glyphRef
    gradientTransform gradientUnits kernelMatrix kernelUnitLength
    keyPoints keySplines keyTimes lengthAdjust limitingConeAngle
    markerHeight markerUnits markerWidth maskContentUnits maskUnits
    numOctaves pathLength patternContentUnits patternTransform
    patternUnits pointsAtX pointsAtY pointsAtZ preserveAlpha
    preserveAspectRatio primitiveUnits refX refY repeatCount repeatDur
    requiredExtensions requiredFeatures specularConstant specularExponent
    spreadMethod startOffset stdDeviation stitchTiles surfaceScale
    systemLanguage tableValues targetX targetY textLength viewBox
    viewTarget xChannelSelector yChannelSelector zoomAndPan
  )
};
297    
## Maps a qualified attribute name to |[namespace URI, [prefix, local
## name]]|.
my $foreign_attr_xname = {
  (map { ("xlink:$_" => [$XLINK_NS, ['xlink', $_]]) }
       qw(actuate arcrole href role show title type)),
  (map { ("xml:$_" => [$XML_NS, ['xml', $_]]) } qw(base lang space)),
  'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
  'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
};
312    
313     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
314    
## Maps each code point in the range 0x80..0x9F to a replacement code
## point.  The list below is ordered by source code point, starting at
## 0x80.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 80-87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 88-8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 90-97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 98-9F
  );
  +{ map { (0x80 + $_, $replacement[$_]) } 0 .. $#replacement };
}; # $c1_entity_char
349 wakaba 1.1
## Parses an HTML document that is held in memory as a string of bytes.
##
## Arguments: the parser (object or class name), the charset name
## (may be undefined), the byte string or a reference to it, followed
## by the arguments that |parse_byte_stream| accepts after its byte
## stream argument.  They are passed through unchanged.
##
## Returns whatever |parse_byte_stream| returns.  Dies if the
## in-memory file handle cannot be opened; previously the failure was
## ignored and an unopened handle was handed to the parser.
sub parse_byte_string ($$$$;$) {
  my $self = shift;
  my $charset_name = shift;
  ## Take a reference so that the string itself is not copied.
  my $bytes_ref = ref $_[0] ? $_[0] : \($_[0]);
  open my $input, '<', $bytes_ref
      or die "parse_byte_string: cannot open in-memory byte stream: $!";
  return $self->parse_byte_stream ($charset_name, $input, @_[1..$#_]);
} # parse_byte_string
356    
## Parses an HTML document read from a byte stream handle.
##
## The character encoding is chosen by the SNIFFING block below: the
## given charset name, then a byte order mark, then
## |Whatpm::Charset::UniversalCharDet|, then |windows-1252| as the
## default.  The decoded character stream is passed through
## |$get_wrapper| and handed to |parse_char_stream|.  When that call
## throws |Whatpm::HTML::RestartParser| (thrown by the
## |change_encoding| callback installed here), |parse_char_stream| is
## invoked once more.
##
## Arguments: see the commented-out |my (...) = @_| line below.
## |$onerror| defaults to a callback that |warn|s, and |$get_wrapper|
## defaults to a callback returning its argument unchanged.
##
## Sets |$self->{confident}| and |$self->{input_encoding}|.
## Returns the value returned by |parse_char_stream|.
357 wakaba 1.162 sub parse_byte_stream ($$$$;$$) {
358     # my ($self, $charset_name, $byte_stream, $doc, $onerror, $get_wrapper) = @_;
359 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
360 wakaba 1.133 my $charset_name = shift;
361 wakaba 1.138 my $byte_stream = $_[0];
362 wakaba 1.133
363 wakaba 1.134 my $onerror = $_[2] || sub {
364     my (%opt) = @_;
365     warn "Parse error ($opt{type})\n";
366     };
367     $self->{parse_error} = $onerror; # updated later by parse_char_string
368    
369 wakaba 1.162 my $get_wrapper = $_[3] || sub ($) {
370     return $_[0]; # $_[0] = byte stream handle, returned = arg to char handle
371     };
372    
373 wakaba 1.133 ## HTML5 encoding sniffing algorithm
374     require Message::Charset::Info;
375     my $charset;
376 wakaba 1.136 my $buffer;
377     my ($char_stream, $e_status);
378 wakaba 1.133
379     SNIFFING: {
380 wakaba 1.160 ## NOTE: By setting |allow_fallback| option true when the
381     ## |get_decode_handle| method is invoked, we ignore what the HTML5
382     ## spec requires, i.e. unsupported encoding should be ignored.
383     ## TODO: We should not do this unless the parser is invoked
384     ## in the conformance checking mode, in which this behavior
385     ## would be useful.
386 wakaba 1.133
387     ## Step 1
388     if (defined $charset_name) {
389 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
390     ## TODO: Is this ok? Transfer protocol's parameter should be
391     ## interpreted in its semantics?
392 wakaba 1.133
393     ## ISSUE: Unsupported encoding is not ignored according to the spec.
394 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
395     ($byte_stream, allow_error_reporting => 1,
396 wakaba 1.133 allow_fallback => 1);
397 wakaba 1.136 if ($char_stream) {
398 wakaba 1.133 $self->{confident} = 1;
399     last SNIFFING;
400 wakaba 1.136 } else {
401     ## TODO: unsupported error
402 wakaba 1.133 }
403     }
404    
405     ## Step 2
    ## Read at most 1024 bytes for the BOM check and charset detection.
406 wakaba 1.136 my $byte_buffer = '';
407     for (1..1024) {
408     my $char = $byte_stream->getc;
409     last unless defined $char;
410     $byte_buffer .= $char;
411     } ## TODO: timeout
412 wakaba 1.133
413     ## Step 3
    ## A byte order mark at the start selects UTF-16BE, UTF-16LE or UTF-8.
414 wakaba 1.136 if ($byte_buffer =~ /^\xFE\xFF/) {
415 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16be');
416 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
417     ($byte_stream, allow_error_reporting => 1,
418     allow_fallback => 1, byte_buffer => \$byte_buffer);
419 wakaba 1.133 $self->{confident} = 1;
420     last SNIFFING;
421 wakaba 1.136 } elsif ($byte_buffer =~ /^\xFF\xFE/) {
422 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-16le');
423 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
424     ($byte_stream, allow_error_reporting => 1,
425     allow_fallback => 1, byte_buffer => \$byte_buffer);
426 wakaba 1.133 $self->{confident} = 1;
427     last SNIFFING;
428 wakaba 1.136 } elsif ($byte_buffer =~ /^\xEF\xBB\xBF/) {
429 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
430 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
431     ($byte_stream, allow_error_reporting => 1,
432     allow_fallback => 1, byte_buffer => \$byte_buffer);
433 wakaba 1.133 $self->{confident} = 1;
434     last SNIFFING;
435     }
436    
437     ## Step 4
438     ## TODO: <meta charset>
439    
440     ## Step 5
441     ## TODO: from history
442    
443     ## Step 6
444 wakaba 1.65 require Whatpm::Charset::UniversalCharDet;
445 wakaba 1.133 $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
446 wakaba 1.136 ($byte_buffer);
447 wakaba 1.133 if (defined $charset_name) {
448 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
449 wakaba 1.133
450     ## ISSUE: Unsupported encoding is not ignored according to the spec.
451 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
452     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
453     ($byte_stream);
454     ($char_stream, $e_status) = $charset->get_decode_handle
455     ($buffer, allow_error_reporting => 1,
456     allow_fallback => 1, byte_buffer => \$byte_buffer);
457     if ($char_stream) {
458     $buffer->{buffer} = $byte_buffer;
459 wakaba 1.153 !!!parse-error (type => 'sniffing:chardet',
460     text => $charset_name,
461     level => $self->{level}->{info},
462     layer => 'encode',
463 wakaba 1.134 line => 1, column => 1);
464 wakaba 1.133 $self->{confident} = 0;
465     last SNIFFING;
466     }
467     }
468    
469     ## Step 7: default
470     ## TODO: Make this configurable.
471 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('windows-1252');
472 wakaba 1.133 ## NOTE: We choose |windows-1252| here, since |utf-8| should be
473     ## detectable in the step 6.
474 wakaba 1.136 require Whatpm::Charset::DecodeHandle;
475     $buffer = Whatpm::Charset::DecodeHandle::ByteBuffer->new
476     ($byte_stream);
477     ($char_stream, $e_status)
478     = $charset->get_decode_handle ($buffer,
479     allow_error_reporting => 1,
480     allow_fallback => 1,
481     byte_buffer => \$byte_buffer);
482     $buffer->{buffer} = $byte_buffer;
483 wakaba 1.153 !!!parse-error (type => 'sniffing:default',
484     text => 'windows-1252',
485     level => $self->{level}->{info},
486     line => 1, column => 1,
487     layer => 'encode');
488 wakaba 1.63 $self->{confident} = 0;
489 wakaba 1.133 } # SNIFFING
490    
    ## Record the encoding name and report when the decoder is a
    ## fallback implementation or one that does not report errors.
491     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
492 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
493 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
494 wakaba 1.160 #text => $self->{input_encoding},
495 wakaba 1.153 level => $self->{level}->{uncertain},
496     line => 1, column => 1,
497     layer => 'encode');
498 wakaba 1.133 } elsif (not ($e_status &
499     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
500 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
501 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
502     text => $self->{input_encoding},
503     level => $self->{level}->{uncertain},
504     line => 1, column => 1,
505     layer => 'encode');
506 wakaba 1.160 } else {
507     $self->{input_encoding} = $charset->get_iana_name;
508 wakaba 1.63 }
509    
    ## Callback taking the parser, a charset name and a token.  It
    ## overwrites the enclosing |$charset_name|, |$charset|,
    ## |$char_stream| and |$e_status| variables, which the restart
    ## handler below reads.
    ## NOTE(review): |$buffer| is assigned only in steps 6 and 7 above;
    ## when sniffing ended in step 1 or 3 it is still undefined when
    ## |$buffer->{buffer}| is referenced here - confirm that this is
    ## intended.
510     $self->{change_encoding} = sub {
511     my $self = shift;
512 wakaba 1.134 $charset_name = shift;
513 wakaba 1.114 my $token = shift;
514 wakaba 1.63
515 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ($charset_name);
516 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
517     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1,
518     byte_buffer => \ $buffer->{buffer});
519 wakaba 1.134
520 wakaba 1.136 if ($char_stream) { # if supported
521 wakaba 1.134 ## "Change the encoding" algorithm:
522 wakaba 1.63
523 wakaba 1.134 ## Step 1
524 wakaba 1.149 if ($charset->{category} &
525     Message::Charset::Info::CHARSET_CATEGORY_UTF16 ()) {
526 wakaba 1.161 $charset = Message::Charset::Info->get_by_html_name ('utf-8');
527 wakaba 1.136 ($char_stream, $e_status) = $charset->get_decode_handle
528     ($byte_stream,
529     byte_buffer => \ $buffer->{buffer});
530 wakaba 1.134 }
531     $charset_name = $charset->get_iana_name;
532    
533     ## Step 2
534     if (defined $self->{input_encoding} and
535     $self->{input_encoding} eq $charset_name) {
536 wakaba 1.153 !!!parse-error (type => 'charset label:matching',
537     text => $charset_name,
538     level => $self->{level}->{info});
539 wakaba 1.134 $self->{confident} = 1;
540     return;
541     }
542 wakaba 1.63
543 wakaba 1.153 !!!parse-error (type => 'charset label detected',
544     text => $self->{input_encoding},
545     value => $charset_name,
546     level => $self->{level}->{warn},
547     token => $token);
548 wakaba 1.134
549     ## Step 3
550     # if (can) {
551     ## change the encoding on the fly.
552     #$self->{confident} = 1;
553     #return;
554     # }
555    
556     ## Step 4
557     throw Whatpm::HTML::RestartParser ();
558 wakaba 1.63 }
559     }; # $self->{change_encoding}
560    
    ## Error callback registered on the wrapped character stream.  It
    ## reports the error at the current parser position and, when an
    ## |octets| reference is supplied, stores U+FFFD into it.
561 wakaba 1.136 my $char_onerror = sub {
562     my (undef, $type, %opt) = @_;
563 wakaba 1.153 !!!parse-error (layer => 'encode',
564     %opt, type => $type,
565 wakaba 1.137 line => $self->{line}, column => $self->{column} + 1);
566 wakaba 1.136 if ($opt{octets}) {
567     ${$opt{octets}} = "\x{FFFD}"; # replacement character
568     }
569     };
570 wakaba 1.162
571     my $wrapped_char_stream = $get_wrapper->($char_stream);
572     $wrapped_char_stream->onerror ($char_onerror);
573 wakaba 1.136
    ## |@args| holds the remaining arguments without the byte stream.
574 wakaba 1.63 my @args = @_; shift @args; # $s
575     my $return;
576     try {
577 wakaba 1.162 $return = $self->parse_char_stream ($wrapped_char_stream, @args);
578 wakaba 1.63 } catch Whatpm::HTML::RestartParser with {
579 wakaba 1.134 ## NOTE: Invoked after {change_encoding}.
580    
581     if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {
582 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name; ## TODO: Should we set actual charset decoder's encoding name?
583 wakaba 1.153 !!!parse-error (type => 'chardecode:fallback',
584     level => $self->{level}->{uncertain},
585 wakaba 1.160 #text => $self->{input_encoding},
586 wakaba 1.153 line => 1, column => 1,
587     layer => 'encode');
588 wakaba 1.134 } elsif (not ($e_status &
589     Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {
590 wakaba 1.160 $self->{input_encoding} = $charset->get_iana_name;
591 wakaba 1.153 !!!parse-error (type => 'chardecode:no error',
592     text => $self->{input_encoding},
593     level => $self->{level}->{uncertain},
594     line => 1, column => 1,
595     layer => 'encode');
596 wakaba 1.160 } else {
597     $self->{input_encoding} = $charset->get_iana_name;
598 wakaba 1.134 }
599 wakaba 1.63 $self->{confident} = 1;
600 wakaba 1.162
    ## Wrap the character stream created by {change_encoding} and
    ## parse again.
601     $wrapped_char_stream = $get_wrapper->($char_stream);
602     $wrapped_char_stream->onerror ($char_onerror);
603    
604     $return = $self->parse_char_stream ($wrapped_char_stream, @args);
605 wakaba 1.63 };
606     return $return;
607 wakaba 1.138 } # parse_byte_stream
608 wakaba 1.63
609 wakaba 1.71 ## NOTE: The HTML5 spec says that the encoding layer MUST NOT strip the
610     ## BOM and the HTML layer MUST ignore it. However, we do strip the BOM in
611     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
612     ## because the core part of our HTML parser expects a string of characters,
613     ## not a string of bytes or code units or anything which might contain a BOM.
614     ## Therefore, any parser interface that accepts a string of bytes,
615     ## such as |parse_byte_string| in this module, must ensure that it does
616     ## strip the BOM and never strips any ZWNBSP.
617    
## Parses an HTML document that is held in memory as a string of
## characters.
##
## Arguments: ($self, $s, $doc, $onerror, $get_wrapper).  |$s| is the
## string or a reference to it.  |$get_wrapper|, if true, is called
## with the input handle and its return value is used as the input
## instead.  Everything after |$s| is passed on to
## |parse_char_stream|.
##
## Returns whatever |parse_char_stream| returns.  Dies if the
## in-memory file handle cannot be opened.
sub parse_char_string ($$$;$$) {
  #my ($self, $s, $doc, $onerror, $get_wrapper) = @_;
  my $self = shift;
  require utf8;
  my $s = ref $_[0] ? $_[0] : \($_[0]);

  ## In-memory file handles operate on bytes.  A character string
  ## (UTF-8 flagged scalar) is therefore encoded into a private copy
  ## of UTF-8 octets first and decoded again by the |:utf8| layer.
  ## Opening the flagged scalar itself fails on newer perls when it
  ## contains code points above 0xFF.  |$octets| is declared here so
  ## that it stays alive for as long as |$input| is read from.
  my $octets;
  my $input;
  if (utf8::is_utf8 ($$s)) {
    $octets = $$s;
    utf8::encode ($octets);
    open $input, '<:utf8', \$octets
        or die "parse_char_string: cannot open in-memory stream: $!";
  } else {
    open $input, '<', $s
        or die "parse_char_string: cannot open in-memory stream: $!";
  }

  if ($_[3]) {
    $input = $_[3]->($input);
  }
  return $self->parse_char_stream ($input, @_[1..$#_]);
} # parse_char_string
*parse_string = \&parse_char_string; ## NOTE: Alias for backward compatibility.
630 wakaba 1.63
## Parses an HTML document read from a character stream handle into
## the given document object.
##
## Arguments: the parser (an object, or a class name on which |new| is
## called), the input handle (read with |getc|), the document (its
## child node list is emptied first) and an optional error callback.
## The default error callback |warn|s with the line and column.
##
## Installs the |set_next_char| and |parse_error| callbacks, runs the
## tokenizer and tree constructor, and returns the document.
631 wakaba 1.135 sub parse_char_stream ($$$;$) {
632 wakaba 1.63 my $self = ref $_[0] ? shift : shift->new;
633 wakaba 1.135 my $input = $_[0];
634 wakaba 1.1 $self->{document} = $_[1];
635 wakaba 1.63 @{$self->{document}->child_nodes} = ();
636 wakaba 1.1
637 wakaba 1.3 ## NOTE: |set_inner_html| copies most of this method's code
638    
639 wakaba 1.63 $self->{confident} = 1 unless exists $self->{confident};
640 wakaba 1.64 $self->{document}->input_encoding ($self->{input_encoding})
641     if defined $self->{input_encoding};
642 wakaba 1.63
643 wakaba 1.1 my $i = 0;
644 wakaba 1.112 $self->{line_prev} = $self->{line} = 1;
645     $self->{column_prev} = $self->{column} = 0;
    ## Callback that advances the input by one character.  It stores
    ## the code point in |$self->{next_char}| (-1 at the end of the
    ## input), keeps the previous characters in |$self->{prev_char}|
    ## and maintains the line and column counters.
646 wakaba 1.76 $self->{set_next_char} = sub {
647 wakaba 1.1 my $self = shift;
648 wakaba 1.13
649 wakaba 1.76 pop @{$self->{prev_char}};
650     unshift @{$self->{prev_char}}, $self->{next_char};
651 wakaba 1.13
    ## A character read ahead by the CR handling below is used first.
652 wakaba 1.139 my $char;
653     if (defined $self->{next_next_char}) {
654     $char = $self->{next_next_char};
655     delete $self->{next_next_char};
656     } else {
657     $char = $input->getc;
658     }
    ## The assignment yields -1, which is true, so |return| is executed.
659 wakaba 1.135 $self->{next_char} = -1 and return unless defined $char;
660     $self->{next_char} = ord $char;
661 wakaba 1.112
662     ($self->{line_prev}, $self->{column_prev})
663     = ($self->{line}, $self->{column});
664     $self->{column}++;
665 wakaba 1.1
666 wakaba 1.76 if ($self->{next_char} == 0x000A) { # LF
667 wakaba 1.132 !!!cp ('j1');
668 wakaba 1.112 $self->{line}++;
669     $self->{column} = 0;
670 wakaba 1.76 } elsif ($self->{next_char} == 0x000D) { # CR
671 wakaba 1.132 !!!cp ('j2');
    ## CR and CR LF both become a single LF.  A character other than
    ## LF that follows the CR is kept for the next invocation.
672 wakaba 1.135 my $next = $input->getc;
673 wakaba 1.139 if (defined $next and $next ne "\x0A") {
674     $self->{next_next_char} = $next;
675 wakaba 1.135 }
676 wakaba 1.76 $self->{next_char} = 0x000A; # LF # MUST
677 wakaba 1.112 $self->{line}++;
678     $self->{column} = 0;
679 wakaba 1.76 } elsif ($self->{next_char} > 0x10FFFF) {
680 wakaba 1.132 !!!cp ('j3');
681 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
682     } elsif ($self->{next_char} == 0x0000) { # NULL
683 wakaba 1.132 !!!cp ('j4');
684 wakaba 1.8 !!!parse-error (type => 'NULL');
685 wakaba 1.76 $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    ## The code points below are reported as 'control char' but are
    ## left unchanged.
    ## NOTE(review): Unicode defines U+FDD0..U+FDEF as noncharacters,
    ## while the range tested here ends at U+FDDF - confirm against the
    ## spec revision this code implements.
686 wakaba 1.132 } elsif ($self->{next_char} <= 0x0008 or
687     (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
688     (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
689     (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
690     (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
691     {
692     0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
693     0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
694     0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
695     0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
696     0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
697     0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
698     0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
699     0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
700     0x10FFFE => 1, 0x10FFFF => 1,
701     }->{$self->{next_char}}) {
702     !!!cp ('j5');
703 wakaba 1.153 if ($self->{next_char} < 0x10000) {
704     !!!parse-error (type => 'control char',
705     text => (sprintf 'U+%04X', $self->{next_char}));
706     } else {
707     !!!parse-error (type => 'control char',
708     text => (sprintf 'U-%08X', $self->{next_char}));
709     }
710 wakaba 1.1 }
711     };
712 wakaba 1.76 $self->{prev_char} = [-1, -1, -1];
713     $self->{next_char} = -1;
714 wakaba 1.1
715 wakaba 1.3 my $onerror = $_[2] || sub {
716     my (%opt) = @_;
717 wakaba 1.112 my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
718     my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
719     warn "Parse error ($opt{type}) at line $line column $column\n";
720 wakaba 1.3 };
    ## The current line and column are passed first, so |line| and
    ## |column| given by the caller override them.
721     $self->{parse_error} = sub {
722 wakaba 1.112 $onerror->(line => $self->{line}, column => $self->{column}, @_);
723 wakaba 1.1 };
724    
725     $self->_initialize_tokenizer;
726     $self->_initialize_tree_constructor;
727     $self->_construct_tree;
728     $self->_terminate_tree_constructor;
729    
    ## The closure above captures |$self|; deleting it breaks the
    ## reference cycle.
730 wakaba 1.112 delete $self->{parse_error}; # remove loop
731    
732 wakaba 1.1 return $self->{document};
733 wakaba 1.135 } # parse_char_stream
734 wakaba 1.1
## Creates a parser object with the default error level codes and
## placeholder callbacks.
##
## Argument: the class name to bless the object into.
##
## Returns: the new object.  Its |level| member maps level names to
## one-letter codes.  The |set_next_char| callback sets |next_char| to
## -1; |parse_error|, |change_encoding| and
## |application_cache_selection| do nothing.
sub new ($) {
  my $class = shift;
  my $self = bless {
    level => {must => 'm',
              should => 's',
              warn => 'w',
              info => 'i',
              uncertain => 'u'},
  }, $class;

  ## The default |set_next_char| callback needs the parser object.  A
  ## strong reference captured by the closure would form a cycle
  ## ($self -> closure -> $self) and keep a parser that is never used
  ## for parsing alive forever, so only a weakened copy is captured.
  require Scalar::Util;
  my $weak_self = $self;
  Scalar::Util::weaken ($weak_self);
  $self->{set_next_char} = sub {
    $weak_self->{next_char} = -1;
  };

  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
761    
## Content model feature flags.
sub CM_ENTITY () { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP () { 1 << 2 } # < markup in data (any)

## Content models, each a combination of the feature flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL () { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL () { CM_FULL_MARKUP | CM_ENTITY }
770    
## Tokenizer state constants.  Each name in the list below becomes a
## constant function whose value is the position of the name in the
## list, starting at 0.  New states must therefore be appended at the
## end to keep the existing values.
BEGIN {
  my @state_name = (
    'DATA_STATE',                                      # 0
    'ENTITY_DATA_STATE',
    'TAG_OPEN_STATE',
    'CLOSE_TAG_OPEN_STATE',
    'TAG_NAME_STATE',
    'BEFORE_ATTRIBUTE_NAME_STATE',                     # 5
    'ATTRIBUTE_NAME_STATE',
    'AFTER_ATTRIBUTE_NAME_STATE',
    'BEFORE_ATTRIBUTE_VALUE_STATE',
    'ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE',
    'ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE',             # 10
    'ATTRIBUTE_VALUE_UNQUOTED_STATE',
    'ENTITY_IN_ATTRIBUTE_VALUE_STATE',
    'MARKUP_DECLARATION_OPEN_STATE',
    'COMMENT_START_STATE',
    'COMMENT_START_DASH_STATE',                        # 15
    'COMMENT_STATE',
    'COMMENT_END_STATE',
    'COMMENT_END_DASH_STATE',
    'BOGUS_COMMENT_STATE',
    'DOCTYPE_STATE',                                   # 20
    'BEFORE_DOCTYPE_NAME_STATE',
    'DOCTYPE_NAME_STATE',
    'AFTER_DOCTYPE_NAME_STATE',
    'BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE',
    'DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE',   # 25
    'DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE',
    'AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE',
    'BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE',
    'DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE',
    'DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE',   # 30
    'AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE',
    'BOGUS_DOCTYPE_STATE',
    'AFTER_ATTRIBUTE_VALUE_QUOTED_STATE',
    'SELF_CLOSING_START_TAG_STATE',
    'CDATA_BLOCK_STATE',                               # 35
    'MD_HYPHEN_STATE',  # "markup declaration open state" in the spec
    'MD_DOCTYPE_STATE', # "markup declaration open state" in the spec
    'MD_CDATA_STATE',   # "markup declaration open state" in the spec
    'CDATA_PCDATA_CLOSE_TAG_STATE', # "close tag open state" in the spec
  );
  for my $index (0 .. $#state_name) {
    my $value = $index;
    no strict 'refs';
    *{__PACKAGE__ . '::' . $state_name[$index]} = sub () { $value };
  }
}
811 wakaba 1.57
## Token type constants.  Each name becomes a constant function whose
## value is its position in the list, counted from 1.
BEGIN {
  my @token_name = qw(
    DOCTYPE_TOKEN COMMENT_TOKEN START_TAG_TOKEN END_TAG_TOKEN
    END_OF_FILE_TOKEN CHARACTER_TOKEN
  );
  for my $index (0 .. $#token_name) {
    my $value = $index + 1;
    no strict 'refs';
    *{__PACKAGE__ . '::' . $token_name[$index]} = sub () { $value };
  }
}
818    
## Insertion mode bit flags. Every |*_IMS| constant (and
## |IN_FOREIGN_CONTENT_IM|) occupies one bit from bit 2 upward; the two
## lowest bits are left free so that the |*_IM| constants further below
## can use them as a small discriminator within a group of modes.
## NOTE(review): presumably these flags are tested with bitwise |&|
## elsewhere in the file - confirm against the tree construction code.
819 wakaba 1.54 sub AFTER_HTML_IMS () { 0b100 }
820     sub HEAD_IMS () { 0b1000 }
821     sub BODY_IMS () { 0b10000 }
822 wakaba 1.56 sub BODY_TABLE_IMS () { 0b100000 }
823 wakaba 1.54 sub TABLE_IMS () { 0b1000000 }
824 wakaba 1.56 sub ROW_IMS () { 0b10000000 }
825 wakaba 1.54 sub BODY_AFTER_IMS () { 0b100000000 }
826     sub FRAME_IMS () { 0b1000000000 }
827 wakaba 1.101 sub SELECT_IMS () { 0b10000000000 }
828 wakaba 1.126 sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 }
829     ## NOTE: "in foreign content" insertion mode is special; it is combined
830     ## with the secondary insertion mode. In this parser, they are stored
831     ## together in the bit-or'ed form.
832 wakaba 1.54
833 wakaba 1.84 ## NOTE: "initial" and "before html" insertion modes have no constants.
834    
835     ## NOTE: "after after body" insertion mode.
836 wakaba 1.54 sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }
837 wakaba 1.84
838     ## NOTE: "after after frameset" insertion mode.
839 wakaba 1.54 sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }
840 wakaba 1.84
## Concrete insertion modes: one or more group flags OR'ed with a 2-bit
## value that distinguishes the modes sharing those flags.
841 wakaba 1.54 sub IN_HEAD_IM () { HEAD_IMS | 0b00 }
842     sub IN_HEAD_NOSCRIPT_IM () { HEAD_IMS | 0b01 }
843     sub AFTER_HEAD_IM () { HEAD_IMS | 0b10 }
844     sub BEFORE_HEAD_IM () { HEAD_IMS | 0b11 }
845     sub IN_BODY_IM () { BODY_IMS }
846 wakaba 1.56 sub IN_CELL_IM () { BODY_IMS | BODY_TABLE_IMS | 0b01 }
847     sub IN_CAPTION_IM () { BODY_IMS | BODY_TABLE_IMS | 0b10 }
848     sub IN_ROW_IM () { TABLE_IMS | ROW_IMS | 0b01 }
849     sub IN_TABLE_BODY_IM () { TABLE_IMS | ROW_IMS | 0b10 }
850 wakaba 1.54 sub IN_TABLE_IM () { TABLE_IMS }
851     sub AFTER_BODY_IM () { BODY_AFTER_IMS }
852     sub IN_FRAMESET_IM () { FRAME_IMS | 0b01 }
853     sub AFTER_FRAMESET_IM () { FRAME_IMS | 0b10 }
854 wakaba 1.101 sub IN_SELECT_IM () { SELECT_IMS | 0b01 }
855     sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 0b10 }
856 wakaba 1.54 sub IN_COLUMN_GROUP_IM () { 0b10 } # carries no group flag, only the low bits
857    
858 wakaba 1.1 ## Implementations MUST act as if they used the state machine in the spec
859    
## Resets the tokenizer-related members of the parser object (the sole
## argument, |$self|) before tokenization starts: the state and content
## model get their initial values, the per-token members are cleared, and
## the |!!!next-input-character| macro is run once. The macro is expanded
## by the preprocessor that turns this |.src| file into Perl; its
## expansion is not visible here.
## NOTE(review): |$self->{escape}| is only mentioned in a comment and is
## not cleared here - confirm that a stale value cannot survive if the
## same object is initialized twice.
860     sub _initialize_tokenizer ($) {
861     my $self = shift;
862 wakaba 1.57 $self->{state} = DATA_STATE; # MUST
863 wakaba 1.163 #$self->{state_keyword}; # initialized when used
864 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # be
865 wakaba 1.1 undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
866     undef $self->{current_attribute};
867     undef $self->{last_emitted_start_tag_name};
868     undef $self->{last_attribute_value_state};
869 wakaba 1.125 delete $self->{self_closing};
870 wakaba 1.1 $self->{char} = []; # emptied before the first character is read
871 wakaba 1.76 # $self->{next_char}
872 wakaba 1.1 !!!next-input-character; # presumably loads |$self->{next_char}| - confirm in the macro definition
873     $self->{token} = []; # queue of pending tokens; |_get_next_token| shifts from it
874 wakaba 1.18 # $self->{escape}
875 wakaba 1.1 } # _initialize_tokenizer
876    
877     ## A token has:
878 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
879     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
880     ## ->{name} (DOCTYPE_TOKEN)
881     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
882     ## ->{public_identifier} (DOCTYPE_TOKEN)
883     ## ->{system_identifier} (DOCTYPE_TOKEN)
884 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
885 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
886 wakaba 1.66 ## ->{name}
887     ## ->{value}
888     ## ->{has_reference} == 1 or 0
889 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
890 wakaba 1.125 ## NOTE: The "self-closing flag" is held in |$self->{self_closing}|.
891     ## The token's own |->{self_closing}| member is used to save the value of
892     ## |$self->{self_closing}| while the token is pushed back to the stack.
893    
894 wakaba 1.1 ## Emitted token MUST immediately be handled by the tree construction state.
895    
896     ## Before each step, UA MAY check to see if either one of the scripts in
897     ## "list of scripts that will execute as soon as possible" or the first
898     ## script in the "list of scripts that will execute asynchronously",
899     ## has completed loading. If one has, then it MUST be executed
900     ## and removed from the list.
901    
902 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
903     ## documents and not to user agents and conformance checkers,
904     ## contains some requirements that are not detected by the
905     ## parsing algorithm:
906     ## - Some requirements on character encoding declarations. ## TODO
907     ## - "Elements MUST NOT contain content that their content model disallows."
908     ## ... Some are parse error, some are not (will be reported by c.c.).
909     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
910     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
911     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
912    
913     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
914     ## be detected by the HTML5 parsing algorithm:
915     ## - Text,
916    
917 wakaba 1.1 sub _get_next_token ($) {
918     my $self = shift;
919 wakaba 1.125
920     if ($self->{self_closing}) {
921     !!!parse-error (type => 'nestc', token => $self->{current_token});
922     ## NOTE: The |self_closing| flag is only set by start tag token.
923     ## In addition, when a start tag token is emitted, it is always set to
924     ## |current_token|.
925     delete $self->{self_closing};
926     }
927    
928 wakaba 1.1 if (@{$self->{token}}) {
929 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
930 wakaba 1.1 return shift @{$self->{token}};
931     }
932    
933     A: {
934 wakaba 1.57 if ($self->{state} == DATA_STATE) {
935 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
936 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
937     not $self->{escape}) {
938 wakaba 1.77 !!!cp (1);
939 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
940 wakaba 1.1 !!!next-input-character;
941     redo A;
942     } else {
943 wakaba 1.77 !!!cp (2);
944 wakaba 1.1 #
945     }
946 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
947 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
948 wakaba 1.13 unless ($self->{escape}) {
949 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
950     $self->{prev_char}->[1] == 0x0021 and # !
951     $self->{prev_char}->[2] == 0x003C) { # <
952 wakaba 1.77 !!!cp (3);
953 wakaba 1.13 $self->{escape} = 1;
954 wakaba 1.77 } else {
955     !!!cp (4);
956 wakaba 1.13 }
957 wakaba 1.77 } else {
958     !!!cp (5);
959 wakaba 1.13 }
960     }
961    
962     #
963 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
964 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
965     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
966 wakaba 1.13 not $self->{escape})) {
967 wakaba 1.77 !!!cp (6);
968 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
969 wakaba 1.1 !!!next-input-character;
970     redo A;
971     } else {
972 wakaba 1.77 !!!cp (7);
973 wakaba 1.1 #
974     }
975 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
976 wakaba 1.13 if ($self->{escape} and
977 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
978 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
979     $self->{prev_char}->[1] == 0x002D) { # -
980 wakaba 1.77 !!!cp (8);
981 wakaba 1.13 delete $self->{escape};
982 wakaba 1.77 } else {
983     !!!cp (9);
984 wakaba 1.13 }
985 wakaba 1.77 } else {
986     !!!cp (10);
987 wakaba 1.13 }
988    
989     #
990 wakaba 1.76 } elsif ($self->{next_char} == -1) {
991 wakaba 1.77 !!!cp (11);
992 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
993     line => $self->{line}, column => $self->{column}});
994 wakaba 1.1 last A; ## TODO: ok?
995 wakaba 1.77 } else {
996     !!!cp (12);
997 wakaba 1.1 }
998     # Anything else
999 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
1000 wakaba 1.112 data => chr $self->{next_char},
1001 wakaba 1.120 line => $self->{line}, column => $self->{column},
1002 wakaba 1.118 };
1003 wakaba 1.1 ## Stay in the data state
1004     !!!next-input-character;
1005    
1006     !!!emit ($token);
1007    
1008     redo A;
1009 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
1010 wakaba 1.1 ## (cannot happen in CDATA state)
1011 wakaba 1.112
1012 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
1013 wakaba 1.1
1014 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
1015 wakaba 1.1
1016 wakaba 1.57 $self->{state} = DATA_STATE;
1017 wakaba 1.1 # next-input-character is already done
1018    
1019     unless (defined $token) {
1020 wakaba 1.77 !!!cp (13);
1021 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
1022 wakaba 1.120 line => $l, column => $c,
1023 wakaba 1.118 });
1024 wakaba 1.1 } else {
1025 wakaba 1.77 !!!cp (14);
1026 wakaba 1.1 !!!emit ($token);
1027     }
1028    
1029     redo A;
1030 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
1031 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1032 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
1033 wakaba 1.77 !!!cp (15);
1034 wakaba 1.1 !!!next-input-character;
1035 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1036 wakaba 1.1 redo A;
1037     } else {
1038 wakaba 1.77 !!!cp (16);
1039 wakaba 1.1 ## reconsume
1040 wakaba 1.57 $self->{state} = DATA_STATE;
1041 wakaba 1.1
1042 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1043 wakaba 1.120 line => $self->{line_prev},
1044     column => $self->{column_prev},
1045 wakaba 1.118 });
1046 wakaba 1.1
1047     redo A;
1048     }
1049 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
1050 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
1051 wakaba 1.77 !!!cp (17);
1052 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
1053 wakaba 1.1 !!!next-input-character;
1054     redo A;
1055 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1056 wakaba 1.77 !!!cp (18);
1057 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
1058 wakaba 1.1 !!!next-input-character;
1059     redo A;
1060 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1061     $self->{next_char} <= 0x005A) { # A..Z
1062 wakaba 1.77 !!!cp (19);
1063 wakaba 1.1 $self->{current_token}
1064 wakaba 1.55 = {type => START_TAG_TOKEN,
1065 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
1066     line => $self->{line_prev},
1067     column => $self->{column_prev}};
1068 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1069 wakaba 1.1 !!!next-input-character;
1070     redo A;
1071 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1072     $self->{next_char} <= 0x007A) { # a..z
1073 wakaba 1.77 !!!cp (20);
1074 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
1075 wakaba 1.112 tag_name => chr ($self->{next_char}),
1076     line => $self->{line_prev},
1077     column => $self->{column_prev}};
1078 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1079 wakaba 1.1 !!!next-input-character;
1080     redo A;
1081 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1082 wakaba 1.77 !!!cp (21);
1083 wakaba 1.115 !!!parse-error (type => 'empty start tag',
1084     line => $self->{line_prev},
1085     column => $self->{column_prev});
1086 wakaba 1.57 $self->{state} = DATA_STATE;
1087 wakaba 1.1 !!!next-input-character;
1088    
1089 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
1090 wakaba 1.120 line => $self->{line_prev},
1091     column => $self->{column_prev},
1092 wakaba 1.118 });
1093 wakaba 1.1
1094     redo A;
1095 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
1096 wakaba 1.77 !!!cp (22);
1097 wakaba 1.115 !!!parse-error (type => 'pio',
1098     line => $self->{line_prev},
1099     column => $self->{column_prev});
1100 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1101 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1102 wakaba 1.120 line => $self->{line_prev},
1103     column => $self->{column_prev},
1104 wakaba 1.118 };
1105 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1106 wakaba 1.1 redo A;
1107     } else {
1108 wakaba 1.77 !!!cp (23);
1109 wakaba 1.136 !!!parse-error (type => 'bare stago',
1110     line => $self->{line_prev},
1111     column => $self->{column_prev});
1112 wakaba 1.57 $self->{state} = DATA_STATE;
1113 wakaba 1.1 ## reconsume
1114    
1115 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
1116 wakaba 1.120 line => $self->{line_prev},
1117     column => $self->{column_prev},
1118 wakaba 1.118 });
1119 wakaba 1.1
1120     redo A;
1121     }
1122     } else {
1123 wakaba 1.40 die "$0: $self->{content_model} in tag open";
1124 wakaba 1.1 }
1125 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
1126 wakaba 1.164 ## NOTE: The "close tag open state" in the spec is implemented as
1127     ## |CLOSE_TAG_OPEN_STATE| and |CDATA_PCDATA_CLOSE_TAG_STATE|.
1128    
1129 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
1130 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
1131 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
1132 wakaba 1.164 $self->{state} = CDATA_PCDATA_CLOSE_TAG_STATE;
1133     $self->{state_keyword} = '';
1134     ## Reconsume.
1135     redo A;
1136 wakaba 1.23 } else {
1137     ## No start tag token has ever been emitted
1138 wakaba 1.164 ## NOTE: See <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>.
1139 wakaba 1.77 !!!cp (28);
1140 wakaba 1.57 $self->{state} = DATA_STATE;
1141 wakaba 1.164 ## Reconsume.
1142 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1143 wakaba 1.120 line => $l, column => $c,
1144 wakaba 1.118 });
1145 wakaba 1.1 redo A;
1146     }
1147     }
1148 wakaba 1.164
1149 wakaba 1.76 if (0x0041 <= $self->{next_char} and
1150     $self->{next_char} <= 0x005A) { # A..Z
1151 wakaba 1.77 !!!cp (29);
1152 wakaba 1.112 $self->{current_token}
1153     = {type => END_TAG_TOKEN,
1154     tag_name => chr ($self->{next_char} + 0x0020),
1155     line => $l, column => $c};
1156 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1157 wakaba 1.1 !!!next-input-character;
1158     redo A;
1159 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1160     $self->{next_char} <= 0x007A) { # a..z
1161 wakaba 1.77 !!!cp (30);
1162 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
1163 wakaba 1.112 tag_name => chr ($self->{next_char}),
1164     line => $l, column => $c};
1165 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1166 wakaba 1.1 !!!next-input-character;
1167     redo A;
1168 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1169 wakaba 1.77 !!!cp (31);
1170 wakaba 1.115 !!!parse-error (type => 'empty end tag',
1171     line => $self->{line_prev}, ## "<" in "</>"
1172     column => $self->{column_prev} - 1);
1173 wakaba 1.57 $self->{state} = DATA_STATE;
1174 wakaba 1.1 !!!next-input-character;
1175     redo A;
1176 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1177 wakaba 1.77 !!!cp (32);
1178 wakaba 1.3 !!!parse-error (type => 'bare etago');
1179 wakaba 1.57 $self->{state} = DATA_STATE;
1180 wakaba 1.1 # reconsume
1181    
1182 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1183 wakaba 1.120 line => $l, column => $c,
1184 wakaba 1.118 });
1185 wakaba 1.1
1186     redo A;
1187     } else {
1188 wakaba 1.77 !!!cp (33);
1189 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
1190 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1191 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1192 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
1193     column => $self->{column_prev} - 1,
1194 wakaba 1.118 };
1195 wakaba 1.164 ## NOTE: $self->{next_char} is intentionally left as is.
1196     ## Although the "anything else" case of the spec not explicitly
1197     ## states that the next input character is to be reconsumed,
1198     ## it will be included to the |data| of the comment token
1199     ## generated from the bogus end tag, as defined in the
1200     ## "bogus comment state" entry.
1201     redo A;
1202     }
1203     } elsif ($self->{state} == CDATA_PCDATA_CLOSE_TAG_STATE) {
1204     my $ch = substr $self->{last_emitted_start_tag_name}, length $self->{state_keyword}, 1;
1205     if (length $ch) {
1206     my $CH = $ch;
1207     $ch =~ tr/a-z/A-Z/;
1208     my $nch = chr $self->{next_char};
1209     if ($nch eq $ch or $nch eq $CH) {
1210     !!!cp (24);
1211     ## Stay in the state.
1212     $self->{state_keyword} .= $nch;
1213     !!!next-input-character;
1214     redo A;
1215     } else {
1216     !!!cp (25);
1217     $self->{state} = DATA_STATE;
1218     ## Reconsume.
1219     !!!emit ({type => CHARACTER_TOKEN,
1220     data => '</' . $self->{state_keyword},
1221     line => $self->{line_prev},
1222     column => $self->{column_prev} - 1 - length $self->{state_keyword},
1223     });
1224     redo A;
1225     }
1226     } else { # after "<{tag-name}"
1227     unless ({
1228     0x0009 => 1, # HT
1229     0x000A => 1, # LF
1230     0x000B => 1, # VT
1231     0x000C => 1, # FF
1232     0x0020 => 1, # SP
1233     0x003E => 1, # >
1234     0x002F => 1, # /
1235     -1 => 1, # EOF
1236     }->{$self->{next_char}}) {
1237     !!!cp (26);
1238     ## Reconsume.
1239     $self->{state} = DATA_STATE;
1240     !!!emit ({type => CHARACTER_TOKEN,
1241     data => '</' . $self->{state_keyword},
1242     line => $self->{line_prev},
1243     column => $self->{column_prev} - 1 - length $self->{state_keyword},
1244     });
1245     redo A;
1246     } else {
1247     !!!cp (27);
1248     $self->{current_token}
1249     = {type => END_TAG_TOKEN,
1250     tag_name => $self->{last_emitted_start_tag_name},
1251     line => $self->{line_prev},
1252     column => $self->{column_prev} - 1 - length $self->{state_keyword}};
1253     $self->{state} = TAG_NAME_STATE;
1254     ## Reconsume.
1255     redo A;
1256     }
1257 wakaba 1.1 }
1258 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
1259 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1260     $self->{next_char} == 0x000A or # LF
1261     $self->{next_char} == 0x000B or # VT
1262     $self->{next_char} == 0x000C or # FF
1263     $self->{next_char} == 0x0020) { # SP
1264 wakaba 1.77 !!!cp (34);
1265 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1266 wakaba 1.1 !!!next-input-character;
1267     redo A;
1268 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1269 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1270 wakaba 1.77 !!!cp (35);
1271 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1272 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1273 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1274 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1275     # ## NOTE: This should never be reached.
1276     # !!! cp (36);
1277     # !!! parse-error (type => 'end tag attribute');
1278     #} else {
1279 wakaba 1.77 !!!cp (37);
1280 wakaba 1.78 #}
1281 wakaba 1.1 } else {
1282     die "$0: $self->{current_token}->{type}: Unknown token type";
1283     }
1284 wakaba 1.57 $self->{state} = DATA_STATE;
1285 wakaba 1.1 !!!next-input-character;
1286    
1287     !!!emit ($self->{current_token}); # start tag or end tag
1288    
1289     redo A;
1290 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1291     $self->{next_char} <= 0x005A) { # A..Z
1292 wakaba 1.77 !!!cp (38);
1293 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1294 wakaba 1.1 # start tag or end tag
1295     ## Stay in this state
1296     !!!next-input-character;
1297     redo A;
1298 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1299 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1300 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1301 wakaba 1.77 !!!cp (39);
1302 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1303 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1304 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1305 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1306     # ## NOTE: This state should never be reached.
1307     # !!! cp (40);
1308     # !!! parse-error (type => 'end tag attribute');
1309     #} else {
1310 wakaba 1.77 !!!cp (41);
1311 wakaba 1.78 #}
1312 wakaba 1.1 } else {
1313     die "$0: $self->{current_token}->{type}: Unknown token type";
1314     }
1315 wakaba 1.57 $self->{state} = DATA_STATE;
1316 wakaba 1.1 # reconsume
1317    
1318     !!!emit ($self->{current_token}); # start tag or end tag
1319    
1320     redo A;
1321 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1322 wakaba 1.125 !!!cp (42);
1323     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1324 wakaba 1.1 !!!next-input-character;
1325     redo A;
1326     } else {
1327 wakaba 1.77 !!!cp (44);
1328 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1329 wakaba 1.1 # start tag or end tag
1330     ## Stay in the state
1331     !!!next-input-character;
1332     redo A;
1333     }
1334 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1335 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1336     $self->{next_char} == 0x000A or # LF
1337     $self->{next_char} == 0x000B or # VT
1338     $self->{next_char} == 0x000C or # FF
1339     $self->{next_char} == 0x0020) { # SP
1340 wakaba 1.77 !!!cp (45);
1341 wakaba 1.1 ## Stay in the state
1342     !!!next-input-character;
1343     redo A;
1344 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1345 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1346 wakaba 1.77 !!!cp (46);
1347 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1348 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1349 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1350 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1351 wakaba 1.77 !!!cp (47);
1352 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1353 wakaba 1.77 } else {
1354     !!!cp (48);
1355 wakaba 1.1 }
1356     } else {
1357     die "$0: $self->{current_token}->{type}: Unknown token type";
1358     }
1359 wakaba 1.57 $self->{state} = DATA_STATE;
1360 wakaba 1.1 !!!next-input-character;
1361    
1362     !!!emit ($self->{current_token}); # start tag or end tag
1363    
1364     redo A;
1365 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1366     $self->{next_char} <= 0x005A) { # A..Z
1367 wakaba 1.77 !!!cp (49);
1368 wakaba 1.119 $self->{current_attribute}
1369     = {name => chr ($self->{next_char} + 0x0020),
1370     value => '',
1371     line => $self->{line}, column => $self->{column}};
1372 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1373 wakaba 1.1 !!!next-input-character;
1374     redo A;
1375 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1376 wakaba 1.125 !!!cp (50);
1377     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1378 wakaba 1.1 !!!next-input-character;
1379     redo A;
1380 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1381 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1382 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1383 wakaba 1.77 !!!cp (52);
1384 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1385 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1386 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1387 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1388 wakaba 1.77 !!!cp (53);
1389 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1390 wakaba 1.77 } else {
1391     !!!cp (54);
1392 wakaba 1.1 }
1393     } else {
1394     die "$0: $self->{current_token}->{type}: Unknown token type";
1395     }
1396 wakaba 1.57 $self->{state} = DATA_STATE;
1397 wakaba 1.1 # reconsume
1398    
1399     !!!emit ($self->{current_token}); # start tag or end tag
1400    
1401     redo A;
1402     } else {
1403 wakaba 1.72 if ({
1404     0x0022 => 1, # "
1405     0x0027 => 1, # '
1406     0x003D => 1, # =
1407 wakaba 1.76 }->{$self->{next_char}}) {
1408 wakaba 1.77 !!!cp (55);
1409 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1410 wakaba 1.77 } else {
1411     !!!cp (56);
1412 wakaba 1.72 }
1413 wakaba 1.119 $self->{current_attribute}
1414     = {name => chr ($self->{next_char}),
1415     value => '',
1416     line => $self->{line}, column => $self->{column}};
1417 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1418 wakaba 1.1 !!!next-input-character;
1419     redo A;
1420     }
1421 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1422 wakaba 1.1 my $before_leave = sub {
1423     if (exists $self->{current_token}->{attributes} # start tag or end tag
1424     ->{$self->{current_attribute}->{name}}) { # MUST
1425 wakaba 1.77 !!!cp (57);
1426 wakaba 1.153 !!!parse-error (type => 'duplicate attribute', text => $self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1427 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1428     } else {
1429 wakaba 1.77 !!!cp (58);
1430 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1431     = $self->{current_attribute};
1432     }
1433     }; # $before_leave
1434    
1435 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1436     $self->{next_char} == 0x000A or # LF
1437     $self->{next_char} == 0x000B or # VT
1438     $self->{next_char} == 0x000C or # FF
1439     $self->{next_char} == 0x0020) { # SP
1440 wakaba 1.77 !!!cp (59);
1441 wakaba 1.1 $before_leave->();
1442 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1443 wakaba 1.1 !!!next-input-character;
1444     redo A;
1445 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1446 wakaba 1.77 !!!cp (60);
1447 wakaba 1.1 $before_leave->();
1448 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1449 wakaba 1.1 !!!next-input-character;
1450     redo A;
1451 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1452 wakaba 1.1 $before_leave->();
1453 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1454 wakaba 1.77 !!!cp (61);
1455 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1456 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1457 wakaba 1.77 !!!cp (62);
1458 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1459 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1460 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1461 wakaba 1.1 }
1462     } else {
1463     die "$0: $self->{current_token}->{type}: Unknown token type";
1464     }
1465 wakaba 1.57 $self->{state} = DATA_STATE;
1466 wakaba 1.1 !!!next-input-character;
1467    
1468     !!!emit ($self->{current_token}); # start tag or end tag
1469    
1470     redo A;
1471 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1472     $self->{next_char} <= 0x005A) { # A..Z
1473 wakaba 1.77 !!!cp (63);
1474 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1475 wakaba 1.1 ## Stay in the state
1476     !!!next-input-character;
1477     redo A;
1478 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1479 wakaba 1.125 !!!cp (64);
1480 wakaba 1.1 $before_leave->();
1481 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1482 wakaba 1.1 !!!next-input-character;
1483     redo A;
1484 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1485 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1486 wakaba 1.1 $before_leave->();
1487 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1488 wakaba 1.77 !!!cp (66);
1489 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1490 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1491 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1492 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1493 wakaba 1.77 !!!cp (67);
1494 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1495 wakaba 1.77 } else {
1496 wakaba 1.78 ## NOTE: This state should never be reached.
1497 wakaba 1.77 !!!cp (68);
1498 wakaba 1.1 }
1499     } else {
1500     die "$0: $self->{current_token}->{type}: Unknown token type";
1501     }
1502 wakaba 1.57 $self->{state} = DATA_STATE;
1503 wakaba 1.1 # reconsume
1504    
1505     !!!emit ($self->{current_token}); # start tag or end tag
1506    
1507     redo A;
1508     } else {
1509 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1510     $self->{next_char} == 0x0027) { # '
1511 wakaba 1.77 !!!cp (69);
1512 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1513 wakaba 1.77 } else {
1514     !!!cp (70);
1515 wakaba 1.72 }
1516 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1517 wakaba 1.1 ## Stay in the state
1518     !!!next-input-character;
1519     redo A;
1520     }
1521 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1522 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1523     $self->{next_char} == 0x000A or # LF
1524     $self->{next_char} == 0x000B or # VT
1525     $self->{next_char} == 0x000C or # FF
1526     $self->{next_char} == 0x0020) { # SP
1527 wakaba 1.77 !!!cp (71);
1528 wakaba 1.1 ## Stay in the state
1529     !!!next-input-character;
1530     redo A;
1531 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1532 wakaba 1.77 !!!cp (72);
1533 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1534 wakaba 1.1 !!!next-input-character;
1535     redo A;
1536 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1537 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1538 wakaba 1.77 !!!cp (73);
1539 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1540 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1541 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1542 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1543 wakaba 1.77 !!!cp (74);
1544 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1545 wakaba 1.77 } else {
1546 wakaba 1.78 ## NOTE: This state should never be reached.
1547 wakaba 1.77 !!!cp (75);
1548 wakaba 1.1 }
1549     } else {
1550     die "$0: $self->{current_token}->{type}: Unknown token type";
1551     }
1552 wakaba 1.57 $self->{state} = DATA_STATE;
1553 wakaba 1.1 !!!next-input-character;
1554    
1555     !!!emit ($self->{current_token}); # start tag or end tag
1556    
1557     redo A;
1558 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1559     $self->{next_char} <= 0x005A) { # A..Z
1560 wakaba 1.77 !!!cp (76);
1561 wakaba 1.119 $self->{current_attribute}
1562     = {name => chr ($self->{next_char} + 0x0020),
1563     value => '',
1564     line => $self->{line}, column => $self->{column}};
1565 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1566 wakaba 1.1 !!!next-input-character;
1567     redo A;
1568 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1569 wakaba 1.125 !!!cp (77);
1570     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1571 wakaba 1.1 !!!next-input-character;
1572     redo A;
1573 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1574 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1575 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1576 wakaba 1.77 !!!cp (79);
1577 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1578 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1579 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1580 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1581 wakaba 1.77 !!!cp (80);
1582 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1583 wakaba 1.77 } else {
1584 wakaba 1.78 ## NOTE: This state should never be reached.
1585 wakaba 1.77 !!!cp (81);
1586 wakaba 1.1 }
1587     } else {
1588     die "$0: $self->{current_token}->{type}: Unknown token type";
1589     }
1590 wakaba 1.57 $self->{state} = DATA_STATE;
1591 wakaba 1.1 # reconsume
1592    
1593     !!!emit ($self->{current_token}); # start tag or end tag
1594    
1595     redo A;
1596     } else {
1597 wakaba 1.156 if ($self->{next_char} == 0x0022 or # "
1598     $self->{next_char} == 0x0027) { # '
1599     !!!cp (78);
1600     !!!parse-error (type => 'bad attribute name');
1601     } else {
1602     !!!cp (82);
1603     }
1604 wakaba 1.119 $self->{current_attribute}
1605     = {name => chr ($self->{next_char}),
1606     value => '',
1607     line => $self->{line}, column => $self->{column}};
1608 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1609 wakaba 1.1 !!!next-input-character;
1610     redo A;
1611     }
1612 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1613 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1614     $self->{next_char} == 0x000A or # LF
1615     $self->{next_char} == 0x000B or # VT
1616     $self->{next_char} == 0x000C or # FF
1617     $self->{next_char} == 0x0020) { # SP
1618 wakaba 1.77 !!!cp (83);
1619 wakaba 1.1 ## Stay in the state
1620     !!!next-input-character;
1621     redo A;
1622 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1623 wakaba 1.77 !!!cp (84);
1624 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1625 wakaba 1.1 !!!next-input-character;
1626     redo A;
1627 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1628 wakaba 1.77 !!!cp (85);
1629 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1630 wakaba 1.1 ## reconsume
1631     redo A;
1632 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1633 wakaba 1.77 !!!cp (86);
1634 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1635 wakaba 1.1 !!!next-input-character;
1636     redo A;
1637 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1638 wakaba 1.156 !!!parse-error (type => 'empty unquoted attribute value');
1639 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1640 wakaba 1.77 !!!cp (87);
1641 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1642 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1643 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1644 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1645 wakaba 1.77 !!!cp (88);
1646 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1647 wakaba 1.77 } else {
1648 wakaba 1.78 ## NOTE: This state should never be reached.
1649 wakaba 1.77 !!!cp (89);
1650 wakaba 1.1 }
1651     } else {
1652     die "$0: $self->{current_token}->{type}: Unknown token type";
1653     }
1654 wakaba 1.57 $self->{state} = DATA_STATE;
1655 wakaba 1.1 !!!next-input-character;
1656    
1657     !!!emit ($self->{current_token}); # start tag or end tag
1658    
1659     redo A;
1660 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1661 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1662 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1663 wakaba 1.77 !!!cp (90);
1664 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1665 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1666 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1667 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1668 wakaba 1.77 !!!cp (91);
1669 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1670 wakaba 1.77 } else {
1671 wakaba 1.78 ## NOTE: This state should never be reached.
1672 wakaba 1.77 !!!cp (92);
1673 wakaba 1.1 }
1674     } else {
1675     die "$0: $self->{current_token}->{type}: Unknown token type";
1676     }
1677 wakaba 1.57 $self->{state} = DATA_STATE;
1678 wakaba 1.1 ## reconsume
1679    
1680     !!!emit ($self->{current_token}); # start tag or end tag
1681    
1682     redo A;
1683     } else {
1684 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1685 wakaba 1.77 !!!cp (93);
1686 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1687 wakaba 1.77 } else {
1688     !!!cp (94);
1689 wakaba 1.72 }
1690 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1691 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1692 wakaba 1.1 !!!next-input-character;
1693     redo A;
1694     }
1695 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1696 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1697 wakaba 1.77 !!!cp (95);
1698 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1699 wakaba 1.1 !!!next-input-character;
1700     redo A;
1701 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1702 wakaba 1.77 !!!cp (96);
1703 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1704     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1705 wakaba 1.1 !!!next-input-character;
1706     redo A;
1707 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1708 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1709 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1710 wakaba 1.77 !!!cp (97);
1711 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1712 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1713 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1714 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1715 wakaba 1.77 !!!cp (98);
1716 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1717 wakaba 1.77 } else {
1718 wakaba 1.78 ## NOTE: This state should never be reached.
1719 wakaba 1.77 !!!cp (99);
1720 wakaba 1.1 }
1721     } else {
1722     die "$0: $self->{current_token}->{type}: Unknown token type";
1723     }
1724 wakaba 1.57 $self->{state} = DATA_STATE;
1725 wakaba 1.1 ## reconsume
1726    
1727     !!!emit ($self->{current_token}); # start tag or end tag
1728    
1729     redo A;
1730     } else {
1731 wakaba 1.77 !!!cp (100);
1732 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1733 wakaba 1.1 ## Stay in the state
1734     !!!next-input-character;
1735     redo A;
1736     }
1737 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1738 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1739 wakaba 1.77 !!!cp (101);
1740 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1741 wakaba 1.1 !!!next-input-character;
1742     redo A;
1743 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1744 wakaba 1.77 !!!cp (102);
1745 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1746     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1747 wakaba 1.1 !!!next-input-character;
1748     redo A;
1749 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1750 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1751 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1752 wakaba 1.77 !!!cp (103);
1753 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1754 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1755 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1756 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1757 wakaba 1.77 !!!cp (104);
1758 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1759 wakaba 1.77 } else {
1760 wakaba 1.78 ## NOTE: This state should never be reached.
1761 wakaba 1.77 !!!cp (105);
1762 wakaba 1.1 }
1763     } else {
1764     die "$0: $self->{current_token}->{type}: Unknown token type";
1765     }
1766 wakaba 1.57 $self->{state} = DATA_STATE;
1767 wakaba 1.1 ## reconsume
1768    
1769     !!!emit ($self->{current_token}); # start tag or end tag
1770    
1771     redo A;
1772     } else {
1773 wakaba 1.77 !!!cp (106);
1774 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1775 wakaba 1.1 ## Stay in the state
1776     !!!next-input-character;
1777     redo A;
1778     }
1779 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1780 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1781     $self->{next_char} == 0x000A or # LF
1782     $self->{next_char} == 0x000B or # VT
1783     $self->{next_char} == 0x000C or # FF
1784     $self->{next_char} == 0x0020) { # SP
1785 wakaba 1.77 !!!cp (107);
1786 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1787 wakaba 1.1 !!!next-input-character;
1788     redo A;
1789 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1790 wakaba 1.77 !!!cp (108);
1791 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1792     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1793 wakaba 1.1 !!!next-input-character;
1794     redo A;
1795 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1796 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1797 wakaba 1.77 !!!cp (109);
1798 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1799 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1800 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1801 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1802 wakaba 1.77 !!!cp (110);
1803 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1804 wakaba 1.77 } else {
1805 wakaba 1.78 ## NOTE: This state should never be reached.
1806 wakaba 1.77 !!!cp (111);
1807 wakaba 1.1 }
1808     } else {
1809     die "$0: $self->{current_token}->{type}: Unknown token type";
1810     }
1811 wakaba 1.57 $self->{state} = DATA_STATE;
1812 wakaba 1.1 !!!next-input-character;
1813    
1814     !!!emit ($self->{current_token}); # start tag or end tag
1815    
1816     redo A;
1817 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1818 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1819 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1820 wakaba 1.77 !!!cp (112);
1821 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1822 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1823 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1824 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1825 wakaba 1.77 !!!cp (113);
1826 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1827 wakaba 1.77 } else {
1828 wakaba 1.78 ## NOTE: This state should never be reached.
1829 wakaba 1.77 !!!cp (114);
1830 wakaba 1.1 }
1831     } else {
1832     die "$0: $self->{current_token}->{type}: Unknown token type";
1833     }
1834 wakaba 1.57 $self->{state} = DATA_STATE;
1835 wakaba 1.1 ## reconsume
1836    
1837     !!!emit ($self->{current_token}); # start tag or end tag
1838    
1839     redo A;
1840     } else {
1841 wakaba 1.72 if ({
1842     0x0022 => 1, # "
1843     0x0027 => 1, # '
1844     0x003D => 1, # =
1845 wakaba 1.76 }->{$self->{next_char}}) {
1846 wakaba 1.77 !!!cp (115);
1847 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1848 wakaba 1.77 } else {
1849     !!!cp (116);
1850 wakaba 1.72 }
1851 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1852 wakaba 1.1 ## Stay in the state
1853     !!!next-input-character;
1854     redo A;
1855     }
1856 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1857 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1858     (1,
1859     $self->{last_attribute_value_state}
1860     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1861     $self->{last_attribute_value_state}
1862     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1863     -1);
1864 wakaba 1.1
1865     unless (defined $token) {
1866 wakaba 1.77 !!!cp (117);
1867 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1868     } else {
1869 wakaba 1.77 !!!cp (118);
1870 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1871 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1872 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1873     }
1874    
1875     $self->{state} = $self->{last_attribute_value_state};
1876     # next-input-character is already done
1877     redo A;
1878 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1879 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1880     $self->{next_char} == 0x000A or # LF
1881     $self->{next_char} == 0x000B or # VT
1882     $self->{next_char} == 0x000C or # FF
1883     $self->{next_char} == 0x0020) { # SP
1884 wakaba 1.77 !!!cp (118);
1885 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1886     !!!next-input-character;
1887     redo A;
1888 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1889 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1890 wakaba 1.77 !!!cp (119);
1891 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1892     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1893     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1894     if ($self->{current_token}->{attributes}) {
1895 wakaba 1.77 !!!cp (120);
1896 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1897 wakaba 1.77 } else {
1898 wakaba 1.78 ## NOTE: This state should never be reached.
1899 wakaba 1.77 !!!cp (121);
1900 wakaba 1.72 }
1901     } else {
1902     die "$0: $self->{current_token}->{type}: Unknown token type";
1903     }
1904     $self->{state} = DATA_STATE;
1905     !!!next-input-character;
1906    
1907     !!!emit ($self->{current_token}); # start tag or end tag
1908    
1909     redo A;
1910 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1911 wakaba 1.125 !!!cp (122);
1912     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1913 wakaba 1.72 !!!next-input-character;
1914 wakaba 1.125 redo A;
1915 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1916     !!!parse-error (type => 'unclosed tag');
1917     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1918     !!!cp (122.3);
1919     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1920     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1921     if ($self->{current_token}->{attributes}) {
1922     !!!cp (122.1);
1923     !!!parse-error (type => 'end tag attribute');
1924     } else {
1925     ## NOTE: This state should never be reached.
1926     !!!cp (122.2);
1927     }
1928     } else {
1929     die "$0: $self->{current_token}->{type}: Unknown token type";
1930     }
1931     $self->{state} = DATA_STATE;
1932     ## Reconsume.
1933     !!!emit ($self->{current_token}); # start tag or end tag
1934     redo A;
1935 wakaba 1.125 } else {
1936     !!!cp ('124.1');
1937     !!!parse-error (type => 'no space between attributes');
1938     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1939     ## reconsume
1940     redo A;
1941     }
1942     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1943     if ($self->{next_char} == 0x003E) { # >
1944     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1945     !!!cp ('124.2');
1946     !!!parse-error (type => 'nestc', token => $self->{current_token});
1947     ## TODO: Different type than slash in start tag
1948     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1949     if ($self->{current_token}->{attributes}) {
1950     !!!cp ('124.4');
1951     !!!parse-error (type => 'end tag attribute');
1952     } else {
1953     !!!cp ('124.5');
1954     }
1955     ## TODO: Test |<title></title/>|
1956 wakaba 1.72 } else {
1957 wakaba 1.125 !!!cp ('124.3');
1958     $self->{self_closing} = 1;
1959 wakaba 1.72 }
1960 wakaba 1.125
1961     $self->{state} = DATA_STATE;
1962     !!!next-input-character;
1963    
1964     !!!emit ($self->{current_token}); # start tag or end tag
1965    
1966 wakaba 1.72 redo A;
1967 wakaba 1.141 } elsif ($self->{next_char} == -1) {
1968     !!!parse-error (type => 'unclosed tag');
1969     if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1970     !!!cp (124.7);
1971     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1972     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1973     if ($self->{current_token}->{attributes}) {
1974     !!!cp (124.5);
1975     !!!parse-error (type => 'end tag attribute');
1976     } else {
1977     ## NOTE: This state should never be reached.
1978     !!!cp (124.6);
1979     }
1980     } else {
1981     die "$0: $self->{current_token}->{type}: Unknown token type";
1982     }
1983     $self->{state} = DATA_STATE;
1984     ## Reconsume.
1985     !!!emit ($self->{current_token}); # start tag or end tag
1986     redo A;
1987 wakaba 1.72 } else {
1988 wakaba 1.125 !!!cp ('124.4');
1989     !!!parse-error (type => 'nestc');
1990     ## TODO: This error type is wrong.
1991 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1992 wakaba 1.125 ## Reconsume.
1993 wakaba 1.72 redo A;
1994     }
1995 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1996 wakaba 1.1 ## (only happen if PCDATA state)
1997    
1998 wakaba 1.112 ## NOTE: Set by the previous state
1999     #my $token = {type => COMMENT_TOKEN, data => ''};
2000 wakaba 1.1
2001     BC: {
2002 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2003 wakaba 1.77 !!!cp (124);
2004 wakaba 1.57 $self->{state} = DATA_STATE;
2005 wakaba 1.1 !!!next-input-character;
2006    
2007 wakaba 1.112 !!!emit ($self->{current_token}); # comment
2008 wakaba 1.1
2009     redo A;
2010 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2011 wakaba 1.77 !!!cp (125);
2012 wakaba 1.57 $self->{state} = DATA_STATE;
2013 wakaba 1.1 ## reconsume
2014    
2015 wakaba 1.112 !!!emit ($self->{current_token}); # comment
2016 wakaba 1.1
2017     redo A;
2018     } else {
2019 wakaba 1.77 !!!cp (126);
2020 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2021 wakaba 1.1 !!!next-input-character;
2022     redo BC;
2023     }
2024     } # BC
2025 wakaba 1.77
2026     die "$0: _get_next_token: unexpected case [BC]";
2027 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
2028 wakaba 1.1 ## (only happen if PCDATA state)
2029    
2030 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2031 wakaba 1.163 !!!cp (133);
2032     $self->{state} = MD_HYPHEN_STATE;
2033 wakaba 1.1 !!!next-input-character;
2034 wakaba 1.163 redo A;
2035 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
2036     $self->{next_char} == 0x0064) { # d
2037 wakaba 1.163 ## ASCII case-insensitive.
2038     !!!cp (130);
2039     $self->{state} = MD_DOCTYPE_STATE;
2040     $self->{state_keyword} = chr $self->{next_char};
2041 wakaba 1.1 !!!next-input-character;
2042 wakaba 1.163 redo A;
2043 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
2044     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
2045     $self->{next_char} == 0x005B) { # [
2046 wakaba 1.163 !!!cp (135.4);
2047     $self->{state} = MD_CDATA_STATE;
2048     $self->{state_keyword} = '[';
2049 wakaba 1.127 !!!next-input-character;
2050 wakaba 1.163 redo A;
2051 wakaba 1.77 } else {
2052     !!!cp (136);
2053 wakaba 1.1 }
2054    
2055 wakaba 1.163 !!!parse-error (type => 'bogus comment',
2056     line => $self->{line_prev},
2057     column => $self->{column_prev} - 1);
2058     ## Reconsume.
2059 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
2060 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2061 wakaba 1.163 line => $self->{line_prev},
2062     column => $self->{column_prev} - 1,
2063 wakaba 1.118 };
2064 wakaba 1.1 redo A;
2065 wakaba 1.163 } elsif ($self->{state} == MD_HYPHEN_STATE) {
2066     if ($self->{next_char} == 0x002D) { # -
2067     !!!cp (127);
2068     $self->{current_token} = {type => COMMENT_TOKEN, data => '',
2069     line => $self->{line_prev},
2070     column => $self->{column_prev} - 2,
2071     };
2072     $self->{state} = COMMENT_START_STATE;
2073     !!!next-input-character;
2074     redo A;
2075     } else {
2076     !!!cp (128);
2077     !!!parse-error (type => 'bogus comment',
2078     line => $self->{line_prev},
2079     column => $self->{column_prev} - 2);
2080     $self->{state} = BOGUS_COMMENT_STATE;
2081     ## Reconsume.
2082     $self->{current_token} = {type => COMMENT_TOKEN,
2083     data => '-',
2084     line => $self->{line_prev},
2085     column => $self->{column_prev} - 2,
2086     };
2087     redo A;
2088     }
2089     } elsif ($self->{state} == MD_DOCTYPE_STATE) {
2090     ## ASCII case-insensitive.
2091     if ($self->{next_char} == [
2092     undef,
2093     0x004F, # O
2094     0x0043, # C
2095     0x0054, # T
2096     0x0059, # Y
2097     0x0050, # P
2098     ]->[length $self->{state_keyword}] or
2099     $self->{next_char} == [
2100     undef,
2101     0x006F, # o
2102     0x0063, # c
2103     0x0074, # t
2104     0x0079, # y
2105     0x0070, # p
2106     ]->[length $self->{state_keyword}]) {
2107     !!!cp (131);
2108     ## Stay in the state.
2109     $self->{state_keyword} .= chr $self->{next_char};
2110     !!!next-input-character;
2111     redo A;
2112     } elsif ((length $self->{state_keyword}) == 6 and
2113     ($self->{next_char} == 0x0045 or # E
2114     $self->{next_char} == 0x0065)) { # e
2115     !!!cp (129);
2116     $self->{state} = DOCTYPE_STATE;
2117     $self->{current_token} = {type => DOCTYPE_TOKEN,
2118     quirks => 1,
2119     line => $self->{line_prev},
2120     column => $self->{column_prev} - 7,
2121     };
2122     !!!next-input-character;
2123     redo A;
2124     } else {
2125     !!!cp (132);
2126     !!!parse-error (type => 'bogus comment',
2127     line => $self->{line_prev},
2128     column => $self->{column_prev} - 1 - length $self->{state_keyword});
2129     $self->{state} = BOGUS_COMMENT_STATE;
2130     ## Reconsume.
2131     $self->{current_token} = {type => COMMENT_TOKEN,
2132     data => $self->{state_keyword},
2133     line => $self->{line_prev},
2134     column => $self->{column_prev} - 1 - length $self->{state_keyword},
2135     };
2136     redo A;
2137     }
2138     } elsif ($self->{state} == MD_CDATA_STATE) {
2139     if ($self->{next_char} == {
2140     '[' => 0x0043, # C
2141     '[C' => 0x0044, # D
2142     '[CD' => 0x0041, # A
2143     '[CDA' => 0x0054, # T
2144     '[CDAT' => 0x0041, # A
2145     }->{$self->{state_keyword}}) {
2146     !!!cp (135.1);
2147     ## Stay in the state.
2148     $self->{state_keyword} .= chr $self->{next_char};
2149     !!!next-input-character;
2150     redo A;
2151     } elsif ($self->{state_keyword} eq '[CDATA' and
2152     $self->{next_char} == 0x005B) { # [
2153     !!!cp (135.2);
2154     $self->{state} = CDATA_BLOCK_STATE;
2155     !!!next-input-character;
2156     redo A;
2157     } else {
2158     !!!cp (135.3);
2159     !!!parse-error (type => 'bogus comment',
2160     line => $self->{line_prev},
2161     column => $self->{column_prev} - 1 - length $self->{state_keyword});
2162     $self->{state} = BOGUS_COMMENT_STATE;
2163     ## Reconsume.
2164     $self->{current_token} = {type => COMMENT_TOKEN,
2165     data => $self->{state_keyword},
2166     line => $self->{line_prev},
2167     column => $self->{column_prev} - 1 - length $self->{state_keyword},
2168     };
2169     redo A;
2170     }
2171 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
2172 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2173 wakaba 1.77 !!!cp (137);
2174 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
2175 wakaba 1.23 !!!next-input-character;
2176     redo A;
2177 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2178 wakaba 1.77 !!!cp (138);
2179 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2180 wakaba 1.57 $self->{state} = DATA_STATE;
2181 wakaba 1.23 !!!next-input-character;
2182    
2183     !!!emit ($self->{current_token}); # comment
2184    
2185     redo A;
2186 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2187 wakaba 1.77 !!!cp (139);
2188 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2189 wakaba 1.57 $self->{state} = DATA_STATE;
2190 wakaba 1.23 ## reconsume
2191    
2192     !!!emit ($self->{current_token}); # comment
2193    
2194     redo A;
2195     } else {
2196 wakaba 1.77 !!!cp (140);
2197 wakaba 1.23 $self->{current_token}->{data} # comment
2198 wakaba 1.76 .= chr ($self->{next_char});
2199 wakaba 1.57 $self->{state} = COMMENT_STATE;
2200 wakaba 1.23 !!!next-input-character;
2201     redo A;
2202     }
2203 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
2204 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2205 wakaba 1.77 !!!cp (141);
2206 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2207 wakaba 1.23 !!!next-input-character;
2208     redo A;
2209 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2210 wakaba 1.77 !!!cp (142);
2211 wakaba 1.23 !!!parse-error (type => 'bogus comment');
2212 wakaba 1.57 $self->{state} = DATA_STATE;
2213 wakaba 1.23 !!!next-input-character;
2214    
2215     !!!emit ($self->{current_token}); # comment
2216    
2217     redo A;
2218 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2219 wakaba 1.77 !!!cp (143);
2220 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
2221 wakaba 1.57 $self->{state} = DATA_STATE;
2222 wakaba 1.23 ## reconsume
2223    
2224     !!!emit ($self->{current_token}); # comment
2225    
2226     redo A;
2227     } else {
2228 wakaba 1.77 !!!cp (144);
2229 wakaba 1.23 $self->{current_token}->{data} # comment
2230 wakaba 1.76 .= '-' . chr ($self->{next_char});
2231 wakaba 1.57 $self->{state} = COMMENT_STATE;
2232 wakaba 1.23 !!!next-input-character;
2233     redo A;
2234     }
2235 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
2236 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2237 wakaba 1.77 !!!cp (145);
2238 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
2239 wakaba 1.1 !!!next-input-character;
2240     redo A;
2241 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2242 wakaba 1.77 !!!cp (146);
2243 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2244 wakaba 1.57 $self->{state} = DATA_STATE;
2245 wakaba 1.1 ## reconsume
2246    
2247     !!!emit ($self->{current_token}); # comment
2248    
2249     redo A;
2250     } else {
2251 wakaba 1.77 !!!cp (147);
2252 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2253 wakaba 1.1 ## Stay in the state
2254     !!!next-input-character;
2255     redo A;
2256     }
2257 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2258 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2259 wakaba 1.77 !!!cp (148);
2260 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2261 wakaba 1.1 !!!next-input-character;
2262     redo A;
2263 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2264 wakaba 1.77 !!!cp (149);
2265 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2266 wakaba 1.57 $self->{state} = DATA_STATE;
2267 wakaba 1.1 ## reconsume
2268    
2269     !!!emit ($self->{current_token}); # comment
2270    
2271     redo A;
2272     } else {
2273 wakaba 1.77 !!!cp (150);
2274 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2275 wakaba 1.57 $self->{state} = COMMENT_STATE;
2276 wakaba 1.1 !!!next-input-character;
2277     redo A;
2278     }
2279 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
2280 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2281 wakaba 1.77 !!!cp (151);
2282 wakaba 1.57 $self->{state} = DATA_STATE;
2283 wakaba 1.1 !!!next-input-character;
2284    
2285     !!!emit ($self->{current_token}); # comment
2286    
2287     redo A;
2288 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
2289 wakaba 1.77 !!!cp (152);
2290 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2291     line => $self->{line_prev},
2292     column => $self->{column_prev});
2293 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
2294     ## Stay in the state
2295     !!!next-input-character;
2296     redo A;
2297 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2298 wakaba 1.77 !!!cp (153);
2299 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2300 wakaba 1.57 $self->{state} = DATA_STATE;
2301 wakaba 1.1 ## reconsume
2302    
2303     !!!emit ($self->{current_token}); # comment
2304    
2305     redo A;
2306     } else {
2307 wakaba 1.77 !!!cp (154);
2308 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2309     line => $self->{line_prev},
2310     column => $self->{column_prev});
2311 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2312 wakaba 1.57 $self->{state} = COMMENT_STATE;
2313 wakaba 1.1 !!!next-input-character;
2314     redo A;
2315     }
2316 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
2317 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2318     $self->{next_char} == 0x000A or # LF
2319     $self->{next_char} == 0x000B or # VT
2320     $self->{next_char} == 0x000C or # FF
2321     $self->{next_char} == 0x0020) { # SP
2322 wakaba 1.77 !!!cp (155);
2323 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2324 wakaba 1.1 !!!next-input-character;
2325     redo A;
2326     } else {
2327 wakaba 1.77 !!!cp (156);
2328 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
2329 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2330 wakaba 1.1 ## reconsume
2331     redo A;
2332     }
2333 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2334 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2335     $self->{next_char} == 0x000A or # LF
2336     $self->{next_char} == 0x000B or # VT
2337     $self->{next_char} == 0x000C or # FF
2338     $self->{next_char} == 0x0020) { # SP
2339 wakaba 1.77 !!!cp (157);
2340 wakaba 1.1 ## Stay in the state
2341     !!!next-input-character;
2342     redo A;
2343 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2344 wakaba 1.77 !!!cp (158);
2345 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2346 wakaba 1.57 $self->{state} = DATA_STATE;
2347 wakaba 1.1 !!!next-input-character;
2348    
2349 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2350 wakaba 1.1
2351     redo A;
2352 wakaba 1.77 } elsif ($self->{next_char} == -1) {
2353     !!!cp (159);
2354 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2355 wakaba 1.57 $self->{state} = DATA_STATE;
2356 wakaba 1.1 ## reconsume
2357    
2358 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2359 wakaba 1.1
2360     redo A;
2361     } else {
2362 wakaba 1.77 !!!cp (160);
2363 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
2364     delete $self->{current_token}->{quirks};
2365 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
2366 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
2367 wakaba 1.1 !!!next-input-character;
2368     redo A;
2369     }
2370 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2371 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
2372 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2373     $self->{next_char} == 0x000A or # LF
2374     $self->{next_char} == 0x000B or # VT
2375     $self->{next_char} == 0x000C or # FF
2376     $self->{next_char} == 0x0020) { # SP
2377 wakaba 1.77 !!!cp (161);
2378 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2379 wakaba 1.1 !!!next-input-character;
2380     redo A;
2381 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2382 wakaba 1.77 !!!cp (162);
2383 wakaba 1.57 $self->{state} = DATA_STATE;
2384 wakaba 1.1 !!!next-input-character;
2385    
2386     !!!emit ($self->{current_token}); # DOCTYPE
2387    
2388     redo A;
2389 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2390 wakaba 1.77 !!!cp (163);
2391 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2392 wakaba 1.57 $self->{state} = DATA_STATE;
2393 wakaba 1.1 ## reconsume
2394    
2395 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2396 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2397 wakaba 1.1
2398     redo A;
2399     } else {
2400 wakaba 1.77 !!!cp (164);
2401 wakaba 1.1 $self->{current_token}->{name}
2402 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
2403 wakaba 1.1 ## Stay in the state
2404     !!!next-input-character;
2405     redo A;
2406     }
2407 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2408 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2409     $self->{next_char} == 0x000A or # LF
2410     $self->{next_char} == 0x000B or # VT
2411     $self->{next_char} == 0x000C or # FF
2412     $self->{next_char} == 0x0020) { # SP
2413 wakaba 1.77 !!!cp (165);
2414 wakaba 1.1 ## Stay in the state
2415     !!!next-input-character;
2416     redo A;
2417 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2418 wakaba 1.77 !!!cp (166);
2419 wakaba 1.57 $self->{state} = DATA_STATE;
2420 wakaba 1.1 !!!next-input-character;
2421    
2422     !!!emit ($self->{current_token}); # DOCTYPE
2423    
2424     redo A;
2425 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2426 wakaba 1.77 !!!cp (167);
2427 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2428 wakaba 1.57 $self->{state} = DATA_STATE;
2429 wakaba 1.1 ## reconsume
2430    
2431 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2432 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2433    
2434     redo A;
2435 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2436     $self->{next_char} == 0x0070) { # p
2437 wakaba 1.18 !!!next-input-character;
2438 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
2439     $self->{next_char} == 0x0075) { # u
2440 wakaba 1.18 !!!next-input-character;
2441 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
2442     $self->{next_char} == 0x0062) { # b
2443 wakaba 1.18 !!!next-input-character;
2444 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
2445     $self->{next_char} == 0x006C) { # l
2446 wakaba 1.18 !!!next-input-character;
2447 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
2448     $self->{next_char} == 0x0069) { # i
2449 wakaba 1.18 !!!next-input-character;
2450 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
2451     $self->{next_char} == 0x0063) { # c
2452 wakaba 1.77 !!!cp (168);
2453 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2454 wakaba 1.18 !!!next-input-character;
2455     redo A;
2456 wakaba 1.77 } else {
2457     !!!cp (169);
2458 wakaba 1.18 }
2459 wakaba 1.77 } else {
2460     !!!cp (170);
2461 wakaba 1.18 }
2462 wakaba 1.77 } else {
2463     !!!cp (171);
2464 wakaba 1.18 }
2465 wakaba 1.77 } else {
2466     !!!cp (172);
2467 wakaba 1.18 }
2468 wakaba 1.77 } else {
2469     !!!cp (173);
2470 wakaba 1.18 }
2471    
2472     #
2473 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2474     $self->{next_char} == 0x0073) { # s
2475 wakaba 1.18 !!!next-input-character;
2476 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
2477     $self->{next_char} == 0x0079) { # y
2478 wakaba 1.18 !!!next-input-character;
2479 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
2480     $self->{next_char} == 0x0073) { # s
2481 wakaba 1.18 !!!next-input-character;
2482 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
2483     $self->{next_char} == 0x0074) { # t
2484 wakaba 1.18 !!!next-input-character;
2485 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
2486     $self->{next_char} == 0x0065) { # e
2487 wakaba 1.18 !!!next-input-character;
2488 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
2489     $self->{next_char} == 0x006D) { # m
2490 wakaba 1.77 !!!cp (174);
2491 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2492 wakaba 1.18 !!!next-input-character;
2493     redo A;
2494 wakaba 1.77 } else {
2495     !!!cp (175);
2496 wakaba 1.18 }
2497 wakaba 1.77 } else {
2498     !!!cp (176);
2499 wakaba 1.18 }
2500 wakaba 1.77 } else {
2501     !!!cp (177);
2502 wakaba 1.18 }
2503 wakaba 1.77 } else {
2504     !!!cp (178);
2505 wakaba 1.18 }
2506 wakaba 1.77 } else {
2507     !!!cp (179);
2508 wakaba 1.18 }
2509    
2510     #
2511     } else {
2512 wakaba 1.77 !!!cp (180);
2513 wakaba 1.18 !!!next-input-character;
2514     #
2515     }
2516    
2517     !!!parse-error (type => 'string after DOCTYPE name');
2518 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2519 wakaba 1.73
2520 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2521 wakaba 1.18 # next-input-character is already done
2522     redo A;
2523 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2524 wakaba 1.18 if ({
2525     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2526     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2527 wakaba 1.76 }->{$self->{next_char}}) {
2528 wakaba 1.77 !!!cp (181);
2529 wakaba 1.18 ## Stay in the state
2530     !!!next-input-character;
2531     redo A;
2532 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2533 wakaba 1.77 !!!cp (182);
2534 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2535 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2536 wakaba 1.18 !!!next-input-character;
2537     redo A;
2538 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2539 wakaba 1.77 !!!cp (183);
2540 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2541 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2542 wakaba 1.18 !!!next-input-character;
2543     redo A;
2544 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2545 wakaba 1.77 !!!cp (184);
2546 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2547    
2548 wakaba 1.57 $self->{state} = DATA_STATE;
2549 wakaba 1.18 !!!next-input-character;
2550    
2551 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2552 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2553    
2554     redo A;
2555 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2556 wakaba 1.77 !!!cp (185);
2557 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2558    
2559 wakaba 1.57 $self->{state} = DATA_STATE;
2560 wakaba 1.18 ## reconsume
2561    
2562 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2563 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2564    
2565     redo A;
2566     } else {
2567 wakaba 1.77 !!!cp (186);
2568 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2569 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2570 wakaba 1.73
2571 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2572 wakaba 1.18 !!!next-input-character;
2573     redo A;
2574     }
2575 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2576 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2577 wakaba 1.77 !!!cp (187);
2578 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2579 wakaba 1.18 !!!next-input-character;
2580     redo A;
2581 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2582 wakaba 1.77 !!!cp (188);
2583 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2584    
2585     $self->{state} = DATA_STATE;
2586     !!!next-input-character;
2587    
2588 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2589 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2590    
2591     redo A;
2592 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2593 wakaba 1.77 !!!cp (189);
2594 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2595    
2596 wakaba 1.57 $self->{state} = DATA_STATE;
2597 wakaba 1.18 ## reconsume
2598    
2599 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2600 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2601    
2602     redo A;
2603     } else {
2604 wakaba 1.77 !!!cp (190);
2605 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2606 wakaba 1.76 .= chr $self->{next_char};
2607 wakaba 1.18 ## Stay in the state
2608     !!!next-input-character;
2609     redo A;
2610     }
2611 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2612 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2613 wakaba 1.77 !!!cp (191);
2614 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2615 wakaba 1.18 !!!next-input-character;
2616     redo A;
2617 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2618 wakaba 1.77 !!!cp (192);
2619 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2620    
2621     $self->{state} = DATA_STATE;
2622     !!!next-input-character;
2623    
2624 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2625 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2626    
2627     redo A;
2628 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2629 wakaba 1.77 !!!cp (193);
2630 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2631    
2632 wakaba 1.57 $self->{state} = DATA_STATE;
2633 wakaba 1.18 ## reconsume
2634    
2635 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2636 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2637    
2638     redo A;
2639     } else {
2640 wakaba 1.77 !!!cp (194);
2641 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2642 wakaba 1.76 .= chr $self->{next_char};
2643 wakaba 1.18 ## Stay in the state
2644     !!!next-input-character;
2645     redo A;
2646     }
2647 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2648 wakaba 1.18 if ({
2649     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2650     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2651 wakaba 1.76 }->{$self->{next_char}}) {
2652 wakaba 1.77 !!!cp (195);
2653 wakaba 1.18 ## Stay in the state
2654     !!!next-input-character;
2655     redo A;
2656 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2657 wakaba 1.77 !!!cp (196);
2658 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2659 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2660 wakaba 1.18 !!!next-input-character;
2661     redo A;
2662 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2663 wakaba 1.77 !!!cp (197);
2664 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2665 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2666 wakaba 1.18 !!!next-input-character;
2667     redo A;
2668 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2669 wakaba 1.77 !!!cp (198);
2670 wakaba 1.57 $self->{state} = DATA_STATE;
2671 wakaba 1.18 !!!next-input-character;
2672    
2673     !!!emit ($self->{current_token}); # DOCTYPE
2674    
2675     redo A;
2676 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2677 wakaba 1.77 !!!cp (199);
2678 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2679    
2680 wakaba 1.57 $self->{state} = DATA_STATE;
2681 wakaba 1.26 ## reconsume
2682 wakaba 1.18
2683 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2684 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2685    
2686     redo A;
2687     } else {
2688 wakaba 1.77 !!!cp (200);
2689 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2690 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2691 wakaba 1.73
2692 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2693 wakaba 1.18 !!!next-input-character;
2694     redo A;
2695     }
2696 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2697 wakaba 1.18 if ({
2698     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2699     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2700 wakaba 1.76 }->{$self->{next_char}}) {
2701 wakaba 1.77 !!!cp (201);
2702 wakaba 1.18 ## Stay in the state
2703     !!!next-input-character;
2704     redo A;
2705 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2706 wakaba 1.77 !!!cp (202);
2707 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2708 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2709 wakaba 1.18 !!!next-input-character;
2710     redo A;
2711 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2712 wakaba 1.77 !!!cp (203);
2713 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2714 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2715 wakaba 1.18 !!!next-input-character;
2716     redo A;
2717 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2718 wakaba 1.77 !!!cp (204);
2719 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2720 wakaba 1.57 $self->{state} = DATA_STATE;
2721 wakaba 1.18 !!!next-input-character;
2722    
2723 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2724 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2725    
2726     redo A;
2727 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2728 wakaba 1.77 !!!cp (205);
2729 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2730    
2731 wakaba 1.57 $self->{state} = DATA_STATE;
2732 wakaba 1.26 ## reconsume
2733 wakaba 1.18
2734 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2735 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2736    
2737     redo A;
2738     } else {
2739 wakaba 1.77 !!!cp (206);
2740 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2741 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2742 wakaba 1.73
2743 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2744 wakaba 1.18 !!!next-input-character;
2745     redo A;
2746     }
2747 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2748 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2749 wakaba 1.77 !!!cp (207);
2750 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2751 wakaba 1.18 !!!next-input-character;
2752     redo A;
2753 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2754 wakaba 1.77 !!!cp (208);
2755 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2756 wakaba 1.69
2757     $self->{state} = DATA_STATE;
2758     !!!next-input-character;
2759    
2760 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2761 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2762    
2763     redo A;
2764 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2765 wakaba 1.77 !!!cp (209);
2766 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2767    
2768 wakaba 1.57 $self->{state} = DATA_STATE;
2769 wakaba 1.18 ## reconsume
2770    
2771 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2772 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2773    
2774     redo A;
2775     } else {
2776 wakaba 1.77 !!!cp (210);
2777 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2778 wakaba 1.76 .= chr $self->{next_char};
2779 wakaba 1.18 ## Stay in the state
2780     !!!next-input-character;
2781     redo A;
2782     }
2783 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2784 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2785 wakaba 1.77 !!!cp (211);
2786 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2787 wakaba 1.18 !!!next-input-character;
2788     redo A;
2789 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2790 wakaba 1.77 !!!cp (212);
2791 wakaba 1.153 !!!parse-error (type => 'unclosed SYSTEM literal');
2792 wakaba 1.69
2793     $self->{state} = DATA_STATE;
2794     !!!next-input-character;
2795    
2796 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2797 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2798    
2799     redo A;
2800 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2801 wakaba 1.77 !!!cp (213);
2802 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2803    
2804 wakaba 1.57 $self->{state} = DATA_STATE;
2805 wakaba 1.18 ## reconsume
2806    
2807 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2808 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2809    
2810     redo A;
2811     } else {
2812 wakaba 1.77 !!!cp (214);
2813 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2814 wakaba 1.76 .= chr $self->{next_char};
2815 wakaba 1.18 ## Stay in the state
2816     !!!next-input-character;
2817     redo A;
2818     }
2819 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2820 wakaba 1.18 if ({
2821     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2822     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2823 wakaba 1.76 }->{$self->{next_char}}) {
2824 wakaba 1.77 !!!cp (215);
2825 wakaba 1.18 ## Stay in the state
2826     !!!next-input-character;
2827     redo A;
2828 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2829 wakaba 1.77 !!!cp (216);
2830 wakaba 1.57 $self->{state} = DATA_STATE;
2831 wakaba 1.18 !!!next-input-character;
2832    
2833     !!!emit ($self->{current_token}); # DOCTYPE
2834    
2835     redo A;
2836 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2837 wakaba 1.77 !!!cp (217);
2838 wakaba 1.150 !!!parse-error (type => 'unclosed DOCTYPE');
2839 wakaba 1.57 $self->{state} = DATA_STATE;
2840 wakaba 1.26 ## reconsume
2841 wakaba 1.18
2842 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2843 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2844    
2845     redo A;
2846     } else {
2847 wakaba 1.77 !!!cp (218);
2848 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2849 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2850 wakaba 1.73
2851 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2852 wakaba 1.1 !!!next-input-character;
2853     redo A;
2854     }
2855 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2856 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2857 wakaba 1.77 !!!cp (219);
2858 wakaba 1.57 $self->{state} = DATA_STATE;
2859 wakaba 1.1 !!!next-input-character;
2860    
2861     !!!emit ($self->{current_token}); # DOCTYPE
2862    
2863     redo A;
2864 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2865 wakaba 1.77 !!!cp (220);
2866 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2867 wakaba 1.57 $self->{state} = DATA_STATE;
2868 wakaba 1.1 ## reconsume
2869    
2870     !!!emit ($self->{current_token}); # DOCTYPE
2871    
2872     redo A;
2873     } else {
2874 wakaba 1.77 !!!cp (221);
2875 wakaba 1.1 ## Stay in the state
2876     !!!next-input-character;
2877     redo A;
2878     }
2879 wakaba 1.127 } elsif ($self->{state} == CDATA_BLOCK_STATE) {
2880     my $s = '';
2881    
2882     my ($l, $c) = ($self->{line}, $self->{column});
2883    
2884     CS: while ($self->{next_char} != -1) {
2885     if ($self->{next_char} == 0x005D) { # ]
2886     !!!next-input-character;
2887     if ($self->{next_char} == 0x005D) { # ]
2888     !!!next-input-character;
2889     MDC: {
2890     if ($self->{next_char} == 0x003E) { # >
2891     !!!cp (221.1);
2892     !!!next-input-character;
2893     last CS;
2894     } elsif ($self->{next_char} == 0x005D) { # ]
2895     !!!cp (221.2);
2896     $s .= ']';
2897     !!!next-input-character;
2898     redo MDC;
2899     } else {
2900     !!!cp (221.3);
2901     $s .= ']]';
2902     #
2903     }
2904     } # MDC
2905     } else {
2906     !!!cp (221.4);
2907     $s .= ']';
2908     #
2909     }
2910     } else {
2911     !!!cp (221.5);
2912     #
2913     }
2914     $s .= chr $self->{next_char};
2915     !!!next-input-character;
2916     } # CS
2917    
2918     $self->{state} = DATA_STATE;
2919     ## next-input-character done or EOF, which is reconsumed.
2920    
2921     if (length $s) {
2922     !!!cp (221.6);
2923     !!!emit ({type => CHARACTER_TOKEN, data => $s,
2924     line => $l, column => $c});
2925     } else {
2926     !!!cp (221.7);
2927     }
2928    
2929     redo A;
2930    
2931     ## ISSUE: "text tokens" in spec.
2932     ## TODO: Streaming support
2933 wakaba 1.1 } else {
2934     die "$0: $self->{state}: Unknown state";
2935     }
2936     } # A
2937    
2938     die "$0: _get_next_token: unexpected case";
2939     } # _get_next_token
2940    
## _tokenize_attempt_to_consume_an_entity ($in_attr, $additional)
##
## Tries to read a character reference starting at the current
## |$self->{next_char}|.
## NOTE(review): the caller has presumably already consumed the "&";
## the literal results below are prefixed with "&" accordingly --
## confirm against the call sites.
##
## Arguments:
##   $in_attr    - If true, a named reference that lacks ";" and is
##                 followed by further name characters is returned as
##                 literal text ("&" + name) instead of being expanded.
##   $additional - One more code point (besides HT, LF, VT, FF, SP, "<",
##                 "&" and -1) at which nothing is consumed.
##
## Returns |undef| when
##   - the next character is one of the characters listed above,
##   - "#" or "#x"/"#X" is not followed by a digit (|next_char| is set
##     back to "#" in that case), or
##   - the next character is neither "#" nor an ASCII letter.
## Otherwise returns a hash reference of type |CHARACTER_TOKEN| with
## |data|, |line| and |column|, plus |has_reference => 1| whenever a
## reference was actually expanded.
2941 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2942     my ($self, $in_attr, $additional) = @_;
2943 wakaba 1.20
## Position used for every parse error and token produced below.
2944 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2945    
2946 wakaba 1.20 if ({
2947     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2948     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2949 wakaba 1.72 $additional => 1,
2950 wakaba 1.76 }->{$self->{next_char}}) {
2951 wakaba 1.78 !!!cp (1001);
2952 wakaba 1.20 ## Don't consume
2953     ## No error
2954     return undef;
2955 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
2956 wakaba 1.1 !!!next-input-character;
2957 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2958     $self->{next_char} == 0x0058) { # X
## Hexadecimal reference.  |$code| stays undefined until the first
## hexadecimal digit has been read.
2959 wakaba 1.26 my $code;
## |redo X| restarts this block, so each pass reads one more character.
2960 wakaba 1.1 X: {
2961 wakaba 1.76 my $x_char = $self->{next_char};
2962 wakaba 1.1 !!!next-input-character;
2963 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2964     $self->{next_char} <= 0x0039) { # 0..9
2965 wakaba 1.78 !!!cp (1002);
2966 wakaba 1.26 $code ||= 0;
2967     $code *= 0x10;
2968 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2969 wakaba 1.1 redo X;
2970 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2971     $self->{next_char} <= 0x0066) { # a..f
2972 wakaba 1.78 !!!cp (1003);
2973 wakaba 1.26 $code ||= 0;
2974     $code *= 0x10;
2975 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2976 wakaba 1.1 redo X;
2977 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2978     $self->{next_char} <= 0x0046) { # A..F
2979 wakaba 1.78 !!!cp (1004);
2980 wakaba 1.26 $code ||= 0;
2981     $code *= 0x10;
2982 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2983 wakaba 1.1 redo X;
2984 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
2985 wakaba 1.78 !!!cp (1005);
2986 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
## Hand the "x"/"X" and the current character to the push-back macro
## and make "#" the next character again.
2987 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2988     $self->{next_char} = 0x0023; # #
2989 wakaba 1.1 return undef;
2990 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2991 wakaba 1.78 !!!cp (1006);
2992 wakaba 1.1 !!!next-input-character;
2993     } else {
2994 wakaba 1.78 !!!cp (1007);
2995 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2996 wakaba 1.1 }
2997    
## Replace code points that are not returned as they are: U+0000,
## surrogates and values above U+10FFFF become U+FFFD, CR becomes LF,
## and 0x80..0x9F are mapped through |$c1_entity_char| (a table that
## is defined outside this subroutine).
2998 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2999 wakaba 1.78 !!!cp (1008);
3000 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3001     text => (sprintf 'U+%04X', $code),
3002     line => $l, column => $c);
3003 wakaba 1.26 $code = 0xFFFD;
3004     } elsif ($code > 0x10FFFF) {
3005 wakaba 1.78 !!!cp (1009);
3006 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3007     text => (sprintf 'U-%08X', $code),
3008     line => $l, column => $c);
3009 wakaba 1.26 $code = 0xFFFD;
3010     } elsif ($code == 0x000D) {
3011 wakaba 1.78 !!!cp (1010);
3012 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
3013 wakaba 1.26 $code = 0x000A;
3014     } elsif (0x80 <= $code and $code <= 0x9F) {
3015 wakaba 1.78 !!!cp (1011);
3016 wakaba 1.153 !!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c);
3017 wakaba 1.26 $code = $c1_entity_char->{$code};
3018 wakaba 1.1 }
3019    
3020 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
3021 wakaba 1.118 has_reference => 1,
3022 wakaba 1.120 line => $l, column => $c,
3023 wakaba 1.118 };
3024 wakaba 1.1 } # X
3025 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
3026     $self->{next_char} <= 0x0039) { # 0..9
## Decimal reference; the first digit is already the next character.
3027     my $code = $self->{next_char} - 0x0030;
3028 wakaba 1.1 !!!next-input-character;
3029    
3030 wakaba 1.76 while (0x0030 <= $self->{next_char} and
3031     $self->{next_char} <= 0x0039) { # 0..9
3032 wakaba 1.78 !!!cp (1012);
3033 wakaba 1.1 $code *= 10;
3034 wakaba 1.76 $code += $self->{next_char} - 0x0030;
3035 wakaba 1.1
3036     !!!next-input-character;
3037     }
3038    
3039 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
3040 wakaba 1.78 !!!cp (1013);
3041 wakaba 1.1 !!!next-input-character;
3042     } else {
3043 wakaba 1.78 !!!cp (1014);
3044 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
3045 wakaba 1.1 }
3046    
## Same replacements as in the hexadecimal branch above.
3047 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
3048 wakaba 1.78 !!!cp (1015);
3049 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3050     text => (sprintf 'U+%04X', $code),
3051     line => $l, column => $c);
3052 wakaba 1.26 $code = 0xFFFD;
3053     } elsif ($code > 0x10FFFF) {
3054 wakaba 1.78 !!!cp (1016);
3055 wakaba 1.153 !!!parse-error (type => 'invalid character reference',
3056     text => (sprintf 'U-%08X', $code),
3057     line => $l, column => $c);
3058 wakaba 1.26 $code = 0xFFFD;
3059     } elsif ($code == 0x000D) {
3060 wakaba 1.78 !!!cp (1017);
3061 wakaba 1.153 !!!parse-error (type => 'CR character reference',
3062     line => $l, column => $c);
3063 wakaba 1.26 $code = 0x000A;
3064 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
3065 wakaba 1.78 !!!cp (1018);
3066 wakaba 1.153 !!!parse-error (type => 'C1 character reference',
3067     text => (sprintf 'U+%04X', $code),
3068     line => $l, column => $c);
3069 wakaba 1.4 $code = $c1_entity_char->{$code};
3070 wakaba 1.1 }
3071    
3072 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
3073 wakaba 1.120 line => $l, column => $c,
3074 wakaba 1.118 };
3075 wakaba 1.1 } else {
3076 wakaba 1.78 !!!cp (1019);
3077 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
## "#" was not followed by "x", "X" or a digit: hand the current
## character to the push-back macro and make "#" the next character.
3078 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
3079     $self->{next_char} = 0x0023; # #
3080 wakaba 1.1 return undef;
3081     }
3082 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
3083     $self->{next_char} <= 0x005A) or
3084     (0x0061 <= $self->{next_char} and
3085     $self->{next_char} <= 0x007A)) {
## Named reference.  |$entity_name| collects the characters read so
## far; |$value| is the text to return for them.
3086     my $entity_name = chr $self->{next_char};
3087 wakaba 1.1 !!!next-input-character;
3088    
3089     my $value = $entity_name;
## |$match|: 0 = nothing matched yet, 1 = a name ending with ";"
## matched, -1 = a name without ";" matched on the last character read.
## It is doubled for every non-matching character read afterwards, so a
## value below -1 means "matched without ';', then more name characters".
3090 wakaba 1.37 my $match = 0;
## NOTE(review): |$EntityChar| is a package variable, presumably
## filled in by the module required here -- confirm.
3091 wakaba 1.16 require Whatpm::_NamedEntityList;
3092     our $EntityChar;
3093 wakaba 1.1
3094 wakaba 1.128 while (length $entity_name < 30 and
3095 wakaba 1.1 ## NOTE: Some number greater than the maximum length of entity name
3096 wakaba 1.76 ((0x0041 <= $self->{next_char} and # A
3097     $self->{next_char} <= 0x005A) or # Z
3098     (0x0061 <= $self->{next_char} and # a
3099     $self->{next_char} <= 0x007A) or # z
3100     (0x0030 <= $self->{next_char} and # 0
3101     $self->{next_char} <= 0x0039) or # 9
3102     $self->{next_char} == 0x003B)) { # ;
3103     $entity_name .= chr $self->{next_char};
3104 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
3105 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
3106 wakaba 1.78 !!!cp (1020);
3107 wakaba 1.26 $value = $EntityChar->{$entity_name};
3108 wakaba 1.16 $match = 1;
3109     !!!next-input-character;
3110     last;
3111 wakaba 1.37 } else {
3112 wakaba 1.78 !!!cp (1021);
3113 wakaba 1.26 $value = $EntityChar->{$entity_name};
3114     $match = -1;
3115 wakaba 1.37 !!!next-input-character;
3116 wakaba 1.16 }
3117 wakaba 1.1 } else {
3118 wakaba 1.78 !!!cp (1022);
3119 wakaba 1.76 $value .= chr $self->{next_char};
3120 wakaba 1.37 $match *= 2;
3121     !!!next-input-character;
3122 wakaba 1.1 }
3123     }
3124    
3125 wakaba 1.16 if ($match > 0) {
3126 wakaba 1.78 !!!cp (1023);
3127 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
3128 wakaba 1.120 line => $l, column => $c,
3129 wakaba 1.118 };
3130 wakaba 1.16 } elsif ($match < 0) {
3131 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
3132 wakaba 1.37 if ($in_attr and $match < -1) {
3133 wakaba 1.78 !!!cp (1024);
## In an attribute value the characters read are kept literally.
3134 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
3135 wakaba 1.120 line => $l, column => $c,
3136 wakaba 1.118 };
3137 wakaba 1.37 } else {
3138 wakaba 1.78 !!!cp (1025);
3139 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
3140 wakaba 1.120 line => $l, column => $c,
3141 wakaba 1.118 };
3142 wakaba 1.37 }
3143 wakaba 1.1 } else {
3144 wakaba 1.78 !!!cp (1026);
3145 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
3146 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
## Here the characters already read are returned as literal text.
3147 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
3148 wakaba 1.120 line => $l, column => $c,
3149 wakaba 1.118 };
3150 wakaba 1.1 }
3151     } else {
3152 wakaba 1.78 !!!cp (1027);
3153 wakaba 1.1 ## no characters are consumed
3154 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
3155 wakaba 1.1 return undef;
3156     }
3157     } # _tokenize_attempt_to_consume_an_entity
3158    
## Prepare |$self->{document}| for the tree construction stage.
##
## NOTE: |$self->{document}| MUST be set before this method is called.
## The return value is whatever the last |set_user_data| call returns.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};

  ## Error checking is switched off while the tree is being built;
  ## |_terminate_tree_constructor| switches it back on.
  $document->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on

  $document->manakai_is_html (1); # MUST

  ## Initial values of the source position user data.
  $document->set_user_data (manakai_source_line => 1);
  $document->set_user_data (manakai_source_column => 1);
} # _initialize_tree_constructor
3169    
## Undo what |_initialize_tree_constructor| switched off: strict error
## checking on |$self->{document}| is enabled again.  The return value
## is whatever |strict_error_checking| returns.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  ## TODO: Turn mutation events on
  $document->strict_error_checking (1);
} # _terminate_tree_constructor
3175    
3176     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
3177    
3178 wakaba 1.3 { # tree construction stage
3179     my $token;
3180    
## Entry point of the tree construction stage: fetches the first token,
## resets the per-document state and then runs the insertion modes in
## order.  The return value is that of |_tree_construction_main|.
sub _construct_tree ($) {
  my $self = $_[0];

  ## It is not defined when an interactive UA makes |$self->{document}|
  ## available to the user, nor when it begins accepting user input.

  ## Appending a character means: collect it together with all the
  ## consecutive characters that follow and insert a single Text node
  ## whose data is the concatenation of those characters. # MUST

  !!!next-token;

  ## Reset the state kept on the parser object.
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  ## NOTE: The "initial" insertion mode. # MUST
  $self->_tree_construction_initial;

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;

  ## NOTE: The "before head" insertion mode and the ones that follow.
  $self->{insertion_mode} = BEFORE_HEAD_IM;
  $self->_tree_construction_main;
} # _construct_tree
3209    
## Processes tokens in the "initial" insertion mode.
##
## Leading white space is dropped and comments are appended to the
## document.  A DOCTYPE token is checked for conformance, turned into a
## document type definition node, and used to select the document's
## compatibility mode ("quirks", "limited quirks", or the default).
## Any other token means that the DOCTYPE is missing: a parse error is
## reported and the document is put into quirks mode.
##
## The current token is the file-scoped |$token|.  The sub returns
## (with no useful value) as soon as processing has to continue in the
## "before html" insertion mode; |$token| is then the token to be
## processed there.  Dies on an unknown token type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/; # ASCII case-insensitive
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{system_identifier}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif (defined $token->{public_identifier}) {
        if ($token->{public_identifier} eq 'XSLT-compat') {
          !!!cp ('t1.2');
          !!!parse-error (type => 'XSLT-compat', token => $token,
                          level => $self->{level}->{should});
        } else {
          !!!parse-error (type => 'not HTML5', token => $token);
        }
      } else {
        !!!cp ('t3');
        #
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## Public identifiers are compared ASCII case-insensitively, so
        ## the identifier is uppercased and the literals below are
        ## written in uppercase.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        ## Public identifier prefixes that select the quirks mode.
        my $prefix = [
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//",
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//",
          "-//IETF//DTD HTML 2.0 LEVEL 1//",
          "-//IETF//DTD HTML 2.0 LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//",
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//",
          "-//IETF//DTD HTML 2.0 STRICT//",
          "-//IETF//DTD HTML 2.0//",
          "-//IETF//DTD HTML 2.1E//",
          "-//IETF//DTD HTML 3.0//",
          "-//IETF//DTD HTML 3.2 FINAL//",
          "-//IETF//DTD HTML 3.2//",
          "-//IETF//DTD HTML 3//",
          "-//IETF//DTD HTML LEVEL 0//",
          "-//IETF//DTD HTML LEVEL 1//",
          "-//IETF//DTD HTML LEVEL 2//",
          "-//IETF//DTD HTML LEVEL 3//",
          "-//IETF//DTD HTML STRICT LEVEL 0//",
          "-//IETF//DTD HTML STRICT LEVEL 1//",
          "-//IETF//DTD HTML STRICT LEVEL 2//",
          "-//IETF//DTD HTML STRICT LEVEL 3//",
          "-//IETF//DTD HTML STRICT//",
          "-//IETF//DTD HTML//",
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//",
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//",
          "-//NETSCAPE COMM. CORP.//DTD HTML//",
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//",
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//",
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//",
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//",
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//",
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//",
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//",
          "-//W3C//DTD HTML 3 1995-03-24//",
          "-//W3C//DTD HTML 3.2 DRAFT//",
          "-//W3C//DTD HTML 3.2 FINAL//",
          "-//W3C//DTD HTML 3.2//",
          "-//W3C//DTD HTML 3.2S DRAFT//",
          "-//W3C//DTD HTML 4.0 FRAMESET//",
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//",
          "-//W3C//DTD HTML EXPERIMENTAL 19960712//",
          "-//W3C//DTD HTML EXPERIMENTAL 970421//",
          "-//W3C//DTD W3 HTML//",
          "-//W3O//DTD W3 HTML 3.0//",
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//",
          "-//WEBTECHS//DTD MOZILLA HTML//",
        ]; # $prefix
        my $match;
        for (@$prefix) {
          ## The public identifier itself (not the list of prefixes)
          ## has to start with one of the prefixes.
          if (substr ($pubid, 0, length $_) eq $_) {
            $match = 1;
            last;
          }
        }
        if ($match or
            $pubid eq "-//W3O//DTD W3 HTML STRICT 3.0//EN//" or
            $pubid eq "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" or
            $pubid eq "HTML") {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid =~ m[^-//W3C//DTD HTML 4.01 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD HTML 4.01 TRANSITIONAL//]) {
          if (defined $token->{system_identifier}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid =~ m[^-//W3C//DTD XHTML 1.0 FRAMESET//] or
                 $pubid =~ m[^-//W3C//DTD XHTML 1.0 TRANSITIONAL//]) {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is compared in lowercase.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## NOTE: Ensure that |PUBLIC "(limited quirks)" "(quirks)"| is
          ## marked as quirks.
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
              START_TAG_TOKEN, 1,
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      ## Non-white-space character data before any DOCTYPE.
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
3416    
## Processes tokens in the "before html" insertion mode.
##
## DOCTYPE tokens are reported as errors and ignored, comments are
## appended to the document, and leading white space is dropped.  An
## |html| start tag creates the root element from the token; any other
## token creates an implied |html| element and is left in the
## file-scoped |$token| to be reprocessed.  In both cases the root
## element is appended to the document and pushed onto the stack of
## open elements, and the |application_cache_selection| callback is
## invoked exactly once (with the |manifest| attribute value if the
## start tag has a true |manifest| attribute entry, with |undef|
## otherwise).
##
## Returns when the "before head" insertion mode should take over.
## Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is invoked by
      ## the common "anything else" code below, after the implied root
      ## element has been created; it must not be invoked here as well.

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
              END_TAG_TOKEN, 1,
              END_OF_FILE_TOKEN, 1,
             }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## "Anything else": imply the root |html| element.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3511    
## Resets |$self->{insertion_mode}| appropriately, based on the stack
## of open elements.
##
## The stack is walked from the current node (the last entry) towards
## the first entry.  Each entry is an array reference whose first item
## is the element node and whose second item is a category bit field
## tested against the |*_EL| constants.  When the first entry of the
## stack is reached, |$self->{inner_html_node}| is used in its place;
## the sub dies if that is not defined.
##
## Returns nothing useful; the result is the new value of
## |$self->{insertion_mode}|.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        !!!cp ('t28');
        $node = $self->{inner_html_node};
      } else {
        die "_reset_insertion_mode: t27";
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      !!!cp ('t28.1');
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = IN_BODY_IM | IN_FOREIGN_CONTENT_IM;
    } elsif ($node->[1] & TABLE_CELL_EL) {
      if ($last) {
        !!!cp ('t28.2');
        #
      } else {
        !!!cp ('t28.3');
        $new_mode = IN_CELL_IM;
      }
    } else {
      !!!cp ('t28.4');
      ## Local names not listed here leave |$new_mode| undefined.
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }
    ## Set the mode and stop, whatever the numeric value of the mode
    ## constant is.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] & HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3598    
3599     sub _tree_construction_main ($) {
3600     my $self = shift;
3601    
3602 wakaba 1.1 my $active_formatting_elements = [];
3603    
## Reconstructs the active formatting elements: every entry after the
## last marker (or after the last entry that is still in the stack of
## open elements) is cloned, inserted by means of the code reference
## given as the only argument, pushed onto the stack of open elements
## and put in place of the original entry in the list.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert_node = shift;

  ## Step 1: Nothing to do for an empty list.
  return unless @$active_formatting_elements;

  ## Step 3: Start from the last entry of the list.
  my $pos = -1;
  my $item = $active_formatting_elements->[$pos];

  ## Step 2: Nothing to do if the last entry is a marker or is still
  ## in the stack of open elements.
  return if $item->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($item->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  S4: {
    ## Step 4: Stop rewinding at the first entry of the list.
    last S4 if $active_formatting_elements->[0]->[0] eq $item->[0];

    ## Step 5: Go back by one entry.
    $pos--;
    $item = $active_formatting_elements->[$pos];

    ## Step 6: Keep rewinding unless the entry is a marker or is in
    ## the stack of open elements.
    if ($item->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open = 0;
      for my $open (@{$self->{open_elements}}) {
        next unless $item->[0] eq $open->[0];
        !!!cp ('t33');
        $is_open = 1;
        last;
      }
      unless ($is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      }
      !!!cp ('t34');
      #
    }

    ## Step 7: Advance to the first entry that has to be recreated.
    $pos++;
    $item = $active_formatting_elements->[$pos];
  } # S4

  S7: {
    ## Step 8: Shallow clone of the element; the category is reused.
    my $clone = [$item->[0]->clone_node (0), $item->[1]];

    ## Step 9
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: Done once the last entry of the list has been replaced.
    if ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t37');
    } else {
      !!!cp ('t36');
      ## Step 7'
      $pos++;
      $item = $active_formatting_elements->[$pos];

      redo S7;
    }
  } # S7
}; # $reconstruct_active_formatting_elements
3683    
## Removes the last marker of the list of active formatting elements
## together with every entry after it.  The list is left untouched if
## it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      !!!cp ('t38');
      ## Two-argument |splice| drops everything from |$index| on.
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3695    
3696 wakaba 1.96 my $insert;
3697    
## Generic CDATA/RCDATA element parsing: creates an element for the
## current start tag token, inserts it with |$insert|, collects the
## following character tokens into a single text node child, and then
## consumes the token that ended the character run.
##
## The only argument is the content model flag to tokenize the element
## content with (|CDATA_CONTENT_MODEL| or |RCDATA_CONTENT_MODEL|; any
## other value dies if the element is not properly closed).
my $parse_rcdata = sub ($) {
  my $content_model_flag = shift;

  ## Step 1
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2
  $insert->($el);

  ## Step 3
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4: Concatenate the data of consecutive character tokens.
  my $collected = '';
  !!!nack ('t40.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    $collected .= $token->{data};
    !!!next-token;
  }

  ## Step 5: No text node is created for empty content.
  if (length $collected) {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($collected);
    $el->append_child ($text_node);
  }

  ## Step 6
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7
  my $properly_closed = ($token->{type} == END_TAG_TOKEN and
                         $token->{tag_name} eq $start_tag_name);
  if ($properly_closed) {
    !!!cp ('t42');
    ## Ignore the token
  } elsif ($content_model_flag == CDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t43');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
  } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
    ## NOTE: An end-of-file token.
    !!!cp ('t44');
    !!!parse-error (type => 'in RCDATA:#eof', token => $token);
  } else {
    die "$0: $content_model_flag in parse_rcdata";
  }
  !!!next-token;
}; # $parse_rcdata
3752 wakaba 1.1
## Handles a |script| start tag: creates the element from the current
## token, collects the following character tokens (tokenized as CDATA)
## as its text content, and inserts the element with |$insert| unless
## |$self->{inner_html_node}| is defined.  Finally the token that
## ended the character run is consumed.
my $script_start_tag = sub () {
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  ## Concatenate the data of consecutive character tokens; the loop
  ## stops at a non-character token or when the tokenizer stops
  ## tokenising.
  my $script_text = '';
  !!!nack ('t45.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) {
    !!!cp ('t45');
    $script_text .= $token->{data};
    !!!next-token;
  }
  if (length $script_text) {
    !!!cp ('t46');
    $script_el->manakai_append_text ($script_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  my $properly_closed = ($token->{type} == END_TAG_TOKEN and
                         $token->{tag_name} eq 'script');
  if ($properly_closed) {
    !!!cp ('t47');
    ## Ignore the token
  } else {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#eof', token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (defined $self->{inner_html_node}) {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  } else {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
3804    
3805 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3806     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3807     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3808    
## Handles an end tag of a formatting element by means of the adoption
## agency algorithm (AAA).
##
## The only argument is the end tag token (a hash reference with a
## |tag_name| entry); it is used for the tag name and for error
## reporting.  The algorithm is repeated (|redo FET|) until one of the
## terminating conditions consumes the token with |!!!next-token| and
## returns.
##
## Entries of both |$active_formatting_elements| and
## |$self->{open_elements}| are array references whose first item is
## the element node (or the string '#marker' in the former list) and
## whose second item is the category bit field of the element.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1: Find the formatting element, i.e. the last entry after
    ## the last marker in the list whose local name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag', text => $tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          !!!parse-error (type => 'unmatched end tag',
                          text => $tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag',
                      text => $tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last entry of the list.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      text => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2: The furthest block is the entry nearest to the formatting
    ## element, among the entries after it in the stack of open
    ## elements, that is special or scoping but not formatting.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3: Without a furthest block, pop the stack up to and
    ## including the formatting element and drop its list entry.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): If the formatting element is the first entry of the
    ## list, the index below is -1 and selects the last entry - confirm
    ## that this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: A node that is not in the list of active formatting
      ## elements is removed from the stack and skipped.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5: A node with children is replaced by a shallow clone,
      ## both in the list and in the stack.
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8: Insert the last node into the common ancestor, or foster
    ## parent it if the common ancestor is a table-rows element.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10: The child list is copied first, since moving the
    ## children modifies it.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: Remove the formatting element from the list and insert
    ## the clone just after the bookmark.  |$i| is decremented when the
    ## removed entry precedes the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: Remove the formatting element from the stack and put
    ## the clone just after the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    ## NOTE(review): The length argument of 1 replaces the entry after
    ## the furthest block, if there is any, instead of inserting before
    ## it (compare with the length of 0 in step 12) - confirm.
    splice @{$self->{open_elements}}, $i + 1, 1, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
4043    
## Default insertion code: appends the given node to the current node,
## i.e. the element of the last entry of the stack of open elements.
## It is also installed as the initial value of |$insert|.
$insert = my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
4047    
## Insertion code with foster parenting: if the current node is a
## table-rows element (|TABLE_ROWS_EL|), the given node is inserted at
## the foster parent position and the current table is marked as
## tainted; otherwise the node is appended to the current node.
my $insert_to_foster = sub {
  my ($child) = @_;

  my $current = $self->{open_elements}->[-1];
  unless ($current->[1] & TABLE_ROWS_EL) {
    !!!cp ('t72');
    return $current->[0]->append_child ($child);
  }

  # MUST
  ## Look for the last table element in the stack of open elements.
  my $foster_parent_element;
  my $next_sibling;
  for my $index (reverse 0..$#{$self->{open_elements}}) {
    my $candidate = $self->{open_elements}->[$index];
    next unless $candidate->[1] & TABLE_EL;

    my $parent = $candidate->[0]->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      ## The table has an element parent: insert just before the table.
      !!!cp ('t70');
      $foster_parent_element = $parent;
      $next_sibling = $candidate->[0];
    } else {
      ## Otherwise use the element before the table in the stack.
      !!!cp ('t71');
      $foster_parent_element = $self->{open_elements}->[$index - 1]->[0];
    }
    last;
  }
  ## No table in the stack: fall back to the first entry of the stack.
  $foster_parent_element = $self->{open_elements}->[0]->[0]
      unless defined $foster_parent_element;
  $foster_parent_element->insert_before ($child, $next_sibling);
  $open_tables->[-1]->[1] = 1; # tainted
}; # $insert_to_foster
4079    
4080 wakaba 1.126 B: while (1) {
4081 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
4082 wakaba 1.79 !!!cp ('t73');
4083 wakaba 1.153 !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
4084 wakaba 1.52 ## Ignore the token
4085     ## Stay in the phase
4086     !!!next-token;
4087 wakaba 1.126 next B;
4088 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
4089 wakaba 1.52 $token->{tag_name} eq 'html') {
4090 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
4091 wakaba 1.79 !!!cp ('t79');
4092 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4093 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
4094     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
4095 wakaba 1.79 !!!cp ('t80');
4096 wakaba 1.153 !!!parse-error (type => 'after html', text => 'html', token => $token);
4097 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
4098 wakaba 1.79 } else {
4099     !!!cp ('t81');
4100 wakaba 1.52 }
4101    
4102 wakaba 1.84 !!!cp ('t82');
4103 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
4104 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
4105     for my $attr_name (keys %{$token->{attributes}}) {
4106     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
4107 wakaba 1.79 !!!cp ('t84');
4108 wakaba 1.52 $top_el->set_attribute_ns
4109     (undef, [undef, $attr_name],
4110     $token->{attributes}->{$attr_name}->{value});
4111     }
4112     }
4113 wakaba 1.125 !!!nack ('t84.1');
4114 wakaba 1.52 !!!next-token;
4115 wakaba 1.126 next B;
4116 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
4117 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
4118 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
4119 wakaba 1.79 !!!cp ('t85');
4120 wakaba 1.52 $self->{document}->append_child ($comment);
4121 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
4122 wakaba 1.79 !!!cp ('t86');
4123 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
4124     } else {
4125 wakaba 1.79 !!!cp ('t87');
4126 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4127     }
4128     !!!next-token;
4129 wakaba 1.126 next B;
4130     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
4131     if ($token->{type} == CHARACTER_TOKEN) {
4132     !!!cp ('t87.1');
4133     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4134     !!!next-token;
4135     next B;
4136     } elsif ($token->{type} == START_TAG_TOKEN) {
4137 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
4138     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
4139 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
4140     ($token->{tag_name} eq 'svg' and
4141     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
4142     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4143     !!!cp ('t87.2');
4144     #
4145     } elsif ({
4146 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
4147 wakaba 1.146 center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1,
4148     em => 1, embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1,
4149     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1,
4150     img => 1, li => 1, listing => 1, menu => 1, meta => 1,
4151     nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1,
4152     small => 1, span => 1, strong => 1, strike => 1, sub => 1,
4153     sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1,
4154 wakaba 1.126 }->{$token->{tag_name}}) {
4155     !!!cp ('t87.2');
4156     !!!parse-error (type => 'not closed',
4157 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4158 wakaba 1.126 ->manakai_local_name,
4159     token => $token);
4160    
4161     pop @{$self->{open_elements}}
4162     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4163    
4164 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4165 wakaba 1.126 ## Reprocess.
4166     next B;
4167     } else {
4168 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
4169     my $tag_name = $token->{tag_name};
4170     if ($nsuri eq $SVG_NS) {
4171     $tag_name = {
4172     altglyph => 'altGlyph',
4173     altglyphdef => 'altGlyphDef',
4174     altglyphitem => 'altGlyphItem',
4175     animatecolor => 'animateColor',
4176     animatemotion => 'animateMotion',
4177     animatetransform => 'animateTransform',
4178     clippath => 'clipPath',
4179     feblend => 'feBlend',
4180     fecolormatrix => 'feColorMatrix',
4181     fecomponenttransfer => 'feComponentTransfer',
4182     fecomposite => 'feComposite',
4183     feconvolvematrix => 'feConvolveMatrix',
4184     fediffuselighting => 'feDiffuseLighting',
4185     fedisplacementmap => 'feDisplacementMap',
4186     fedistantlight => 'feDistantLight',
4187     feflood => 'feFlood',
4188     fefunca => 'feFuncA',
4189     fefuncb => 'feFuncB',
4190     fefuncg => 'feFuncG',
4191     fefuncr => 'feFuncR',
4192     fegaussianblur => 'feGaussianBlur',
4193     feimage => 'feImage',
4194     femerge => 'feMerge',
4195     femergenode => 'feMergeNode',
4196     femorphology => 'feMorphology',
4197     feoffset => 'feOffset',
4198     fepointlight => 'fePointLight',
4199     fespecularlighting => 'feSpecularLighting',
4200     fespotlight => 'feSpotLight',
4201     fetile => 'feTile',
4202     feturbulence => 'feTurbulence',
4203     foreignobject => 'foreignObject',
4204     glyphref => 'glyphRef',
4205     lineargradient => 'linearGradient',
4206     radialgradient => 'radialGradient',
4207     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
4208     textpath => 'textPath',
4209     }->{$tag_name} || $tag_name;
4210     }
4211    
4212     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
4213    
4214     ## "adjust foreign attributes" - done in insert-element-f
4215 wakaba 1.126
4216 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
4217 wakaba 1.126
4218     if ($self->{self_closing}) {
4219     pop @{$self->{open_elements}};
4220     !!!ack ('t87.3');
4221     } else {
4222     !!!cp ('t87.4');
4223     }
4224    
4225     !!!next-token;
4226     next B;
4227     }
4228     } elsif ($token->{type} == END_TAG_TOKEN) {
4229     ## NOTE: "using the rules for secondary insertion mode" then "continue"
4230     !!!cp ('t87.5');
4231     #
4232     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4233     !!!cp ('t87.6');
4234 wakaba 1.146 !!!parse-error (type => 'not closed',
4235 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4236 wakaba 1.146 ->manakai_local_name,
4237     token => $token);
4238    
4239     pop @{$self->{open_elements}}
4240     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
4241    
4242     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
4243     ## Reprocess.
4244     next B;
4245 wakaba 1.126 } else {
4246     die "$0: $token->{type}: Unknown token type";
4247     }
4248     }
4249    
4250     if ($self->{insertion_mode} & HEAD_IMS) {
4251 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4252 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4253 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4254     !!!cp ('t88.2');
4255     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4256     } else {
4257     !!!cp ('t88.1');
4258     ## Ignore the token.
4259     !!!next-token;
4260 wakaba 1.126 next B;
4261 wakaba 1.99 }
4262 wakaba 1.52 unless (length $token->{data}) {
4263 wakaba 1.79 !!!cp ('t88');
4264 wakaba 1.52 !!!next-token;
4265 wakaba 1.126 next B;
4266 wakaba 1.1 }
4267     }
4268 wakaba 1.52
4269 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4270 wakaba 1.79 !!!cp ('t89');
4271 wakaba 1.52 ## As if <head>
4272 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4273 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4274 wakaba 1.123 push @{$self->{open_elements}},
4275     [$self->{head_element}, $el_category->{head}];
4276 wakaba 1.52
4277     ## Reprocess in the "in head" insertion mode...
4278     pop @{$self->{open_elements}};
4279    
4280     ## Reprocess in the "after head" insertion mode...
4281 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4282 wakaba 1.79 !!!cp ('t90');
4283 wakaba 1.52 ## As if </noscript>
4284     pop @{$self->{open_elements}};
4285 wakaba 1.153 !!!parse-error (type => 'in noscript:#text', token => $token);
4286 wakaba 1.1
4287 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
4288     ## As if </head>
4289     pop @{$self->{open_elements}};
4290    
4291     ## Reprocess in the "after head" insertion mode...
4292 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4293 wakaba 1.79 !!!cp ('t91');
4294 wakaba 1.52 pop @{$self->{open_elements}};
4295    
4296     ## Reprocess in the "after head" insertion mode...
4297 wakaba 1.79 } else {
4298     !!!cp ('t92');
4299 wakaba 1.1 }
4300 wakaba 1.52
4301 wakaba 1.123 ## "after head" insertion mode
4302     ## As if <body>
4303     !!!insert-element ('body',, $token);
4304     $self->{insertion_mode} = IN_BODY_IM;
4305     ## reprocess
4306 wakaba 1.126 next B;
4307 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
4308     if ($token->{tag_name} eq 'head') {
4309     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4310     !!!cp ('t93');
4311 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4312 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
4313     ($self->{head_element});
4314     push @{$self->{open_elements}},
4315     [$self->{head_element}, $el_category->{head}];
4316     $self->{insertion_mode} = IN_HEAD_IM;
4317 wakaba 1.125 !!!nack ('t93.1');
4318 wakaba 1.123 !!!next-token;
4319 wakaba 1.126 next B;
4320 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4321 wakaba 1.139 !!!cp ('t93.2');
4322 wakaba 1.153 !!!parse-error (type => 'after head', text => 'head',
4323     token => $token);
4324 wakaba 1.139 ## Ignore the token
4325     !!!nack ('t93.3');
4326     !!!next-token;
4327     next B;
4328 wakaba 1.125 } else {
4329     !!!cp ('t95');
4330 wakaba 1.153 !!!parse-error (type => 'in head:head',
4331     token => $token); # or in head noscript
4332 wakaba 1.125 ## Ignore the token
4333     !!!nack ('t95.1');
4334     !!!next-token;
4335 wakaba 1.126 next B;
4336 wakaba 1.125 }
4337     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4338 wakaba 1.126 !!!cp ('t96');
4339     ## As if <head>
4340     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4341     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4342     push @{$self->{open_elements}},
4343     [$self->{head_element}, $el_category->{head}];
4344 wakaba 1.52
4345 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
4346     ## Reprocess in the "in head" insertion mode...
4347     } else {
4348     !!!cp ('t97');
4349     }
4350 wakaba 1.52
4351 wakaba 1.49 if ($token->{tag_name} eq 'base') {
4352 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4353 wakaba 1.79 !!!cp ('t98');
4354 wakaba 1.49 ## As if </noscript>
4355     pop @{$self->{open_elements}};
4356 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'base',
4357     token => $token);
4358 wakaba 1.49
4359 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4360 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4361 wakaba 1.79 } else {
4362     !!!cp ('t99');
4363 wakaba 1.49 }
4364    
4365     ## NOTE: There is a "as if in head" code clone.
4366 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4367 wakaba 1.79 !!!cp ('t100');
4368 wakaba 1.153 !!!parse-error (type => 'after head',
4369     text => $token->{tag_name}, token => $token);
4370 wakaba 1.123 push @{$self->{open_elements}},
4371     [$self->{head_element}, $el_category->{head}];
4372 wakaba 1.79 } else {
4373     !!!cp ('t101');
4374 wakaba 1.49 }
4375 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4376 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4377 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4378 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4379 wakaba 1.125 !!!nack ('t101.1');
4380 wakaba 1.49 !!!next-token;
4381 wakaba 1.126 next B;
4382 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
4383 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4384 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4385 wakaba 1.79 !!!cp ('t102');
4386 wakaba 1.153 !!!parse-error (type => 'after head',
4387     text => $token->{tag_name}, token => $token);
4388 wakaba 1.123 push @{$self->{open_elements}},
4389     [$self->{head_element}, $el_category->{head}];
4390 wakaba 1.79 } else {
4391     !!!cp ('t103');
4392 wakaba 1.25 }
4393 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4394 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4395 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4396 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4397 wakaba 1.125 !!!ack ('t103.1');
4398 wakaba 1.1 !!!next-token;
4399 wakaba 1.126 next B;
4400 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4401     ## NOTE: There is a "as if in head" code clone.
4402 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4403 wakaba 1.79 !!!cp ('t104');
4404 wakaba 1.153 !!!parse-error (type => 'after head',
4405     text => $token->{tag_name}, token => $token);
4406 wakaba 1.123 push @{$self->{open_elements}},
4407     [$self->{head_element}, $el_category->{head}];
4408 wakaba 1.79 } else {
4409     !!!cp ('t105');
4410 wakaba 1.34 }
4411 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4412 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4413 wakaba 1.34
4414     unless ($self->{confident}) {
4415 wakaba 1.134 if ($token->{attributes}->{charset}) {
4416 wakaba 1.79 !!!cp ('t106');
4417 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4418     ## in the {change_encoding} callback.
4419 wakaba 1.63 $self->{change_encoding}
4420 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
4421     $token);
4422 wakaba 1.66
4423     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4424     ->set_user_data (manakai_has_reference =>
4425     $token->{attributes}->{charset}
4426     ->{has_reference});
4427 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4428     if ($token->{attributes}->{content}->{value}
4429 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4430 wakaba 1.70 [\x09-\x0D\x20]*=
4431 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4432 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
4433 wakaba 1.79 !!!cp ('t107');
4434 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
4435     ## in the {change_encoding} callback.
4436 wakaba 1.63 $self->{change_encoding}
4437 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4438     $token);
4439 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4440     ->set_user_data (manakai_has_reference =>
4441     $token->{attributes}->{content}
4442     ->{has_reference});
4443 wakaba 1.79 } else {
4444     !!!cp ('t108');
4445 wakaba 1.63 }
4446 wakaba 1.34 }
4447 wakaba 1.66 } else {
4448     if ($token->{attributes}->{charset}) {
4449 wakaba 1.79 !!!cp ('t109');
4450 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4451     ->set_user_data (manakai_has_reference =>
4452     $token->{attributes}->{charset}
4453     ->{has_reference});
4454     }
4455 wakaba 1.68 if ($token->{attributes}->{content}) {
4456 wakaba 1.79 !!!cp ('t110');
4457 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4458     ->set_user_data (manakai_has_reference =>
4459     $token->{attributes}->{content}
4460     ->{has_reference});
4461     }
4462 wakaba 1.34 }
4463    
4464 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4465 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4466 wakaba 1.125 !!!ack ('t110.1');
4467 wakaba 1.34 !!!next-token;
4468 wakaba 1.126 next B;
4469 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
4470 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4471 wakaba 1.79 !!!cp ('t111');
4472 wakaba 1.49 ## As if </noscript>
4473     pop @{$self->{open_elements}};
4474 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'title',
4475     token => $token);
4476 wakaba 1.49
4477 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4478 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4479 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4480 wakaba 1.79 !!!cp ('t112');
4481 wakaba 1.153 !!!parse-error (type => 'after head',
4482     text => $token->{tag_name}, token => $token);
4483 wakaba 1.123 push @{$self->{open_elements}},
4484     [$self->{head_element}, $el_category->{head}];
4485 wakaba 1.79 } else {
4486     !!!cp ('t113');
4487 wakaba 1.25 }
4488 wakaba 1.49
4489     ## NOTE: There is a "as if in head" code clone.
4490 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4491     : $self->{open_elements}->[-1]->[0];
4492 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4493 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4494 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4495 wakaba 1.126 next B;
4496 wakaba 1.148 } elsif ($token->{tag_name} eq 'style' or
4497     $token->{tag_name} eq 'noframes') {
4498 wakaba 1.25 ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4499 wakaba 1.54 ## insertion mode IN_HEAD_IM)
4500 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4501 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4502 wakaba 1.79 !!!cp ('t114');
4503 wakaba 1.153 !!!parse-error (type => 'after head',
4504     text => $token->{tag_name}, token => $token);
4505 wakaba 1.123 push @{$self->{open_elements}},
4506     [$self->{head_element}, $el_category->{head}];
4507 wakaba 1.79 } else {
4508     !!!cp ('t115');
4509 wakaba 1.25 }
4510 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4511 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4512 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4513 wakaba 1.126 next B;
4514 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
4515 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
4516 wakaba 1.79 !!!cp ('t116');
4517 wakaba 1.25 ## NOTE: and scripting is disalbed
4518 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4519 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4520 wakaba 1.125 !!!nack ('t116.1');
4521 wakaba 1.1 !!!next-token;
4522 wakaba 1.126 next B;
4523 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4524 wakaba 1.79 !!!cp ('t117');
4525 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'noscript',
4526     token => $token);
4527 wakaba 1.1 ## Ignore the token
4528 wakaba 1.125 !!!nack ('t117.1');
4529 wakaba 1.41 !!!next-token;
4530 wakaba 1.126 next B;
4531 wakaba 1.1 } else {
4532 wakaba 1.79 !!!cp ('t118');
4533 wakaba 1.25 #
4534 wakaba 1.1 }
4535 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4536 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4537 wakaba 1.79 !!!cp ('t119');
4538 wakaba 1.49 ## As if </noscript>
4539     pop @{$self->{open_elements}};
4540 wakaba 1.153 !!!parse-error (type => 'in noscript', text => 'script',
4541     token => $token);
4542 wakaba 1.49
4543 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4544 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4545 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4546 wakaba 1.79 !!!cp ('t120');
4547 wakaba 1.153 !!!parse-error (type => 'after head',
4548     text => $token->{tag_name}, token => $token);
4549 wakaba 1.123 push @{$self->{open_elements}},
4550     [$self->{head_element}, $el_category->{head}];
4551 wakaba 1.79 } else {
4552     !!!cp ('t121');
4553 wakaba 1.25 }
4554 wakaba 1.49
4555 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4556 wakaba 1.100 $script_start_tag->();
4557     pop @{$self->{open_elements}} # <head>
4558 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4559 wakaba 1.126 next B;
4560 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4561 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4562 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4563 wakaba 1.79 !!!cp ('t122');
4564 wakaba 1.49 ## As if </noscript>
4565     pop @{$self->{open_elements}};
4566 wakaba 1.153 !!!parse-error (type => 'in noscript',
4567     text => $token->{tag_name}, token => $token);
4568 wakaba 1.49
4569     ## Reprocess in the "in head" insertion mode...
4570     ## As if </head>
4571     pop @{$self->{open_elements}};
4572    
4573     ## Reprocess in the "after head" insertion mode...
4574 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4575 wakaba 1.79 !!!cp ('t124');
4576 wakaba 1.49 pop @{$self->{open_elements}};
4577    
4578     ## Reprocess in the "after head" insertion mode...
4579 wakaba 1.79 } else {
4580     !!!cp ('t125');
4581 wakaba 1.49 }
4582    
4583     ## "after head" insertion mode
4584 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4585 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4586 wakaba 1.79 !!!cp ('t126');
4587 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4588     } elsif ($token->{tag_name} eq 'frameset') {
4589 wakaba 1.79 !!!cp ('t127');
4590 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4591     } else {
4592     die "$0: tag name: $self->{tag_name}";
4593     }
4594 wakaba 1.125 !!!nack ('t127.1');
4595 wakaba 1.1 !!!next-token;
4596 wakaba 1.126 next B;
4597 wakaba 1.1 } else {
4598 wakaba 1.79 !!!cp ('t128');
4599 wakaba 1.1 #
4600     }
4601 wakaba 1.49
4602 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4603 wakaba 1.79 !!!cp ('t129');
4604 wakaba 1.49 ## As if </noscript>
4605     pop @{$self->{open_elements}};
4606 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4607     text => $token->{tag_name}, token => $token);
4608 wakaba 1.49
4609     ## Reprocess in the "in head" insertion mode...
4610     ## As if </head>
4611 wakaba 1.25 pop @{$self->{open_elements}};
4612 wakaba 1.49
4613     ## Reprocess in the "after head" insertion mode...
4614 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4615 wakaba 1.79 !!!cp ('t130');
4616 wakaba 1.49 ## As if </head>
4617 wakaba 1.25 pop @{$self->{open_elements}};
4618 wakaba 1.49
4619     ## Reprocess in the "after head" insertion mode...
4620 wakaba 1.79 } else {
4621     !!!cp ('t131');
4622 wakaba 1.49 }
4623    
4624     ## "after head" insertion mode
4625     ## As if <body>
4626 wakaba 1.116 !!!insert-element ('body',, $token);
4627 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4628 wakaba 1.49 ## reprocess
4629 wakaba 1.125 !!!ack-later;
4630 wakaba 1.126 next B;
4631 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4632 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4633 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4634 wakaba 1.79 !!!cp ('t132');
4635 wakaba 1.50 ## As if <head>
4636 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4637 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4638 wakaba 1.123 push @{$self->{open_elements}},
4639     [$self->{head_element}, $el_category->{head}];
4640 wakaba 1.50
4641     ## Reprocess in the "in head" insertion mode...
4642     pop @{$self->{open_elements}};
4643 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4644 wakaba 1.50 !!!next-token;
4645 wakaba 1.126 next B;
4646 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4647 wakaba 1.79 !!!cp ('t133');
4648 wakaba 1.49 ## As if </noscript>
4649     pop @{$self->{open_elements}};
4650 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4651     text => 'head', token => $token);
4652 wakaba 1.49
4653     ## Reprocess in the "in head" insertion mode...
4654 wakaba 1.50 pop @{$self->{open_elements}};
4655 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4656 wakaba 1.50 !!!next-token;
4657 wakaba 1.126 next B;
4658 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4659 wakaba 1.79 !!!cp ('t134');
4660 wakaba 1.49 pop @{$self->{open_elements}};
4661 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4662 wakaba 1.49 !!!next-token;
4663 wakaba 1.126 next B;
4664 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4665     !!!cp ('t134.1');
4666 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'head',
4667     token => $token);
4668 wakaba 1.139 ## Ignore the token
4669     !!!next-token;
4670     next B;
4671 wakaba 1.49 } else {
4672 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4673 wakaba 1.49 }
4674     } elsif ($token->{tag_name} eq 'noscript') {
4675 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4676 wakaba 1.79 !!!cp ('t136');
4677 wakaba 1.49 pop @{$self->{open_elements}};
4678 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4679 wakaba 1.49 !!!next-token;
4680 wakaba 1.126 next B;
4681 wakaba 1.139 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM or
4682     $self->{insertion_mode} == AFTER_HEAD_IM) {
4683 wakaba 1.79 !!!cp ('t137');
4684 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4685     text => 'noscript', token => $token);
4686 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4687     !!!next-token;
4688 wakaba 1.126 next B;
4689 wakaba 1.49 } else {
4690 wakaba 1.79 !!!cp ('t138');
4691 wakaba 1.49 #
4692     }
4693     } elsif ({
4694 wakaba 1.31 body => 1, html => 1,
4695     }->{$token->{tag_name}}) {
4696 wakaba 1.139 if ($self->{insertion_mode} == BEFORE_HEAD_IM or
4697     $self->{insertion_mode} == IN_HEAD_IM or
4698     $self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4699 wakaba 1.79 !!!cp ('t140');
4700 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4701     text => $token->{tag_name}, token => $token);
4702 wakaba 1.49 ## Ignore the token
4703     !!!next-token;
4704 wakaba 1.126 next B;
4705 wakaba 1.139 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4706     !!!cp ('t140.1');
4707 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4708     text => $token->{tag_name}, token => $token);
4709 wakaba 1.139 ## Ignore the token
4710     !!!next-token;
4711     next B;
4712 wakaba 1.79 } else {
4713 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4714 wakaba 1.49 }
4715 wakaba 1.139 } elsif ($token->{tag_name} eq 'p') {
4716     !!!cp ('t142');
4717 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4718     text => $token->{tag_name}, token => $token);
4719 wakaba 1.139 ## Ignore the token
4720     !!!next-token;
4721     next B;
4722     } elsif ($token->{tag_name} eq 'br') {
4723 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4724 wakaba 1.139 !!!cp ('t142.2');
4725     ## (before head) as if <head>, (in head) as if </head>
4726 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4727 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4728 wakaba 1.139 $self->{insertion_mode} = AFTER_HEAD_IM;
4729    
4730     ## Reprocess in the "after head" insertion mode...
4731     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4732     !!!cp ('t143.2');
4733     ## As if </head>
4734     pop @{$self->{open_elements}};
4735     $self->{insertion_mode} = AFTER_HEAD_IM;
4736    
4737     ## Reprocess in the "after head" insertion mode...
4738     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4739     !!!cp ('t143.3');
4740     ## ISSUE: Two parse errors for <head><noscript></br>
4741 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4742     text => 'br', token => $token);
4743 wakaba 1.139 ## As if </noscript>
4744     pop @{$self->{open_elements}};
4745     $self->{insertion_mode} = IN_HEAD_IM;
4746 wakaba 1.50
4747     ## Reprocess in the "in head" insertion mode...
4748 wakaba 1.139 ## As if </head>
4749     pop @{$self->{open_elements}};
4750     $self->{insertion_mode} = AFTER_HEAD_IM;
4751    
4752     ## Reprocess in the "after head" insertion mode...
4753     } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4754     !!!cp ('t143.4');
4755     #
4756 wakaba 1.79 } else {
4757 wakaba 1.139 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4758 wakaba 1.50 }
4759    
4760 wakaba 1.139 ## ISSUE: does not agree with IE7 - it doesn't ignore </br>.
4761 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4762     text => 'br', token => $token);
4763 wakaba 1.139 ## Ignore the token
4764     !!!next-token;
4765     next B;
4766 wakaba 1.25 } else {
4767 wakaba 1.139 !!!cp ('t145');
4768 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4769     text => $token->{tag_name}, token => $token);
4770 wakaba 1.139 ## Ignore the token
4771     !!!next-token;
4772     next B;
4773 wakaba 1.49 }
4774    
4775 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4776 wakaba 1.79 !!!cp ('t146');
4777 wakaba 1.49 ## As if </noscript>
4778     pop @{$self->{open_elements}};
4779 wakaba 1.153 !!!parse-error (type => 'in noscript:/',
4780     text => $token->{tag_name}, token => $token);
4781 wakaba 1.49
4782     ## Reprocess in the "in head" insertion mode...
4783     ## As if </head>
4784     pop @{$self->{open_elements}};
4785    
4786     ## Reprocess in the "after head" insertion mode...
4787 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4788 wakaba 1.79 !!!cp ('t147');
4789 wakaba 1.49 ## As if </head>
4790     pop @{$self->{open_elements}};
4791    
4792     ## Reprocess in the "after head" insertion mode...
4793 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4794 wakaba 1.82 ## ISSUE: This case cannot be reached?
4795 wakaba 1.79 !!!cp ('t148');
4796 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4797     text => $token->{tag_name}, token => $token);
4798 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4799     !!!next-token;
4800 wakaba 1.126 next B;
4801 wakaba 1.79 } else {
4802     !!!cp ('t149');
4803 wakaba 1.1 }
4804    
4805 wakaba 1.49 ## "after head" insertion mode
4806     ## As if <body>
4807 wakaba 1.116 !!!insert-element ('body',, $token);
4808 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4809 wakaba 1.52 ## reprocess
4810 wakaba 1.126 next B;
4811 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4812     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4813     !!!cp ('t149.1');
4814    
4815     ## NOTE: As if <head>
4816 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4817 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4818     ($self->{head_element});
4819 wakaba 1.123 #push @{$self->{open_elements}},
4820     # [$self->{head_element}, $el_category->{head}];
4821 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4822     ## NOTE: Reprocess.
4823    
4824     ## NOTE: As if </head>
4825     #pop @{$self->{open_elements}};
4826     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4827     ## NOTE: Reprocess.
4828    
4829     #
4830     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4831     !!!cp ('t149.2');
4832    
4833     ## NOTE: As if </head>
4834     pop @{$self->{open_elements}};
4835     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4836     ## NOTE: Reprocess.
4837    
4838     #
4839     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4840     !!!cp ('t149.3');
4841    
4842 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4843 wakaba 1.104
4844     ## As if </noscript>
4845     pop @{$self->{open_elements}};
4846     #$self->{insertion_mode} = IN_HEAD_IM;
4847     ## NOTE: Reprocess.
4848    
4849     ## NOTE: As if </head>
4850     pop @{$self->{open_elements}};
4851     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4852     ## NOTE: Reprocess.
4853    
4854     #
4855     } else {
4856     !!!cp ('t149.4');
4857     #
4858     }
4859    
4860     ## NOTE: As if <body>
4861 wakaba 1.116 !!!insert-element ('body',, $token);
4862 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4863     ## NOTE: Reprocess.
4864 wakaba 1.126 next B;
4865 wakaba 1.104 } else {
4866     die "$0: $token->{type}: Unknown token type";
4867     }
4868 wakaba 1.52
4869     ## ISSUE: An issue in the spec.
4870 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4871 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4872 wakaba 1.79 !!!cp ('t150');
4873 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4874     $reconstruct_active_formatting_elements->($insert_to_current);
4875    
4876     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4877    
4878     !!!next-token;
4879 wakaba 1.126 next B;
4880 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4881 wakaba 1.52 if ({
4882     caption => 1, col => 1, colgroup => 1, tbody => 1,
4883     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4884     }->{$token->{tag_name}}) {
4885 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4886 wakaba 1.52 ## have an element in table scope
4887 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4888 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4889 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4890 wakaba 1.79 !!!cp ('t151');
4891 wakaba 1.108
4892     ## Close the cell
4893 wakaba 1.125 !!!back-token; # <x>
4894 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4895     tag_name => $node->[0]->manakai_local_name,
4896 wakaba 1.114 line => $token->{line},
4897     column => $token->{column}};
4898 wakaba 1.126 next B;
4899 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4900 wakaba 1.79 !!!cp ('t152');
4901 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4902     last;
4903 wakaba 1.52 }
4904 wakaba 1.108 }
4905    
4906     !!!cp ('t153');
4907     !!!parse-error (type => 'start tag not allowed',
4908 wakaba 1.153 text => $token->{tag_name}, token => $token);
4909 wakaba 1.108 ## Ignore the token
4910 wakaba 1.125 !!!nack ('t153.1');
4911 wakaba 1.108 !!!next-token;
4912 wakaba 1.126 next B;
4913 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4914 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
4915     token => $token);
4916 wakaba 1.52
4917 wakaba 1.108 ## NOTE: As if </caption>.
4918 wakaba 1.52 ## have a table element in table scope
4919     my $i;
4920 wakaba 1.108 INSCOPE: {
4921     for (reverse 0..$#{$self->{open_elements}}) {
4922     my $node = $self->{open_elements}->[$_];
4923 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4924 wakaba 1.108 !!!cp ('t155');
4925     $i = $_;
4926     last INSCOPE;
4927 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4928 wakaba 1.108 !!!cp ('t156');
4929     last;
4930     }
4931 wakaba 1.52 }
4932 wakaba 1.108
4933     !!!cp ('t157');
4934     !!!parse-error (type => 'start tag not allowed',
4935 wakaba 1.153 text => $token->{tag_name}, token => $token);
4936 wakaba 1.108 ## Ignore the token
4937 wakaba 1.125 !!!nack ('t157.1');
4938 wakaba 1.108 !!!next-token;
4939 wakaba 1.126 next B;
4940 wakaba 1.52 } # INSCOPE
4941    
4942     ## generate implied end tags
4943 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4944     & END_TAG_OPTIONAL_EL) {
4945 wakaba 1.79 !!!cp ('t158');
4946 wakaba 1.86 pop @{$self->{open_elements}};
4947 wakaba 1.52 }
4948    
4949 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4950 wakaba 1.79 !!!cp ('t159');
4951 wakaba 1.122 !!!parse-error (type => 'not closed',
4952 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
4953 wakaba 1.122 ->manakai_local_name,
4954     token => $token);
4955 wakaba 1.79 } else {
4956     !!!cp ('t160');
4957 wakaba 1.52 }
4958    
4959     splice @{$self->{open_elements}}, $i;
4960    
4961     $clear_up_to_marker->();
4962    
4963 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4964 wakaba 1.52
4965     ## reprocess
4966 wakaba 1.125 !!!ack-later;
4967 wakaba 1.126 next B;
4968 wakaba 1.52 } else {
4969 wakaba 1.79 !!!cp ('t161');
4970 wakaba 1.52 #
4971     }
4972     } else {
4973 wakaba 1.79 !!!cp ('t162');
4974 wakaba 1.52 #
4975     }
4976 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4977 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4978 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4979 wakaba 1.43 ## have an element in table scope
4980 wakaba 1.52 my $i;
4981 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4982     my $node = $self->{open_elements}->[$_];
4983 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4984 wakaba 1.79 !!!cp ('t163');
4985 wakaba 1.52 $i = $_;
4986 wakaba 1.43 last INSCOPE;
4987 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4988 wakaba 1.79 !!!cp ('t164');
4989 wakaba 1.43 last INSCOPE;
4990     }
4991     } # INSCOPE
4992 wakaba 1.52 unless (defined $i) {
4993 wakaba 1.79 !!!cp ('t165');
4994 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
4995     text => $token->{tag_name},
4996     token => $token);
4997 wakaba 1.43 ## Ignore the token
4998     !!!next-token;
4999 wakaba 1.126 next B;
5000 wakaba 1.43 }
5001    
5002 wakaba 1.52 ## generate implied end tags
5003 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5004     & END_TAG_OPTIONAL_EL) {
5005 wakaba 1.79 !!!cp ('t166');
5006 wakaba 1.86 pop @{$self->{open_elements}};
5007 wakaba 1.52 }
5008 wakaba 1.86
5009 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
5010     ne $token->{tag_name}) {
5011 wakaba 1.79 !!!cp ('t167');
5012 wakaba 1.122 !!!parse-error (type => 'not closed',
5013 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5014 wakaba 1.122 ->manakai_local_name,
5015     token => $token);
5016 wakaba 1.79 } else {
5017     !!!cp ('t168');
5018 wakaba 1.52 }
5019    
5020     splice @{$self->{open_elements}}, $i;
5021    
5022     $clear_up_to_marker->();
5023    
5024 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5025 wakaba 1.52
5026     !!!next-token;
5027 wakaba 1.126 next B;
5028 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
5029 wakaba 1.79 !!!cp ('t169');
5030 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5031     text => $token->{tag_name}, token => $token);
5032 wakaba 1.52 ## Ignore the token
5033     !!!next-token;
5034 wakaba 1.126 next B;
5035 wakaba 1.52 } else {
5036 wakaba 1.79 !!!cp ('t170');
5037 wakaba 1.52 #
5038     }
5039     } elsif ($token->{tag_name} eq 'caption') {
5040 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
5041 wakaba 1.43 ## have a table element in table scope
5042     my $i;
5043 wakaba 1.108 INSCOPE: {
5044     for (reverse 0..$#{$self->{open_elements}}) {
5045     my $node = $self->{open_elements}->[$_];
5046 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5047 wakaba 1.108 !!!cp ('t171');
5048     $i = $_;
5049     last INSCOPE;
5050 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5051 wakaba 1.108 !!!cp ('t172');
5052     last;
5053     }
5054 wakaba 1.43 }
5055 wakaba 1.108
5056     !!!cp ('t173');
5057     !!!parse-error (type => 'unmatched end tag',
5058 wakaba 1.153 text => $token->{tag_name}, token => $token);
5059 wakaba 1.108 ## Ignore the token
5060     !!!next-token;
5061 wakaba 1.126 next B;
5062 wakaba 1.43 } # INSCOPE
5063    
5064     ## generate implied end tags
5065 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
5066     & END_TAG_OPTIONAL_EL) {
5067 wakaba 1.79 !!!cp ('t174');
5068 wakaba 1.86 pop @{$self->{open_elements}};
5069 wakaba 1.43 }
5070 wakaba 1.52
5071 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5072 wakaba 1.79 !!!cp ('t175');
5073 wakaba 1.122 !!!parse-error (type => 'not closed',
5074 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5075 wakaba 1.122 ->manakai_local_name,
5076     token => $token);
5077 wakaba 1.79 } else {
5078     !!!cp ('t176');
5079 wakaba 1.52 }
5080    
5081     splice @{$self->{open_elements}}, $i;
5082    
5083     $clear_up_to_marker->();
5084    
5085 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5086 wakaba 1.52
5087     !!!next-token;
5088 wakaba 1.126 next B;
5089 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
5090 wakaba 1.79 !!!cp ('t177');
5091 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5092     text => $token->{tag_name}, token => $token);
5093 wakaba 1.52 ## Ignore the token
5094     !!!next-token;
5095 wakaba 1.126 next B;
5096 wakaba 1.52 } else {
5097 wakaba 1.79 !!!cp ('t178');
5098 wakaba 1.52 #
5099     }
5100     } elsif ({
5101     table => 1, tbody => 1, tfoot => 1,
5102     thead => 1, tr => 1,
5103     }->{$token->{tag_name}} and
5104 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
5105 wakaba 1.52 ## have an element in table scope
5106     my $i;
5107     my $tn;
5108 wakaba 1.108 INSCOPE: {
5109     for (reverse 0..$#{$self->{open_elements}}) {
5110     my $node = $self->{open_elements}->[$_];
5111 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5112 wakaba 1.108 !!!cp ('t179');
5113     $i = $_;
5114    
5115     ## Close the cell
5116 wakaba 1.125 !!!back-token; # </x>
5117 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
5118     line => $token->{line},
5119     column => $token->{column}};
5120 wakaba 1.126 next B;
5121 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
5122 wakaba 1.108 !!!cp ('t180');
5123 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
5124 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
5125     ## in scope in the stack of open elements by definition.
5126 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5127 wakaba 1.108 ## ISSUE: Can this be reached?
5128     !!!cp ('t181');
5129     last;
5130     }
5131 wakaba 1.52 }
5132 wakaba 1.108
5133 wakaba 1.79 !!!cp ('t182');
5134 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
5135 wakaba 1.153 text => $token->{tag_name}, token => $token);
5136 wakaba 1.52 ## Ignore the token
5137     !!!next-token;
5138 wakaba 1.126 next B;
5139 wakaba 1.108 } # INSCOPE
5140 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
5141 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5142 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'caption',
5143     token => $token);
5144 wakaba 1.52
5145     ## As if </caption>
5146     ## have a table element in table scope
5147     my $i;
5148     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5149     my $node = $self->{open_elements}->[$_];
5150 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
5151 wakaba 1.79 !!!cp ('t184');
5152 wakaba 1.52 $i = $_;
5153     last INSCOPE;
5154 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5155 wakaba 1.79 !!!cp ('t185');
5156 wakaba 1.52 last INSCOPE;
5157     }
5158     } # INSCOPE
5159     unless (defined $i) {
5160 wakaba 1.79 !!!cp ('t186');
5161 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5162     text => 'caption', token => $token);
5163 wakaba 1.52 ## Ignore the token
5164     !!!next-token;
5165 wakaba 1.126 next B;
5166 wakaba 1.52 }
5167    
5168     ## generate implied end tags
5169 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5170 wakaba 1.79 !!!cp ('t187');
5171 wakaba 1.86 pop @{$self->{open_elements}};
5172 wakaba 1.52 }
5173    
5174 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
5175 wakaba 1.79 !!!cp ('t188');
5176 wakaba 1.122 !!!parse-error (type => 'not closed',
5177 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5178 wakaba 1.122 ->manakai_local_name,
5179     token => $token);
5180 wakaba 1.79 } else {
5181     !!!cp ('t189');
5182 wakaba 1.52 }
5183    
5184     splice @{$self->{open_elements}}, $i;
5185    
5186     $clear_up_to_marker->();
5187    
5188 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5189 wakaba 1.52
5190     ## reprocess
5191 wakaba 1.126 next B;
5192 wakaba 1.52 } elsif ({
5193     body => 1, col => 1, colgroup => 1, html => 1,
5194     }->{$token->{tag_name}}) {
5195 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
5196 wakaba 1.79 !!!cp ('t190');
5197 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5198     text => $token->{tag_name}, token => $token);
5199 wakaba 1.52 ## Ignore the token
5200     !!!next-token;
5201 wakaba 1.126 next B;
5202 wakaba 1.52 } else {
5203 wakaba 1.79 !!!cp ('t191');
5204 wakaba 1.52 #
5205     }
5206     } elsif ({
5207     tbody => 1, tfoot => 1,
5208     thead => 1, tr => 1,
5209     }->{$token->{tag_name}} and
5210 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
5211 wakaba 1.79 !!!cp ('t192');
5212 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5213     text => $token->{tag_name}, token => $token);
5214 wakaba 1.52 ## Ignore the token
5215     !!!next-token;
5216 wakaba 1.126 next B;
5217 wakaba 1.52 } else {
5218 wakaba 1.79 !!!cp ('t193');
5219 wakaba 1.52 #
5220     }
5221 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5222     for my $entry (@{$self->{open_elements}}) {
5223 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
5224 wakaba 1.104 !!!cp ('t75');
5225 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5226 wakaba 1.104 last;
5227     }
5228     }
5229    
5230     ## Stop parsing.
5231     last B;
5232 wakaba 1.52 } else {
5233     die "$0: $token->{type}: Unknown token type";
5234     }
5235    
5236     $insert = $insert_to_current;
5237     #
5238 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
5239 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5240 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
5241     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5242     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5243 wakaba 1.52
5244 wakaba 1.95 unless (length $token->{data}) {
5245     !!!cp ('t194');
5246     !!!next-token;
5247 wakaba 1.126 next B;
5248 wakaba 1.95 } else {
5249     !!!cp ('t195');
5250     }
5251     }
5252 wakaba 1.52
5253 wakaba 1.153 !!!parse-error (type => 'in table:#text', token => $token);
5254 wakaba 1.52
5255     ## As if in body, but insert into foster parent element
5256     ## ISSUE: Spec says that "whenever a node would be inserted
5257     ## into the current node" while characters might not
5258     ## result in a new Text node.
5259     $reconstruct_active_formatting_elements->($insert_to_foster);
5260    
5261 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
5262 wakaba 1.52 # MUST
5263     my $foster_parent_element;
5264     my $next_sibling;
5265     my $prev_sibling;
5266     OE: for (reverse 0..$#{$self->{open_elements}}) {
5267 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
5268 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
5269     if (defined $parent and $parent->node_type == 1) {
5270 wakaba 1.79 !!!cp ('t196');
5271 wakaba 1.52 $foster_parent_element = $parent;
5272     $next_sibling = $self->{open_elements}->[$_]->[0];
5273     $prev_sibling = $next_sibling->previous_sibling;
5274     } else {
5275 wakaba 1.79 !!!cp ('t197');
5276 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
5277     $prev_sibling = $foster_parent_element->last_child;
5278     }
5279     last OE;
5280     }
5281     } # OE
5282     $foster_parent_element = $self->{open_elements}->[0]->[0] and
5283     $prev_sibling = $foster_parent_element->last_child
5284     unless defined $foster_parent_element;
5285     if (defined $prev_sibling and
5286     $prev_sibling->node_type == 3) {
5287 wakaba 1.79 !!!cp ('t198');
5288 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
5289     } else {
5290 wakaba 1.79 !!!cp ('t199');
5291 wakaba 1.52 $foster_parent_element->insert_before
5292     ($self->{document}->create_text_node ($token->{data}),
5293     $next_sibling);
5294     }
5295 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
5296     } else {
5297     !!!cp ('t200');
5298     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5299     }
5300 wakaba 1.52
5301 wakaba 1.95 !!!next-token;
5302 wakaba 1.126 next B;
5303 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5304 wakaba 1.153 if ({
5305     tr => ($self->{insertion_mode} != IN_ROW_IM),
5306     th => 1, td => 1,
5307     }->{$token->{tag_name}}) {
5308     if ($self->{insertion_mode} == IN_TABLE_IM) {
5309     ## Clear back to table context
5310     while (not ($self->{open_elements}->[-1]->[1]
5311     & TABLE_SCOPING_EL)) {
5312     !!!cp ('t201');
5313     pop @{$self->{open_elements}};
5314     }
5315    
5316     !!!insert-element ('tbody',, $token);
5317     $self->{insertion_mode} = IN_TABLE_BODY_IM;
5318     ## reprocess in the "in table body" insertion mode...
5319     }
5320    
5321     if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5322     unless ($token->{tag_name} eq 'tr') {
5323     !!!cp ('t202');
5324     !!!parse-error (type => 'missing start tag:tr', token => $token);
5325     }
5326 wakaba 1.43
5327 wakaba 1.153 ## Clear back to table body context
5328     while (not ($self->{open_elements}->[-1]->[1]
5329     & TABLE_ROWS_SCOPING_EL)) {
5330     !!!cp ('t203');
5331     ## ISSUE: Can this case be reached?
5332     pop @{$self->{open_elements}};
5333     }
5334 wakaba 1.43
5335 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
5336 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5337 wakaba 1.79 !!!cp ('t204');
5338 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5339 wakaba 1.125 !!!nack ('t204');
5340 wakaba 1.52 !!!next-token;
5341 wakaba 1.126 next B;
5342 wakaba 1.52 } else {
5343 wakaba 1.79 !!!cp ('t205');
5344 wakaba 1.116 !!!insert-element ('tr',, $token);
5345 wakaba 1.52 ## reprocess in the "in row" insertion mode
5346     }
5347 wakaba 1.79 } else {
5348     !!!cp ('t206');
5349 wakaba 1.52 }
5350    
5351     ## Clear back to table row context
5352 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5353     & TABLE_ROW_SCOPING_EL)) {
5354 wakaba 1.79 !!!cp ('t207');
5355 wakaba 1.52 pop @{$self->{open_elements}};
5356 wakaba 1.43 }
5357 wakaba 1.52
5358 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5359 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
5360 wakaba 1.52
5361     push @$active_formatting_elements, ['#marker', ''];
5362    
5363 wakaba 1.125 !!!nack ('t207.1');
5364 wakaba 1.52 !!!next-token;
5365 wakaba 1.126 next B;
5366 wakaba 1.52 } elsif ({
5367     caption => 1, col => 1, colgroup => 1,
5368     tbody => 1, tfoot => 1, thead => 1,
5369 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5370 wakaba 1.52 }->{$token->{tag_name}}) {
5371 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5372 wakaba 1.52 ## As if </tr>
5373 wakaba 1.43 ## have an element in table scope
5374     my $i;
5375     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5376     my $node = $self->{open_elements}->[$_];
5377 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5378 wakaba 1.79 !!!cp ('t208');
5379 wakaba 1.43 $i = $_;
5380     last INSCOPE;
5381 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5382 wakaba 1.79 !!!cp ('t209');
5383 wakaba 1.43 last INSCOPE;
5384     }
5385     } # INSCOPE
5386 wakaba 1.79 unless (defined $i) {
5387 wakaba 1.125 !!!cp ('t210');
5388 wakaba 1.83 ## TODO: This type is wrong.
5389 wakaba 1.153 !!!parse-error (type => 'unmacthed end tag',
5390     text => $token->{tag_name}, token => $token);
5391 wakaba 1.52 ## Ignore the token
5392 wakaba 1.125 !!!nack ('t210.1');
5393 wakaba 1.52 !!!next-token;
5394 wakaba 1.126 next B;
5395 wakaba 1.43 }
5396    
5397 wakaba 1.52 ## Clear back to table row context
5398 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5399     & TABLE_ROW_SCOPING_EL)) {
5400 wakaba 1.79 !!!cp ('t211');
5401 wakaba 1.83 ## ISSUE: Can this case be reached?
5402 wakaba 1.52 pop @{$self->{open_elements}};
5403 wakaba 1.1 }
5404 wakaba 1.43
5405 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5406 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5407 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5408 wakaba 1.79 !!!cp ('t212');
5409 wakaba 1.52 ## reprocess
5410 wakaba 1.125 !!!ack-later;
5411 wakaba 1.126 next B;
5412 wakaba 1.52 } else {
5413 wakaba 1.79 !!!cp ('t213');
5414 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5415     }
5416 wakaba 1.1 }
5417 wakaba 1.52
5418 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5419 wakaba 1.52 ## have an element in table scope
5420 wakaba 1.43 my $i;
5421     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5422     my $node = $self->{open_elements}->[$_];
5423 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5424 wakaba 1.79 !!!cp ('t214');
5425 wakaba 1.43 $i = $_;
5426     last INSCOPE;
5427 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5428 wakaba 1.79 !!!cp ('t215');
5429 wakaba 1.43 last INSCOPE;
5430     }
5431     } # INSCOPE
5432 wakaba 1.52 unless (defined $i) {
5433 wakaba 1.79 !!!cp ('t216');
5434 wakaba 1.153 ## TODO: This error type is wrong.
5435     !!!parse-error (type => 'unmatched end tag',
5436     text => $token->{tag_name}, token => $token);
5437 wakaba 1.52 ## Ignore the token
5438 wakaba 1.125 !!!nack ('t216.1');
5439 wakaba 1.52 !!!next-token;
5440 wakaba 1.126 next B;
5441 wakaba 1.43 }
5442 wakaba 1.52
5443     ## Clear back to table body context
5444 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5445     & TABLE_ROWS_SCOPING_EL)) {
5446 wakaba 1.79 !!!cp ('t217');
5447 wakaba 1.83 ## ISSUE: Can this state be reached?
5448 wakaba 1.52 pop @{$self->{open_elements}};
5449 wakaba 1.43 }
5450    
5451 wakaba 1.52 ## As if <{current node}>
5452     ## have an element in table scope
5453     ## true by definition
5454 wakaba 1.43
5455 wakaba 1.52 ## Clear back to table body context
5456     ## nop by definition
5457 wakaba 1.43
5458 wakaba 1.52 pop @{$self->{open_elements}};
5459 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5460 wakaba 1.52 ## reprocess in "in table" insertion mode...
5461 wakaba 1.79 } else {
5462     !!!cp ('t218');
5463 wakaba 1.52 }
5464    
5465     if ($token->{tag_name} eq 'col') {
5466     ## Clear back to table context
5467 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5468     & TABLE_SCOPING_EL)) {
5469 wakaba 1.79 !!!cp ('t219');
5470 wakaba 1.83 ## ISSUE: Can this state be reached?
5471 wakaba 1.52 pop @{$self->{open_elements}};
5472     }
5473 wakaba 1.43
5474 wakaba 1.116 !!!insert-element ('colgroup',, $token);
5475 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5476 wakaba 1.52 ## reprocess
5477 wakaba 1.125 !!!ack-later;
5478 wakaba 1.126 next B;
5479 wakaba 1.52 } elsif ({
5480     caption => 1,
5481     colgroup => 1,
5482     tbody => 1, tfoot => 1, thead => 1,
5483     }->{$token->{tag_name}}) {
5484     ## Clear back to table context
5485 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5486     & TABLE_SCOPING_EL)) {
5487 wakaba 1.79 !!!cp ('t220');
5488 wakaba 1.83 ## ISSUE: Can this state be reached?
5489 wakaba 1.52 pop @{$self->{open_elements}};
5490 wakaba 1.1 }
5491 wakaba 1.52
5492     push @$active_formatting_elements, ['#marker', '']
5493     if $token->{tag_name} eq 'caption';
5494    
5495 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5496 wakaba 1.52 $self->{insertion_mode} = {
5497 wakaba 1.54 caption => IN_CAPTION_IM,
5498     colgroup => IN_COLUMN_GROUP_IM,
5499     tbody => IN_TABLE_BODY_IM,
5500     tfoot => IN_TABLE_BODY_IM,
5501     thead => IN_TABLE_BODY_IM,
5502 wakaba 1.52 }->{$token->{tag_name}};
5503 wakaba 1.1 !!!next-token;
5504 wakaba 1.125 !!!nack ('t220.1');
5505 wakaba 1.126 next B;
5506 wakaba 1.52 } else {
5507     die "$0: in table: <>: $token->{tag_name}";
5508 wakaba 1.1 }
5509 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5510 wakaba 1.122 !!!parse-error (type => 'not closed',
5511 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5512 wakaba 1.122 ->manakai_local_name,
5513     token => $token);
5514 wakaba 1.1
5515 wakaba 1.52 ## As if </table>
5516 wakaba 1.1 ## have a table element in table scope
5517     my $i;
5518 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5519     my $node = $self->{open_elements}->[$_];
5520 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5521 wakaba 1.79 !!!cp ('t221');
5522 wakaba 1.1 $i = $_;
5523     last INSCOPE;
5524 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5525 wakaba 1.79 !!!cp ('t222');
5526 wakaba 1.1 last INSCOPE;
5527     }
5528     } # INSCOPE
5529     unless (defined $i) {
5530 wakaba 1.79 !!!cp ('t223');
5531 wakaba 1.83 ## TODO: The following is wrong, maybe.
5532 wakaba 1.153 !!!parse-error (type => 'unmatched end tag', text => 'table',
5533     token => $token);
5534 wakaba 1.52 ## Ignore tokens </table><table>
5535 wakaba 1.125 !!!nack ('t223.1');
5536 wakaba 1.1 !!!next-token;
5537 wakaba 1.126 next B;
5538 wakaba 1.1 }
5539    
5540 wakaba 1.151 ## TODO: The following steps have been removed from the latest spec.
5541 wakaba 1.1 ## generate implied end tags
5542 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5543 wakaba 1.79 !!!cp ('t224');
5544 wakaba 1.86 pop @{$self->{open_elements}};
5545 wakaba 1.1 }
5546    
5547 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5548 wakaba 1.79 !!!cp ('t225');
5549 wakaba 1.122 ## NOTE: |<table><tr><table>|
5550     !!!parse-error (type => 'not closed',
5551 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
5552 wakaba 1.122 ->manakai_local_name,
5553     token => $token);
5554 wakaba 1.79 } else {
5555     !!!cp ('t226');
5556 wakaba 1.1 }
5557    
5558 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5559 wakaba 1.95 pop @{$open_tables};
5560 wakaba 1.1
5561 wakaba 1.52 $self->_reset_insertion_mode;
5562 wakaba 1.1
5563 wakaba 1.125 ## reprocess
5564     !!!ack-later;
5565 wakaba 1.126 next B;
5566 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
5567     if (not $open_tables->[-1]->[1]) { # tainted
5568     !!!cp ('t227.8');
5569     ## NOTE: This is a "as if in head" code clone.
5570     $parse_rcdata->(CDATA_CONTENT_MODEL);
5571 wakaba 1.126 next B;
5572 wakaba 1.100 } else {
5573     !!!cp ('t227.7');
5574     #
5575     }
5576     } elsif ($token->{tag_name} eq 'script') {
5577     if (not $open_tables->[-1]->[1]) { # tainted
5578     !!!cp ('t227.6');
5579     ## NOTE: This is a "as if in head" code clone.
5580     $script_start_tag->();
5581 wakaba 1.126 next B;
5582 wakaba 1.100 } else {
5583     !!!cp ('t227.5');
5584     #
5585     }
5586 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5587     if (not $open_tables->[-1]->[1]) { # tainted
5588     if ($token->{attributes}->{type}) { ## TODO: case
5589     my $type = lc $token->{attributes}->{type}->{value};
5590     if ($type eq 'hidden') {
5591     !!!cp ('t227.3');
5592 wakaba 1.153 !!!parse-error (type => 'in table',
5593     text => $token->{tag_name}, token => $token);
5594 wakaba 1.98
5595 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5596 wakaba 1.98
5597     ## TODO: form element pointer
5598    
5599     pop @{$self->{open_elements}};
5600    
5601     !!!next-token;
5602 wakaba 1.125 !!!ack ('t227.2.1');
5603 wakaba 1.126 next B;
5604 wakaba 1.98 } else {
5605     !!!cp ('t227.2');
5606     #
5607     }
5608     } else {
5609     !!!cp ('t227.1');
5610     #
5611     }
5612     } else {
5613     !!!cp ('t227.4');
5614     #
5615     }
5616 wakaba 1.58 } else {
5617 wakaba 1.79 !!!cp ('t227');
5618 wakaba 1.58 #
5619     }
5620 wakaba 1.98
5621 wakaba 1.153 !!!parse-error (type => 'in table', text => $token->{tag_name},
5622     token => $token);
5623 wakaba 1.98
5624     $insert = $insert_to_foster;
5625     #
5626 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5627 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5628 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5629 wakaba 1.52 ## have an element in table scope
5630     my $i;
5631     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5632     my $node = $self->{open_elements}->[$_];
5633 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5634 wakaba 1.79 !!!cp ('t228');
5635 wakaba 1.52 $i = $_;
5636     last INSCOPE;
5637 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5638 wakaba 1.79 !!!cp ('t229');
5639 wakaba 1.52 last INSCOPE;
5640     }
5641     } # INSCOPE
5642     unless (defined $i) {
5643 wakaba 1.79 !!!cp ('t230');
5644 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5645     text => $token->{tag_name}, token => $token);
5646 wakaba 1.52 ## Ignore the token
5647 wakaba 1.125 !!!nack ('t230.1');
5648 wakaba 1.42 !!!next-token;
5649 wakaba 1.126 next B;
5650 wakaba 1.79 } else {
5651     !!!cp ('t232');
5652 wakaba 1.42 }
5653    
5654 wakaba 1.52 ## Clear back to table row context
5655 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5656     & TABLE_ROW_SCOPING_EL)) {
5657 wakaba 1.79 !!!cp ('t231');
5658 wakaba 1.83 ## ISSUE: Can this state be reached?
5659 wakaba 1.52 pop @{$self->{open_elements}};
5660     }
5661 wakaba 1.42
5662 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5663 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5664 wakaba 1.52 !!!next-token;
5665 wakaba 1.125 !!!nack ('t231.1');
5666 wakaba 1.126 next B;
5667 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5668 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5669 wakaba 1.52 ## As if </tr>
5670     ## have an element in table scope
5671     my $i;
5672     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5673     my $node = $self->{open_elements}->[$_];
5674 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5675 wakaba 1.79 !!!cp ('t233');
5676 wakaba 1.52 $i = $_;
5677     last INSCOPE;
5678 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5679 wakaba 1.79 !!!cp ('t234');
5680 wakaba 1.52 last INSCOPE;
5681 wakaba 1.42 }
5682 wakaba 1.52 } # INSCOPE
5683     unless (defined $i) {
5684 wakaba 1.79 !!!cp ('t235');
5685 wakaba 1.83 ## TODO: The following is wrong.
5686 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5687     text => $token->{type}, token => $token);
5688 wakaba 1.52 ## Ignore the token
5689 wakaba 1.125 !!!nack ('t236.1');
5690 wakaba 1.52 !!!next-token;
5691 wakaba 1.126 next B;
5692 wakaba 1.42 }
5693 wakaba 1.52
5694     ## Clear back to table row context
5695 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5696     & TABLE_ROW_SCOPING_EL)) {
5697 wakaba 1.79 !!!cp ('t236');
5698 wakaba 1.83 ## ISSUE: Can this state be reached?
5699 wakaba 1.46 pop @{$self->{open_elements}};
5700 wakaba 1.1 }
5701 wakaba 1.46
5702 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5703 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5704 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5705 wakaba 1.1 }
5706    
5707 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5708 wakaba 1.52 ## have an element in table scope
5709     my $i;
5710     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5711     my $node = $self->{open_elements}->[$_];
5712 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5713 wakaba 1.79 !!!cp ('t237');
5714 wakaba 1.52 $i = $_;
5715     last INSCOPE;
5716 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5717 wakaba 1.79 !!!cp ('t238');
5718 wakaba 1.52 last INSCOPE;
5719     }
5720     } # INSCOPE
5721     unless (defined $i) {
5722 wakaba 1.79 !!!cp ('t239');
5723 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5724     text => $token->{tag_name}, token => $token);
5725 wakaba 1.52 ## Ignore the token
5726 wakaba 1.125 !!!nack ('t239.1');
5727 wakaba 1.52 !!!next-token;
5728 wakaba 1.126 next B;
5729 wakaba 1.47 }
5730    
5731     ## Clear back to table body context
5732 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5733     & TABLE_ROWS_SCOPING_EL)) {
5734 wakaba 1.79 !!!cp ('t240');
5735 wakaba 1.47 pop @{$self->{open_elements}};
5736     }
5737    
5738 wakaba 1.52 ## As if </{current node}>
5739     ## have an element in table scope
5740     ## true by definition
5741    
5742     ## Clear back to table body context
5743     ## nop by definition
5744    
5745     pop @{$self->{open_elements}};
5746 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5747 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5748     }
5749    
5750 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5751     ## When you edit the code fragment below, please ensure that
5752     ## the code for <table> in the "in table" insertion mode
5753     ## is synced with it.
5754    
5755 wakaba 1.52 ## have a table element in table scope
5756     my $i;
5757     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5758     my $node = $self->{open_elements}->[$_];
5759 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5760 wakaba 1.79 !!!cp ('t241');
5761 wakaba 1.52 $i = $_;
5762     last INSCOPE;
5763 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5764 wakaba 1.79 !!!cp ('t242');
5765 wakaba 1.52 last INSCOPE;
5766 wakaba 1.47 }
5767 wakaba 1.52 } # INSCOPE
5768     unless (defined $i) {
5769 wakaba 1.79 !!!cp ('t243');
5770 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5771     text => $token->{tag_name}, token => $token);
5772 wakaba 1.52 ## Ignore the token
5773 wakaba 1.125 !!!nack ('t243.1');
5774 wakaba 1.52 !!!next-token;
5775 wakaba 1.126 next B;
5776 wakaba 1.3 }
5777 wakaba 1.52
5778     splice @{$self->{open_elements}}, $i;
5779 wakaba 1.95 pop @{$open_tables};
5780 wakaba 1.1
5781 wakaba 1.52 $self->_reset_insertion_mode;
5782 wakaba 1.47
5783     !!!next-token;
5784 wakaba 1.126 next B;
5785 wakaba 1.47 } elsif ({
5786 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5787 wakaba 1.52 }->{$token->{tag_name}} and
5788 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5789 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5790 wakaba 1.52 ## have an element in table scope
5791     my $i;
5792     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5793     my $node = $self->{open_elements}->[$_];
5794 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5795 wakaba 1.79 !!!cp ('t247');
5796 wakaba 1.52 $i = $_;
5797     last INSCOPE;
5798 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5799 wakaba 1.79 !!!cp ('t248');
5800 wakaba 1.52 last INSCOPE;
5801     }
5802     } # INSCOPE
5803     unless (defined $i) {
5804 wakaba 1.79 !!!cp ('t249');
5805 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5806     text => $token->{tag_name}, token => $token);
5807 wakaba 1.52 ## Ignore the token
5808 wakaba 1.125 !!!nack ('t249.1');
5809 wakaba 1.52 !!!next-token;
5810 wakaba 1.126 next B;
5811 wakaba 1.52 }
5812    
5813 wakaba 1.48 ## As if </tr>
5814     ## have an element in table scope
5815     my $i;
5816     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5817     my $node = $self->{open_elements}->[$_];
5818 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5819 wakaba 1.79 !!!cp ('t250');
5820 wakaba 1.48 $i = $_;
5821     last INSCOPE;
5822 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5823 wakaba 1.79 !!!cp ('t251');
5824 wakaba 1.48 last INSCOPE;
5825     }
5826     } # INSCOPE
5827 wakaba 1.52 unless (defined $i) {
5828 wakaba 1.79 !!!cp ('t252');
5829 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5830     text => 'tr', token => $token);
5831 wakaba 1.52 ## Ignore the token
5832 wakaba 1.125 !!!nack ('t252.1');
5833 wakaba 1.52 !!!next-token;
5834 wakaba 1.126 next B;
5835 wakaba 1.52 }
5836 wakaba 1.48
5837     ## Clear back to table row context
5838 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5839     & TABLE_ROW_SCOPING_EL)) {
5840 wakaba 1.79 !!!cp ('t253');
5841 wakaba 1.83 ## ISSUE: Can this case be reached?
5842 wakaba 1.48 pop @{$self->{open_elements}};
5843     }
5844    
5845     pop @{$self->{open_elements}}; # tr
5846 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5847 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5848     }
5849    
5850     ## have an element in table scope
5851     my $i;
5852     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5853     my $node = $self->{open_elements}->[$_];
5854 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5855 wakaba 1.79 !!!cp ('t254');
5856 wakaba 1.52 $i = $_;
5857     last INSCOPE;
5858 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5859 wakaba 1.79 !!!cp ('t255');
5860 wakaba 1.52 last INSCOPE;
5861     }
5862     } # INSCOPE
5863     unless (defined $i) {
5864 wakaba 1.79 !!!cp ('t256');
5865 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5866     text => $token->{tag_name}, token => $token);
5867 wakaba 1.52 ## Ignore the token
5868 wakaba 1.125 !!!nack ('t256.1');
5869 wakaba 1.52 !!!next-token;
5870 wakaba 1.126 next B;
5871 wakaba 1.52 }
5872    
5873     ## Clear back to table body context
5874 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5875     & TABLE_ROWS_SCOPING_EL)) {
5876 wakaba 1.79 !!!cp ('t257');
5877 wakaba 1.83 ## ISSUE: Can this case be reached?
5878 wakaba 1.52 pop @{$self->{open_elements}};
5879     }
5880    
5881     pop @{$self->{open_elements}};
5882 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5883 wakaba 1.125 !!!nack ('t257.1');
5884 wakaba 1.52 !!!next-token;
5885 wakaba 1.126 next B;
5886 wakaba 1.52 } elsif ({
5887     body => 1, caption => 1, col => 1, colgroup => 1,
5888     html => 1, td => 1, th => 1,
5889 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5890     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5891 wakaba 1.52 }->{$token->{tag_name}}) {
5892 wakaba 1.125 !!!cp ('t258');
5893 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5894     text => $token->{tag_name}, token => $token);
5895 wakaba 1.125 ## Ignore the token
5896     !!!nack ('t258.1');
5897     !!!next-token;
5898 wakaba 1.126 next B;
5899 wakaba 1.58 } else {
5900 wakaba 1.79 !!!cp ('t259');
5901 wakaba 1.153 !!!parse-error (type => 'in table:/',
5902     text => $token->{tag_name}, token => $token);
5903 wakaba 1.52
5904 wakaba 1.58 $insert = $insert_to_foster;
5905     #
5906     }
5907 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5908 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5909 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5910 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5911 wakaba 1.104 !!!cp ('t259.1');
5912 wakaba 1.105 #
5913 wakaba 1.104 } else {
5914     !!!cp ('t259.2');
5915 wakaba 1.105 #
5916 wakaba 1.104 }
5917    
5918     ## Stop parsing
5919     last B;
5920 wakaba 1.58 } else {
5921     die "$0: $token->{type}: Unknown token type";
5922     }
5923 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5924 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5925 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5926     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5927     unless (length $token->{data}) {
5928 wakaba 1.79 !!!cp ('t260');
5929 wakaba 1.52 !!!next-token;
5930 wakaba 1.126 next B;
5931 wakaba 1.52 }
5932     }
5933    
5934 wakaba 1.79 !!!cp ('t261');
5935 wakaba 1.52 #
5936 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5937 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5938 wakaba 1.79 !!!cp ('t262');
5939 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5940 wakaba 1.52 pop @{$self->{open_elements}};
5941 wakaba 1.125 !!!ack ('t262.1');
5942 wakaba 1.52 !!!next-token;
5943 wakaba 1.126 next B;
5944 wakaba 1.52 } else {
5945 wakaba 1.79 !!!cp ('t263');
5946 wakaba 1.52 #
5947     }
5948 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5949 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5950 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5951 wakaba 1.79 !!!cp ('t264');
5952 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5953     text => 'colgroup', token => $token);
5954 wakaba 1.52 ## Ignore the token
5955     !!!next-token;
5956 wakaba 1.126 next B;
5957 wakaba 1.52 } else {
5958 wakaba 1.79 !!!cp ('t265');
5959 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5960 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5961 wakaba 1.52 !!!next-token;
5962 wakaba 1.126 next B;
5963 wakaba 1.52 }
5964     } elsif ($token->{tag_name} eq 'col') {
5965 wakaba 1.79 !!!cp ('t266');
5966 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5967     text => 'col', token => $token);
5968 wakaba 1.52 ## Ignore the token
5969     !!!next-token;
5970 wakaba 1.126 next B;
5971 wakaba 1.52 } else {
5972 wakaba 1.79 !!!cp ('t267');
5973 wakaba 1.52 #
5974     }
5975 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5976 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5977 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5978     !!!cp ('t270.2');
5979     ## Stop parsing.
5980     last B;
5981     } else {
5982     ## NOTE: As if </colgroup>.
5983     !!!cp ('t270.1');
5984     pop @{$self->{open_elements}}; # colgroup
5985     $self->{insertion_mode} = IN_TABLE_IM;
5986     ## Reprocess.
5987 wakaba 1.126 next B;
5988 wakaba 1.104 }
5989     } else {
5990     die "$0: $token->{type}: Unknown token type";
5991     }
5992 wakaba 1.52
5993     ## As if </colgroup>
5994 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5995 wakaba 1.79 !!!cp ('t269');
5996 wakaba 1.104 ## TODO: Wrong error type?
5997 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
5998     text => 'colgroup', token => $token);
5999 wakaba 1.52 ## Ignore the token
6000 wakaba 1.125 !!!nack ('t269.1');
6001 wakaba 1.52 !!!next-token;
6002 wakaba 1.126 next B;
6003 wakaba 1.52 } else {
6004 wakaba 1.79 !!!cp ('t270');
6005 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
6006 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
6007 wakaba 1.125 !!!ack-later;
6008 wakaba 1.52 ## reprocess
6009 wakaba 1.126 next B;
6010 wakaba 1.52 }
6011 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
6012 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
6013 wakaba 1.79 !!!cp ('t271');
6014 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
6015     !!!next-token;
6016 wakaba 1.126 next B;
6017 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
6018 wakaba 1.123 if ($token->{tag_name} eq 'option') {
6019     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6020     !!!cp ('t272');
6021     ## As if </option>
6022     pop @{$self->{open_elements}};
6023     } else {
6024     !!!cp ('t273');
6025     }
6026 wakaba 1.52
6027 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6028 wakaba 1.125 !!!nack ('t273.1');
6029 wakaba 1.123 !!!next-token;
6030 wakaba 1.126 next B;
6031 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
6032     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6033     !!!cp ('t274');
6034     ## As if </option>
6035     pop @{$self->{open_elements}};
6036     } else {
6037     !!!cp ('t275');
6038     }
6039 wakaba 1.52
6040 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6041     !!!cp ('t276');
6042     ## As if </optgroup>
6043     pop @{$self->{open_elements}};
6044     } else {
6045     !!!cp ('t277');
6046     }
6047 wakaba 1.52
6048 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6049 wakaba 1.125 !!!nack ('t277.1');
6050 wakaba 1.123 !!!next-token;
6051 wakaba 1.126 next B;
6052 wakaba 1.146 } elsif ({
6053     select => 1, input => 1, textarea => 1,
6054     }->{$token->{tag_name}} or
6055 wakaba 1.101 ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6056     {
6057     caption => 1, table => 1,
6058     tbody => 1, tfoot => 1, thead => 1,
6059     tr => 1, td => 1, th => 1,
6060     }->{$token->{tag_name}})) {
6061     ## TODO: The type below is not good - <select> is replaced by </select>
6062 wakaba 1.153 !!!parse-error (type => 'not closed', text => 'select',
6063     token => $token);
6064 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
6065     ## as if there were </select> (otherwise).
6066 wakaba 1.123 ## have an element in table scope
6067     my $i;
6068     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6069     my $node = $self->{open_elements}->[$_];
6070     if ($node->[1] & SELECT_EL) {
6071     !!!cp ('t278');
6072     $i = $_;
6073     last INSCOPE;
6074     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6075     !!!cp ('t279');
6076     last INSCOPE;
6077     }
6078     } # INSCOPE
6079     unless (defined $i) {
6080     !!!cp ('t280');
6081 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6082     text => 'select', token => $token);
6083 wakaba 1.123 ## Ignore the token
6084 wakaba 1.125 !!!nack ('t280.1');
6085 wakaba 1.123 !!!next-token;
6086 wakaba 1.126 next B;
6087 wakaba 1.123 }
6088 wakaba 1.52
6089 wakaba 1.123 !!!cp ('t281');
6090     splice @{$self->{open_elements}}, $i;
6091 wakaba 1.52
6092 wakaba 1.123 $self->_reset_insertion_mode;
6093 wakaba 1.47
6094 wakaba 1.101 if ($token->{tag_name} eq 'select') {
6095 wakaba 1.125 !!!nack ('t281.2');
6096 wakaba 1.101 !!!next-token;
6097 wakaba 1.126 next B;
6098 wakaba 1.101 } else {
6099     !!!cp ('t281.1');
6100 wakaba 1.125 !!!ack-later;
6101 wakaba 1.101 ## Reprocess the token.
6102 wakaba 1.126 next B;
6103 wakaba 1.101 }
6104 wakaba 1.58 } else {
6105 wakaba 1.79 !!!cp ('t282');
6106 wakaba 1.153 !!!parse-error (type => 'in select',
6107     text => $token->{tag_name}, token => $token);
6108 wakaba 1.58 ## Ignore the token
6109 wakaba 1.125 !!!nack ('t282.1');
6110 wakaba 1.58 !!!next-token;
6111 wakaba 1.126 next B;
6112 wakaba 1.58 }
6113     } elsif ($token->{type} == END_TAG_TOKEN) {
6114 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
6115     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
6116     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
6117     !!!cp ('t283');
6118     ## As if </option>
6119     splice @{$self->{open_elements}}, -2;
6120     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
6121     !!!cp ('t284');
6122     pop @{$self->{open_elements}};
6123     } else {
6124     !!!cp ('t285');
6125 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6126     text => $token->{tag_name}, token => $token);
6127 wakaba 1.123 ## Ignore the token
6128     }
6129 wakaba 1.125 !!!nack ('t285.1');
6130 wakaba 1.123 !!!next-token;
6131 wakaba 1.126 next B;
6132 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
6133     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
6134     !!!cp ('t286');
6135     pop @{$self->{open_elements}};
6136     } else {
6137     !!!cp ('t287');
6138 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6139     text => $token->{tag_name}, token => $token);
6140 wakaba 1.123 ## Ignore the token
6141     }
6142 wakaba 1.125 !!!nack ('t287.1');
6143 wakaba 1.123 !!!next-token;
6144 wakaba 1.126 next B;
6145 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
6146     ## have an element in table scope
6147     my $i;
6148     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6149     my $node = $self->{open_elements}->[$_];
6150     if ($node->[1] & SELECT_EL) {
6151     !!!cp ('t288');
6152     $i = $_;
6153     last INSCOPE;
6154     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6155     !!!cp ('t289');
6156     last INSCOPE;
6157     }
6158     } # INSCOPE
6159     unless (defined $i) {
6160     !!!cp ('t290');
6161 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6162     text => $token->{tag_name}, token => $token);
6163 wakaba 1.123 ## Ignore the token
6164 wakaba 1.125 !!!nack ('t290.1');
6165 wakaba 1.123 !!!next-token;
6166 wakaba 1.126 next B;
6167 wakaba 1.123 }
6168 wakaba 1.52
6169 wakaba 1.123 !!!cp ('t291');
6170     splice @{$self->{open_elements}}, $i;
6171 wakaba 1.52
6172 wakaba 1.123 $self->_reset_insertion_mode;
6173 wakaba 1.52
6174 wakaba 1.125 !!!nack ('t291.1');
6175 wakaba 1.123 !!!next-token;
6176 wakaba 1.126 next B;
6177 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
6178     {
6179     caption => 1, table => 1, tbody => 1,
6180     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
6181     }->{$token->{tag_name}}) {
6182 wakaba 1.83 ## TODO: The following is wrong?
6183 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6184     text => $token->{tag_name}, token => $token);
6185 wakaba 1.52
6186 wakaba 1.123 ## have an element in table scope
6187     my $i;
6188     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6189     my $node = $self->{open_elements}->[$_];
6190     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6191     !!!cp ('t292');
6192     $i = $_;
6193     last INSCOPE;
6194     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6195     !!!cp ('t293');
6196     last INSCOPE;
6197     }
6198     } # INSCOPE
6199     unless (defined $i) {
6200     !!!cp ('t294');
6201     ## Ignore the token
6202 wakaba 1.125 !!!nack ('t294.1');
6203 wakaba 1.123 !!!next-token;
6204 wakaba 1.126 next B;
6205 wakaba 1.123 }
6206 wakaba 1.52
6207 wakaba 1.123 ## As if </select>
6208     ## have an element in table scope
6209     undef $i;
6210     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6211     my $node = $self->{open_elements}->[$_];
6212     if ($node->[1] & SELECT_EL) {
6213     !!!cp ('t295');
6214     $i = $_;
6215     last INSCOPE;
6216     } elsif ($node->[1] & TABLE_SCOPING_EL) {
6217 wakaba 1.83 ## ISSUE: Can this state be reached?
6218 wakaba 1.123 !!!cp ('t296');
6219     last INSCOPE;
6220     }
6221     } # INSCOPE
6222     unless (defined $i) {
6223     !!!cp ('t297');
6224 wakaba 1.83 ## TODO: The following error type is correct?
6225 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6226     text => 'select', token => $token);
6227 wakaba 1.123 ## Ignore the </select> token
6228 wakaba 1.125 !!!nack ('t297.1');
6229 wakaba 1.123 !!!next-token; ## TODO: ok?
6230 wakaba 1.126 next B;
6231 wakaba 1.123 }
6232 wakaba 1.52
6233 wakaba 1.123 !!!cp ('t298');
6234     splice @{$self->{open_elements}}, $i;
6235 wakaba 1.52
6236 wakaba 1.123 $self->_reset_insertion_mode;
6237 wakaba 1.52
6238 wakaba 1.125 !!!ack-later;
6239 wakaba 1.123 ## reprocess
6240 wakaba 1.126 next B;
6241 wakaba 1.58 } else {
6242 wakaba 1.79 !!!cp ('t299');
6243 wakaba 1.153 !!!parse-error (type => 'in select:/',
6244     text => $token->{tag_name}, token => $token);
6245 wakaba 1.52 ## Ignore the token
6246 wakaba 1.125 !!!nack ('t299.3');
6247 wakaba 1.52 !!!next-token;
6248 wakaba 1.126 next B;
6249 wakaba 1.58 }
6250 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6251 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6252 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6253     !!!cp ('t299.1');
6254 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6255 wakaba 1.104 } else {
6256     !!!cp ('t299.2');
6257     }
6258    
6259     ## Stop parsing.
6260     last B;
6261 wakaba 1.58 } else {
6262     die "$0: $token->{type}: Unknown token type";
6263     }
6264 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6265 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6266 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6267     my $data = $1;
6268     ## As if in body
6269     $reconstruct_active_formatting_elements->($insert_to_current);
6270    
6271     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6272    
6273     unless (length $token->{data}) {
6274 wakaba 1.79 !!!cp ('t300');
6275 wakaba 1.52 !!!next-token;
6276 wakaba 1.126 next B;
6277 wakaba 1.52 }
6278     }
6279    
6280 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6281 wakaba 1.79 !!!cp ('t301');
6282 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6283 wakaba 1.52
6284 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6285 wakaba 1.79 } else {
6286     !!!cp ('t302');
6287 wakaba 1.52 }
6288    
6289     ## "after body" insertion mode
6290 wakaba 1.153 !!!parse-error (type => 'after body:#text', token => $token);
6291 wakaba 1.52
6292 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6293 wakaba 1.52 ## reprocess
6294 wakaba 1.126 next B;
6295 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6296 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6297 wakaba 1.79 !!!cp ('t303');
6298 wakaba 1.153 !!!parse-error (type => 'after html',
6299     text => $token->{tag_name}, token => $token);
6300 wakaba 1.52
6301 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6302 wakaba 1.79 } else {
6303     !!!cp ('t304');
6304 wakaba 1.52 }
6305    
6306     ## "after body" insertion mode
6307 wakaba 1.153 !!!parse-error (type => 'after body',
6308     text => $token->{tag_name}, token => $token);
6309 wakaba 1.52
6310 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6311 wakaba 1.125 !!!ack-later;
6312 wakaba 1.52 ## reprocess
6313 wakaba 1.126 next B;
6314 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6315 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6316 wakaba 1.79 !!!cp ('t305');
6317 wakaba 1.153 !!!parse-error (type => 'after html:/',
6318     text => $token->{tag_name}, token => $token);
6319 wakaba 1.52
6320 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6321 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
6322 wakaba 1.79 } else {
6323     !!!cp ('t306');
6324 wakaba 1.52 }
6325    
6326     ## "after body" insertion mode
6327     if ($token->{tag_name} eq 'html') {
6328     if (defined $self->{inner_html_node}) {
6329 wakaba 1.79 !!!cp ('t307');
6330 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6331     text => 'html', token => $token);
6332 wakaba 1.52 ## Ignore the token
6333     !!!next-token;
6334 wakaba 1.126 next B;
6335 wakaba 1.52 } else {
6336 wakaba 1.79 !!!cp ('t308');
6337 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
6338 wakaba 1.52 !!!next-token;
6339 wakaba 1.126 next B;
6340 wakaba 1.52 }
6341     } else {
6342 wakaba 1.79 !!!cp ('t309');
6343 wakaba 1.153 !!!parse-error (type => 'after body:/',
6344     text => $token->{tag_name}, token => $token);
6345 wakaba 1.52
6346 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
6347 wakaba 1.52 ## reprocess
6348 wakaba 1.126 next B;
6349 wakaba 1.52 }
6350 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6351     !!!cp ('t309.2');
6352     ## Stop parsing
6353     last B;
6354 wakaba 1.52 } else {
6355     die "$0: $token->{type}: Unknown token type";
6356     }
6357 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
6358 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6359 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
6360     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6361    
6362     unless (length $token->{data}) {
6363 wakaba 1.79 !!!cp ('t310');
6364 wakaba 1.52 !!!next-token;
6365 wakaba 1.126 next B;
6366 wakaba 1.52 }
6367     }
6368    
6369     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
6370 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6371 wakaba 1.79 !!!cp ('t311');
6372 wakaba 1.153 !!!parse-error (type => 'in frameset:#text', token => $token);
6373 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6374 wakaba 1.79 !!!cp ('t312');
6375 wakaba 1.153 !!!parse-error (type => 'after frameset:#text', token => $token);
6376 wakaba 1.158 } else { # "after after frameset"
6377 wakaba 1.79 !!!cp ('t313');
6378 wakaba 1.153 !!!parse-error (type => 'after html:#text', token => $token);
6379 wakaba 1.52 }
6380    
6381     ## Ignore the token.
6382     if (length $token->{data}) {
6383 wakaba 1.79 !!!cp ('t314');
6384 wakaba 1.52 ## reprocess the rest of characters
6385     } else {
6386 wakaba 1.79 !!!cp ('t315');
6387 wakaba 1.52 !!!next-token;
6388     }
6389 wakaba 1.126 next B;
6390 wakaba 1.52 }
6391    
6392     die qq[$0: Character "$token->{data}"];
6393 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6394 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6395 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6396 wakaba 1.79 !!!cp ('t318');
6397 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6398 wakaba 1.125 !!!nack ('t318.1');
6399 wakaba 1.52 !!!next-token;
6400 wakaba 1.126 next B;
6401 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
6402 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6403 wakaba 1.79 !!!cp ('t319');
6404 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6405 wakaba 1.52 pop @{$self->{open_elements}};
6406 wakaba 1.125 !!!ack ('t319.1');
6407 wakaba 1.52 !!!next-token;
6408 wakaba 1.126 next B;
6409 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
6410 wakaba 1.79 !!!cp ('t320');
6411 wakaba 1.148 ## NOTE: As if in head.
6412 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6413 wakaba 1.126 next B;
6414 wakaba 1.158
6415     ## NOTE: |<!DOCTYPE HTML><frameset></frameset></html><noframes></noframes>|
6416     ## has no parse error.
6417 wakaba 1.52 } else {
6418 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6419 wakaba 1.79 !!!cp ('t321');
6420 wakaba 1.153 !!!parse-error (type => 'in frameset',
6421     text => $token->{tag_name}, token => $token);
6422 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6423 wakaba 1.79 !!!cp ('t322');
6424 wakaba 1.153 !!!parse-error (type => 'after frameset',
6425     text => $token->{tag_name}, token => $token);
6426 wakaba 1.158 } else { # "after after frameset"
6427     !!!cp ('t322.2');
6428     !!!parse-error (type => 'after after frameset',
6429     text => $token->{tag_name}, token => $token);
6430 wakaba 1.52 }
6431     ## Ignore the token
6432 wakaba 1.125 !!!nack ('t322.1');
6433 wakaba 1.52 !!!next-token;
6434 wakaba 1.126 next B;
6435 wakaba 1.52 }
6436 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6437 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6438 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6439 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6440 wakaba 1.52 @{$self->{open_elements}} == 1) {
6441 wakaba 1.79 !!!cp ('t325');
6442 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
6443     text => $token->{tag_name}, token => $token);
6444 wakaba 1.52 ## Ignore the token
6445     !!!next-token;
6446     } else {
6447 wakaba 1.79 !!!cp ('t326');
6448 wakaba 1.52 pop @{$self->{open_elements}};
6449     !!!next-token;
6450     }
6451 wakaba 1.47
6452 wakaba 1.52 if (not defined $self->{inner_html_node} and
6453 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6454 wakaba 1.79 !!!cp ('t327');
6455 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6456 wakaba 1.79 } else {
6457     !!!cp ('t328');
6458 wakaba 1.52 }
6459 wakaba 1.126 next B;
6460 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
6461 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6462 wakaba 1.79 !!!cp ('t329');
6463 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6464 wakaba 1.52 !!!next-token;
6465 wakaba 1.126 next B;
6466 wakaba 1.52 } else {
6467 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6468 wakaba 1.79 !!!cp ('t330');
6469 wakaba 1.153 !!!parse-error (type => 'in frameset:/',
6470     text => $token->{tag_name}, token => $token);
6471 wakaba 1.158 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6472     !!!cp ('t330.1');
6473     !!!parse-error (type => 'after frameset:/',
6474     text => $token->{tag_name}, token => $token);
6475     } else { # "after after frameset"
6476 wakaba 1.79 !!!cp ('t331');
6477 wakaba 1.158 !!!parse-error (type => 'after after frameset:/',
6478 wakaba 1.153 text => $token->{tag_name}, token => $token);
6479 wakaba 1.52 }
6480     ## Ignore the token
6481     !!!next-token;
6482 wakaba 1.126 next B;
6483 wakaba 1.52 }
6484 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6485 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6486 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6487     !!!cp ('t331.1');
6488 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6489 wakaba 1.104 } else {
6490     !!!cp ('t331.2');
6491     }
6492    
6493     ## Stop parsing
6494     last B;
6495 wakaba 1.52 } else {
6496     die "$0: $token->{type}: Unknown token type";
6497     }
6498 wakaba 1.47
6499 wakaba 1.52 ## ISSUE: An issue in spec here
6500     } else {
6501     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6502     }
6503 wakaba 1.47
6504 wakaba 1.52 ## "in body" insertion mode
6505 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
6506 wakaba 1.52 if ($token->{tag_name} eq 'script') {
6507 wakaba 1.79 !!!cp ('t332');
6508 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6509 wakaba 1.100 $script_start_tag->();
6510 wakaba 1.126 next B;
6511 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
6512 wakaba 1.79 !!!cp ('t333');
6513 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6514 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6515 wakaba 1.126 next B;
6516 wakaba 1.52 } elsif ({
6517     base => 1, link => 1,
6518     }->{$token->{tag_name}}) {
6519 wakaba 1.79 !!!cp ('t334');
6520 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6521 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6522 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6523 wakaba 1.125 !!!ack ('t334.1');
6524 wakaba 1.52 !!!next-token;
6525 wakaba 1.126 next B;
6526 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
6527     ## NOTE: This is an "as if in head" code clone, only "-t" differs
6528 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6529 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6530 wakaba 1.46
6531 wakaba 1.52 unless ($self->{confident}) {
6532 wakaba 1.134 if ($token->{attributes}->{charset}) {
6533 wakaba 1.79 !!!cp ('t335');
6534 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6535     ## in the {change_encoding} callback.
6536 wakaba 1.63 $self->{change_encoding}
6537 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
6538 wakaba 1.66
6539     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6540     ->set_user_data (manakai_has_reference =>
6541     $token->{attributes}->{charset}
6542     ->{has_reference});
6543 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
6544     if ($token->{attributes}->{content}->{value}
6545 wakaba 1.144 =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6546 wakaba 1.70 [\x09-\x0D\x20]*=
6547 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6548 wakaba 1.145 ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {
6549 wakaba 1.79 !!!cp ('t336');
6550 wakaba 1.134 ## NOTE: Whether the encoding is supported or not is handled
6551     ## in the {change_encoding} callback.
6552 wakaba 1.63 $self->{change_encoding}
6553 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6554 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6555     ->set_user_data (manakai_has_reference =>
6556     $token->{attributes}->{content}
6557     ->{has_reference});
6558 wakaba 1.63 }
6559 wakaba 1.52 }
6560 wakaba 1.66 } else {
6561     if ($token->{attributes}->{charset}) {
6562 wakaba 1.79 !!!cp ('t337');
6563 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6564     ->set_user_data (manakai_has_reference =>
6565     $token->{attributes}->{charset}
6566     ->{has_reference});
6567     }
6568 wakaba 1.68 if ($token->{attributes}->{content}) {
6569 wakaba 1.79 !!!cp ('t338');
6570 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6571     ->set_user_data (manakai_has_reference =>
6572     $token->{attributes}->{content}
6573     ->{has_reference});
6574     }
6575 wakaba 1.52 }
6576 wakaba 1.1
6577 wakaba 1.125 !!!ack ('t338.1');
6578 wakaba 1.52 !!!next-token;
6579 wakaba 1.126 next B;
6580 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
6581 wakaba 1.79 !!!cp ('t341');
6582 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6583 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6584 wakaba 1.126 next B;
6585 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
6586 wakaba 1.153 !!!parse-error (type => 'in body', text => 'body', token => $token);
6587 wakaba 1.46
6588 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
6589 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6590 wakaba 1.79 !!!cp ('t342');
6591 wakaba 1.52 ## Ignore the token
6592     } else {
6593     my $body_el = $self->{open_elements}->[1]->[0];
6594     for my $attr_name (keys %{$token->{attributes}}) {
6595     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6596 wakaba 1.79 !!!cp ('t343');
6597 wakaba 1.52 $body_el->set_attribute_ns
6598     (undef, [undef, $attr_name],
6599     $token->{attributes}->{$attr_name}->{value});
6600     }
6601     }
6602     }
6603 wakaba 1.125 !!!nack ('t343.1');
6604 wakaba 1.52 !!!next-token;
6605 wakaba 1.126 next B;
6606 wakaba 1.52 } elsif ({
6607     address => 1, blockquote => 1, center => 1, dir => 1,
6608 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6609     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6610 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6611     pre => 1, listing => 1,
6612 wakaba 1.109 form => 1,
6613     table => 1,
6614     hr => 1,
6615 wakaba 1.52 }->{$token->{tag_name}}) {
6616 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6617     !!!cp ('t350');
6618 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6619 wakaba 1.109 ## Ignore the token
6620 wakaba 1.125 !!!nack ('t350.1');
6621 wakaba 1.109 !!!next-token;
6622 wakaba 1.126 next B;
6623 wakaba 1.109 }
6624    
6625 wakaba 1.52 ## has a p element in scope
6626     INSCOPE: for (reverse @{$self->{open_elements}}) {
6627 wakaba 1.123 if ($_->[1] & P_EL) {
6628 wakaba 1.79 !!!cp ('t344');
6629 wakaba 1.125 !!!back-token; # <form>
6630 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6631     line => $token->{line}, column => $token->{column}};
6632 wakaba 1.126 next B;
6633 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6634 wakaba 1.79 !!!cp ('t345');
6635 wakaba 1.52 last INSCOPE;
6636     }
6637     } # INSCOPE
6638    
6639 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6640 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6641 wakaba 1.125 !!!nack ('t346.1');
6642 wakaba 1.52 !!!next-token;
6643 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6644 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6645     unless (length $token->{data}) {
6646 wakaba 1.79 !!!cp ('t346');
6647 wakaba 1.1 !!!next-token;
6648 wakaba 1.79 } else {
6649     !!!cp ('t349');
6650 wakaba 1.52 }
6651 wakaba 1.79 } else {
6652     !!!cp ('t348');
6653 wakaba 1.52 }
6654 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6655     !!!cp ('t347.1');
6656     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6657    
6658 wakaba 1.125 !!!nack ('t347.2');
6659 wakaba 1.109 !!!next-token;
6660     } elsif ($token->{tag_name} eq 'table') {
6661     !!!cp ('t382');
6662     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6663    
6664     $self->{insertion_mode} = IN_TABLE_IM;
6665    
6666 wakaba 1.125 !!!nack ('t382.1');
6667 wakaba 1.109 !!!next-token;
6668     } elsif ($token->{tag_name} eq 'hr') {
6669     !!!cp ('t386');
6670     pop @{$self->{open_elements}};
6671    
6672 wakaba 1.125 !!!nack ('t386.1');
6673 wakaba 1.109 !!!next-token;
6674 wakaba 1.52 } else {
6675 wakaba 1.125 !!!nack ('t347.1');
6676 wakaba 1.52 !!!next-token;
6677     }
6678 wakaba 1.126 next B;
6679 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6680 wakaba 1.52 ## has a p element in scope
6681     INSCOPE: for (reverse @{$self->{open_elements}}) {
6682 wakaba 1.123 if ($_->[1] & P_EL) {
6683 wakaba 1.79 !!!cp ('t353');
6684 wakaba 1.125 !!!back-token; # <x>
6685 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6686     line => $token->{line}, column => $token->{column}};
6687 wakaba 1.126 next B;
6688 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6689 wakaba 1.79 !!!cp ('t354');
6690 wakaba 1.52 last INSCOPE;
6691     }
6692     } # INSCOPE
6693    
6694     ## Step 1
6695     my $i = -1;
6696     my $node = $self->{open_elements}->[$i];
6697 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6698     dt => {dt => 1, dd => 1},
6699     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6700 wakaba 1.52 LI: {
6701     ## Step 2
6702 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6703 wakaba 1.52 if ($i != -1) {
6704 wakaba 1.79 !!!cp ('t355');
6705 wakaba 1.122 !!!parse-error (type => 'not closed',
6706 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
6707 wakaba 1.122 ->manakai_local_name,
6708     token => $token);
6709 wakaba 1.79 } else {
6710     !!!cp ('t356');
6711 wakaba 1.52 }
6712     splice @{$self->{open_elements}}, $i;
6713     last LI;
6714 wakaba 1.79 } else {
6715     !!!cp ('t357');
6716 wakaba 1.52 }
6717    
6718     ## Step 3
6719 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6720 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6721 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6722     $node->[1] & SCOPING_EL) and
6723     not ($node->[1] & ADDRESS_EL) and
6724     not ($node->[1] & DIV_EL)) {
6725 wakaba 1.79 !!!cp ('t358');
6726 wakaba 1.52 last LI;
6727     }
6728    
6729 wakaba 1.79 !!!cp ('t359');
6730 wakaba 1.52 ## Step 4
6731     $i--;
6732     $node = $self->{open_elements}->[$i];
6733     redo LI;
6734     } # LI
6735    
6736 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6737 wakaba 1.125 !!!nack ('t359.1');
6738 wakaba 1.52 !!!next-token;
6739 wakaba 1.126 next B;
6740 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6741     ## has a p element in scope
6742     INSCOPE: for (reverse @{$self->{open_elements}}) {
6743 wakaba 1.123 if ($_->[1] & P_EL) {
6744 wakaba 1.79 !!!cp ('t367');
6745 wakaba 1.125 !!!back-token; # <plaintext>
6746 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6747     line => $token->{line}, column => $token->{column}};
6748 wakaba 1.126 next B;
6749 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6750 wakaba 1.79 !!!cp ('t368');
6751 wakaba 1.52 last INSCOPE;
6752 wakaba 1.46 }
6753 wakaba 1.52 } # INSCOPE
6754    
6755 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6756 wakaba 1.52
6757     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6758    
6759 wakaba 1.125 !!!nack ('t368.1');
6760 wakaba 1.52 !!!next-token;
6761 wakaba 1.126 next B;
6762 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6763     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6764     my $node = $active_formatting_elements->[$i];
6765 wakaba 1.123 if ($node->[1] & A_EL) {
6766 wakaba 1.79 !!!cp ('t371');
6767 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6768 wakaba 1.52
6769 wakaba 1.125 !!!back-token; # <a>
6770 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6771     line => $token->{line}, column => $token->{column}};
6772 wakaba 1.113 $formatting_end_tag->($token);
6773 wakaba 1.52
6774     AFE2: for (reverse 0..$#$active_formatting_elements) {
6775     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
6776 wakaba 1.79 !!!cp ('t372');
6777 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
6778     last AFE2;
6779 wakaba 1.1 }
6780 wakaba 1.52 } # AFE2
6781     OE: for (reverse 0..$#{$self->{open_elements}}) {
6782     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
6783 wakaba 1.79 !!!cp ('t373');
6784 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
6785     last OE;
6786 wakaba 1.1 }
6787 wakaba 1.52 } # OE
6788     last AFE;
6789     } elsif ($node->[0] eq '#marker') {
6790 wakaba 1.79 !!!cp ('t374');
6791 wakaba 1.52 last AFE;
6792     }
6793     } # AFE
6794    
6795     $reconstruct_active_formatting_elements->($insert_to_current);
6796 wakaba 1.1
6797 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6798 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6799 wakaba 1.1
6800 wakaba 1.125 !!!nack ('t374.1');
6801 wakaba 1.52 !!!next-token;
6802 wakaba 1.126 next B;
6803 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6804     $reconstruct_active_formatting_elements->($insert_to_current);
6805 wakaba 1.1
6806 wakaba 1.52 ## has a |nobr| element in scope
6807     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6808     my $node = $self->{open_elements}->[$_];
6809 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6810 wakaba 1.79 !!!cp ('t376');
6811 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6812 wakaba 1.125 !!!back-token; # <nobr>
6813 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6814     line => $token->{line}, column => $token->{column}};
6815 wakaba 1.126 next B;
6816 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6817 wakaba 1.79 !!!cp ('t377');
6818 wakaba 1.52 last INSCOPE;
6819     }
6820     } # INSCOPE
6821    
6822 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6823 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6824    
6825 wakaba 1.125 !!!nack ('t377.1');
6826 wakaba 1.52 !!!next-token;
6827 wakaba 1.126 next B;
6828 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6829     ## has a button element in scope
6830     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6831     my $node = $self->{open_elements}->[$_];
6832 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6833 wakaba 1.79 !!!cp ('t378');
6834 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6835 wakaba 1.125 !!!back-token; # <button>
6836 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6837     line => $token->{line}, column => $token->{column}};
6838 wakaba 1.126 next B;
6839 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6840 wakaba 1.79 !!!cp ('t379');
6841 wakaba 1.52 last INSCOPE;
6842     }
6843     } # INSCOPE
6844    
6845     $reconstruct_active_formatting_elements->($insert_to_current);
6846    
6847 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6848 wakaba 1.85
6849     ## TODO: associate with $self->{form_element} if defined
6850    
6851 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6852 wakaba 1.1
6853 wakaba 1.125 !!!nack ('t379.1');
6854 wakaba 1.52 !!!next-token;
6855 wakaba 1.126 next B;
6856 wakaba 1.103 } elsif ({
6857 wakaba 1.109 xmp => 1,
6858     iframe => 1,
6859     noembed => 1,
6860 wakaba 1.148 noframes => 1, ## NOTE: This is an "as if in head" code clone.
6861 wakaba 1.109 noscript => 0, ## TODO: 1 if scripting is enabled
6862 wakaba 1.103 }->{$token->{tag_name}}) {
6863 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6864     !!!cp ('t381');
6865     $reconstruct_active_formatting_elements->($insert_to_current);
6866     } else {
6867     !!!cp ('t399');
6868     }
6869     ## NOTE: There is an "as if in body" code clone.
6870 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6871 wakaba 1.126 next B;
6872 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6873 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6874 wakaba 1.52
6875     if (defined $self->{form_element}) {
6876 wakaba 1.79 !!!cp ('t389');
6877 wakaba 1.52 ## Ignore the token
6878 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
6879 wakaba 1.52 !!!next-token;
6880 wakaba 1.126 next B;
6881 wakaba 1.52 } else {
6882 wakaba 1.147 !!!ack ('t391.1');
6883    
6884 wakaba 1.52 my $at = $token->{attributes};
6885     my $form_attrs;
6886     $form_attrs->{action} = $at->{action} if $at->{action};
6887     my $prompt_attr = $at->{prompt};
6888     $at->{name} = {name => 'name', value => 'isindex'};
6889     delete $at->{action};
6890     delete $at->{prompt};
6891     my @tokens = (
6892 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6893 wakaba 1.114 attributes => $form_attrs,
6894     line => $token->{line}, column => $token->{column}},
6895     {type => START_TAG_TOKEN, tag_name => 'hr',
6896     line => $token->{line}, column => $token->{column}},
6897     {type => START_TAG_TOKEN, tag_name => 'p',
6898     line => $token->{line}, column => $token->{column}},
6899     {type => START_TAG_TOKEN, tag_name => 'label',
6900     line => $token->{line}, column => $token->{column}},
6901 wakaba 1.52 );
6902     if ($prompt_attr) {
6903 wakaba 1.79 !!!cp ('t390');
6904 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6905 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6906     };
6907 wakaba 1.1 } else {
6908 wakaba 1.79 !!!cp ('t391');
6909 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6910 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6911 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6912     }; # SHOULD
6913 wakaba 1.52 ## TODO: make this configurable
6914 wakaba 1.1 }
6915 wakaba 1.52 push @tokens,
6916 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6917     line => $token->{line}, column => $token->{column}},
6918 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6919 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6920     line => $token->{line}, column => $token->{column}},
6921     {type => END_TAG_TOKEN, tag_name => 'p',
6922     line => $token->{line}, column => $token->{column}},
6923     {type => START_TAG_TOKEN, tag_name => 'hr',
6924     line => $token->{line}, column => $token->{column}},
6925     {type => END_TAG_TOKEN, tag_name => 'form',
6926     line => $token->{line}, column => $token->{column}};
6927 wakaba 1.52 !!!back-token (@tokens);
6928 wakaba 1.125 !!!next-token;
6929 wakaba 1.126 next B;
6930 wakaba 1.52 }
6931     } elsif ($token->{tag_name} eq 'textarea') {
6932     my $tag_name = $token->{tag_name};
6933     my $el;
6934 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
6935 wakaba 1.52
6936     ## TODO: $self->{form_element} if defined
6937     $self->{content_model} = RCDATA_CONTENT_MODEL;
6938     delete $self->{escape}; # MUST
6939    
6940     $insert->($el);
6941    
6942     my $text = '';
6943 wakaba 1.125 !!!nack ('t392.1');
6944 wakaba 1.52 !!!next-token;
6945 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6946 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6947 wakaba 1.51 unless (length $token->{data}) {
6948 wakaba 1.79 !!!cp ('t392');
6949 wakaba 1.51 !!!next-token;
6950 wakaba 1.79 } else {
6951     !!!cp ('t393');
6952 wakaba 1.51 }
6953 wakaba 1.79 } else {
6954     !!!cp ('t394');
6955 wakaba 1.51 }
6956 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6957 wakaba 1.79 !!!cp ('t395');
6958 wakaba 1.52 $text .= $token->{data};
6959     !!!next-token;
6960     }
6961     if (length $text) {
6962 wakaba 1.79 !!!cp ('t396');
6963 wakaba 1.52 $el->manakai_append_text ($text);
6964     }
6965    
6966     $self->{content_model} = PCDATA_CONTENT_MODEL;
6967 wakaba 1.51
6968 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6969 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6970 wakaba 1.79 !!!cp ('t397');
6971 wakaba 1.52 ## Ignore the token
6972     } else {
6973 wakaba 1.79 !!!cp ('t398');
6974 wakaba 1.153 !!!parse-error (type => 'in RCDATA:#eof', token => $token);
6975 wakaba 1.51 }
6976 wakaba 1.52 !!!next-token;
6977 wakaba 1.126 next B;
6978 wakaba 1.151 } elsif ($token->{tag_name} eq 'rt' or
6979     $token->{tag_name} eq 'rp') {
6980     ## has a |ruby| element in scope
6981     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6982     my $node = $self->{open_elements}->[$_];
6983     if ($node->[1] & RUBY_EL) {
6984     !!!cp ('t398.1');
6985     ## generate implied end tags
6986     while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6987     !!!cp ('t398.2');
6988     pop @{$self->{open_elements}};
6989     }
6990     unless ($self->{open_elements}->[-1]->[1] & RUBY_EL) {
6991     !!!cp ('t398.3');
6992     !!!parse-error (type => 'not closed',
6993 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
6994 wakaba 1.151 ->manakai_local_name,
6995     token => $token);
6996     pop @{$self->{open_elements}}
6997     while not $self->{open_elements}->[-1]->[1] & RUBY_EL;
6998     }
6999     last INSCOPE;
7000     } elsif ($node->[1] & SCOPING_EL) {
7001     !!!cp ('t398.4');
7002     last INSCOPE;
7003     }
7004     } # INSCOPE
7005    
7006     !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7007    
7008     !!!nack ('t398.5');
7009     !!!next-token;
7010     redo B;
7011 wakaba 1.126 } elsif ($token->{tag_name} eq 'math' or
7012     $token->{tag_name} eq 'svg') {
7013     $reconstruct_active_formatting_elements->($insert_to_current);
7014 wakaba 1.131
7015 wakaba 1.155 ## "Adjust MathML attributes" ('math' only) - done in insert-element-f
7016    
7017 wakaba 1.131 ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
7018    
7019     ## "adjust foreign attributes" - done in insert-element-f
7020 wakaba 1.126
7021 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
7022 wakaba 1.126
7023     if ($self->{self_closing}) {
7024     pop @{$self->{open_elements}};
7025     !!!ack ('t398.1');
7026     } else {
7027     !!!cp ('t398.2');
7028     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
7029     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
7030     ## mode, "in body" (not "in foreign content") secondary insertion
7031     ## mode, maybe.
7032     }
7033    
7034     !!!next-token;
7035     next B;
7036 wakaba 1.52 } elsif ({
7037     caption => 1, col => 1, colgroup => 1, frame => 1,
7038     frameset => 1, head => 1, option => 1, optgroup => 1,
7039     tbody => 1, td => 1, tfoot => 1, th => 1,
7040     thead => 1, tr => 1,
7041     }->{$token->{tag_name}}) {
7042 wakaba 1.79 !!!cp ('t401');
7043 wakaba 1.153 !!!parse-error (type => 'in body',
7044     text => $token->{tag_name}, token => $token);
7045 wakaba 1.52 ## Ignore the token
7046 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
7047 wakaba 1.52 !!!next-token;
7048 wakaba 1.126 next B;
7049 wakaba 1.52
7050     ## ISSUE: An issue on HTML5 new elements in the spec.
7051     } else {
7052 wakaba 1.110 if ($token->{tag_name} eq 'image') {
7053     !!!cp ('t384');
7054 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
7055 wakaba 1.110 $token->{tag_name} = 'img';
7056     } else {
7057     !!!cp ('t385');
7058     }
7059    
7060     ## NOTE: There is an "as if <br>" code clone.
7061 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
7062    
7063 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
7064 wakaba 1.109
7065 wakaba 1.110 if ({
7066     applet => 1, marquee => 1, object => 1,
7067     }->{$token->{tag_name}}) {
7068     !!!cp ('t380');
7069     push @$active_formatting_elements, ['#marker', ''];
7070 wakaba 1.125 !!!nack ('t380.1');
7071 wakaba 1.110 } elsif ({
7072     b => 1, big => 1, em => 1, font => 1, i => 1,
7073     s => 1, small => 1, strile => 1,
7074     strong => 1, tt => 1, u => 1,
7075     }->{$token->{tag_name}}) {
7076     !!!cp ('t375');
7077     push @$active_formatting_elements, $self->{open_elements}->[-1];
7078 wakaba 1.125 !!!nack ('t375.1');
7079 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
7080     !!!cp ('t388');
7081     ## TODO: associate with $self->{form_element} if defined
7082     pop @{$self->{open_elements}};
7083 wakaba 1.125 !!!ack ('t388.2');
7084 wakaba 1.110 } elsif ({
7085     area => 1, basefont => 1, bgsound => 1, br => 1,
7086     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
7087     #image => 1,
7088     }->{$token->{tag_name}}) {
7089     !!!cp ('t388.1');
7090     pop @{$self->{open_elements}};
7091 wakaba 1.125 !!!ack ('t388.3');
7092 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
7093 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
7094    
7095     if ($self->{insertion_mode} & TABLE_IMS or
7096     $self->{insertion_mode} & BODY_TABLE_IMS or
7097     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
7098     !!!cp ('t400.1');
7099     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
7100     } else {
7101     !!!cp ('t400.2');
7102     $self->{insertion_mode} = IN_SELECT_IM;
7103     }
7104 wakaba 1.125 !!!nack ('t400.3');
7105 wakaba 1.110 } else {
7106 wakaba 1.125 !!!nack ('t402');
7107 wakaba 1.109 }
7108 wakaba 1.51
7109 wakaba 1.52 !!!next-token;
7110 wakaba 1.126 next B;
7111 wakaba 1.52 }
7112 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
7113 wakaba 1.52 if ($token->{tag_name} eq 'body') {
7114 wakaba 1.107 ## has a |body| element in scope
7115     my $i;
7116 wakaba 1.111 INSCOPE: {
7117     for (reverse @{$self->{open_elements}}) {
7118 wakaba 1.123 if ($_->[1] & BODY_EL) {
7119 wakaba 1.111 !!!cp ('t405');
7120     $i = $_;
7121     last INSCOPE;
7122 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
7123 wakaba 1.111 !!!cp ('t405.1');
7124     last;
7125     }
7126 wakaba 1.52 }
7127 wakaba 1.111
7128     !!!parse-error (type => 'start tag not allowed',
7129 wakaba 1.153 text => $token->{tag_name}, token => $token);
7130 wakaba 1.107 ## NOTE: Ignore the token.
7131 wakaba 1.52 !!!next-token;
7132 wakaba 1.126 next B;
7133 wakaba 1.111 } # INSCOPE
7134 wakaba 1.107
7135     for (@{$self->{open_elements}}) {
7136 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
7137 wakaba 1.107 !!!cp ('t403');
7138 wakaba 1.122 !!!parse-error (type => 'not closed',
7139 wakaba 1.153 text => $_->[0]->manakai_local_name,
7140 wakaba 1.122 token => $token);
7141 wakaba 1.107 last;
7142     } else {
7143     !!!cp ('t404');
7144     }
7145     }
7146    
7147     $self->{insertion_mode} = AFTER_BODY_IM;
7148     !!!next-token;
7149 wakaba 1.126 next B;
7150 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
7151 wakaba 1.122 ## TODO: Update this code. The code below does not seem to be
7152     ## up-to-date, though it has the same effect as what the spec specifies.
7153 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
7154     $self->{open_elements}->[1]->[1] & BODY_EL) {
7155 wakaba 1.52 ## ISSUE: There is an issue in the spec.
7156 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
7157 wakaba 1.79 !!!cp ('t406');
7158 wakaba 1.122 !!!parse-error (type => 'not closed',
7159 wakaba 1.153 text => $self->{open_elements}->[1]->[0]
7160 wakaba 1.122 ->manakai_local_name,
7161     token => $token);
7162 wakaba 1.79 } else {
7163     !!!cp ('t407');
7164 wakaba 1.1 }
7165 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
7166 wakaba 1.52 ## reprocess
7167 wakaba 1.126 next B;
7168 wakaba 1.51 } else {
7169 wakaba 1.79 !!!cp ('t408');
7170 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7171     text => $token->{tag_name}, token => $token);
7172 wakaba 1.52 ## Ignore the token
7173     !!!next-token;
7174 wakaba 1.126 next B;
7175 wakaba 1.51 }
7176 wakaba 1.52 } elsif ({
7177     address => 1, blockquote => 1, center => 1, dir => 1,
7178     div => 1, dl => 1, fieldset => 1, listing => 1,
7179     menu => 1, ol => 1, pre => 1, ul => 1,
7180     dd => 1, dt => 1, li => 1,
7181 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7182 wakaba 1.52 }->{$token->{tag_name}}) {
7183     ## has an element in scope
7184     my $i;
7185     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7186     my $node = $self->{open_elements}->[$_];
7187 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7188 wakaba 1.79 !!!cp ('t410');
7189 wakaba 1.52 $i = $_;
7190 wakaba 1.87 last INSCOPE;
7191 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7192 wakaba 1.79 !!!cp ('t411');
7193 wakaba 1.52 last INSCOPE;
7194 wakaba 1.51 }
7195 wakaba 1.52 } # INSCOPE
7196 wakaba 1.89
7197     unless (defined $i) { # has an element in scope
7198     !!!cp ('t413');
7199 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7200     text => $token->{tag_name}, token => $token);
7201 wakaba 1.157 ## NOTE: Ignore the token.
7202 wakaba 1.89 } else {
7203     ## Step 1. generate implied end tags
7204     while ({
7205 wakaba 1.151 ## END_TAG_OPTIONAL_EL
7206 wakaba 1.89 dd => ($token->{tag_name} ne 'dd'),
7207     dt => ($token->{tag_name} ne 'dt'),
7208     li => ($token->{tag_name} ne 'li'),
7209     p => 1,
7210 wakaba 1.151 rt => 1,
7211     rp => 1,
7212 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
7213 wakaba 1.89 !!!cp ('t409');
7214     pop @{$self->{open_elements}};
7215     }
7216    
7217     ## Step 2.
7218 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7219     ne $token->{tag_name}) {
7220 wakaba 1.79 !!!cp ('t412');
7221 wakaba 1.122 !!!parse-error (type => 'not closed',
7222 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7223 wakaba 1.122 ->manakai_local_name,
7224     token => $token);
7225 wakaba 1.51 } else {
7226 wakaba 1.89 !!!cp ('t414');
7227 wakaba 1.51 }
7228 wakaba 1.89
7229     ## Step 3.
7230 wakaba 1.52 splice @{$self->{open_elements}}, $i;
7231 wakaba 1.89
7232     ## Step 4.
7233     $clear_up_to_marker->()
7234     if {
7235 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
7236 wakaba 1.89 }->{$token->{tag_name}};
7237 wakaba 1.51 }
7238 wakaba 1.52 !!!next-token;
7239 wakaba 1.126 next B;
7240 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
7241 wakaba 1.92 undef $self->{form_element};
7242    
7243 wakaba 1.52 ## has an element in scope
7244 wakaba 1.92 my $i;
7245 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7246     my $node = $self->{open_elements}->[$_];
7247 wakaba 1.123 if ($node->[1] & FORM_EL) {
7248 wakaba 1.79 !!!cp ('t418');
7249 wakaba 1.92 $i = $_;
7250 wakaba 1.52 last INSCOPE;
7251 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7252 wakaba 1.79 !!!cp ('t419');
7253 wakaba 1.52 last INSCOPE;
7254     }
7255     } # INSCOPE
7256 wakaba 1.92
7257     unless (defined $i) { # has an element in scope
7258 wakaba 1.79 !!!cp ('t421');
7259 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7260     text => $token->{tag_name}, token => $token);
7261 wakaba 1.157 ## NOTE: Ignore the token.
7262 wakaba 1.92 } else {
7263     ## Step 1. generate implied end tags
7264 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7265 wakaba 1.92 !!!cp ('t417');
7266     pop @{$self->{open_elements}};
7267     }
7268    
7269     ## Step 2.
7270 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7271     ne $token->{tag_name}) {
7272 wakaba 1.92 !!!cp ('t417.1');
7273 wakaba 1.122 !!!parse-error (type => 'not closed',
7274 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7275 wakaba 1.122 ->manakai_local_name,
7276     token => $token);
7277 wakaba 1.92 } else {
7278     !!!cp ('t420');
7279     }
7280    
7281     ## Step 3.
7282     splice @{$self->{open_elements}}, $i;
7283 wakaba 1.52 }
7284    
7285     !!!next-token;
7286 wakaba 1.126 next B;
7287 wakaba 1.52 } elsif ({
7288     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7289     }->{$token->{tag_name}}) {
7290     ## has an element in scope
7291     my $i;
7292     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7293     my $node = $self->{open_elements}->[$_];
7294 wakaba 1.123 if ($node->[1] & HEADING_EL) {
7295 wakaba 1.79 !!!cp ('t423');
7296 wakaba 1.52 $i = $_;
7297     last INSCOPE;
7298 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7299 wakaba 1.79 !!!cp ('t424');
7300 wakaba 1.52 last INSCOPE;
7301 wakaba 1.51 }
7302 wakaba 1.52 } # INSCOPE
7303 wakaba 1.93
7304     unless (defined $i) { # has an element in scope
7305     !!!cp ('t425.1');
7306 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7307     text => $token->{tag_name}, token => $token);
7308 wakaba 1.157 ## NOTE: Ignore the token.
7309 wakaba 1.79 } else {
7310 wakaba 1.93 ## Step 1. generate implied end tags
7311 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7312 wakaba 1.93 !!!cp ('t422');
7313     pop @{$self->{open_elements}};
7314     }
7315    
7316     ## Step 2.
7317 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7318     ne $token->{tag_name}) {
7319 wakaba 1.93 !!!cp ('t425');
7320 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7321     text => $token->{tag_name}, token => $token);
7322 wakaba 1.93 } else {
7323     !!!cp ('t426');
7324     }
7325    
7326     ## Step 3.
7327     splice @{$self->{open_elements}}, $i;
7328 wakaba 1.36 }
7329 wakaba 1.52
7330     !!!next-token;
7331 wakaba 1.126 next B;
7332 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
7333     ## has an element in scope
7334     my $i;
7335     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
7336     my $node = $self->{open_elements}->[$_];
7337 wakaba 1.123 if ($node->[1] & P_EL) {
7338 wakaba 1.87 !!!cp ('t410.1');
7339     $i = $_;
7340 wakaba 1.88 last INSCOPE;
7341 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
7342 wakaba 1.87 !!!cp ('t411.1');
7343     last INSCOPE;
7344     }
7345     } # INSCOPE
7346 wakaba 1.91
7347     if (defined $i) {
7348 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7349     ne $token->{tag_name}) {
7350 wakaba 1.87 !!!cp ('t412.1');
7351 wakaba 1.122 !!!parse-error (type => 'not closed',
7352 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7353 wakaba 1.122 ->manakai_local_name,
7354     token => $token);
7355 wakaba 1.87 } else {
7356 wakaba 1.91 !!!cp ('t414.1');
7357 wakaba 1.87 }
7358 wakaba 1.91
7359 wakaba 1.87 splice @{$self->{open_elements}}, $i;
7360     } else {
7361 wakaba 1.91 !!!cp ('t413.1');
7362 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7363     text => $token->{tag_name}, token => $token);
7364 wakaba 1.91
7365 wakaba 1.87 !!!cp ('t415.1');
7366     ## As if <p>, then reprocess the current token
7367     my $el;
7368 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
7369 wakaba 1.87 $insert->($el);
7370 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
7371 wakaba 1.87 }
7372 wakaba 1.91
7373 wakaba 1.87 !!!next-token;
7374 wakaba 1.126 next B;
7375 wakaba 1.52 } elsif ({
7376     a => 1,
7377     b => 1, big => 1, em => 1, font => 1, i => 1,
7378     nobr => 1, s => 1, small => 1, strile => 1,
7379     strong => 1, tt => 1, u => 1,
7380     }->{$token->{tag_name}}) {
7381 wakaba 1.79 !!!cp ('t427');
7382 wakaba 1.113 $formatting_end_tag->($token);
7383 wakaba 1.126 next B;
7384 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
7385 wakaba 1.79 !!!cp ('t428');
7386 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7387     text => 'br', token => $token);
7388 wakaba 1.52
7389     ## As if <br>
7390     $reconstruct_active_formatting_elements->($insert_to_current);
7391    
7392     my $el;
7393 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
7394 wakaba 1.52 $insert->($el);
7395    
7396     ## Ignore the token.
7397     !!!next-token;
7398 wakaba 1.126 next B;
7399 wakaba 1.52 } elsif ({
7400     caption => 1, col => 1, colgroup => 1, frame => 1,
7401     frameset => 1, head => 1, option => 1, optgroup => 1,
7402     tbody => 1, td => 1, tfoot => 1, th => 1,
7403     thead => 1, tr => 1,
7404     area => 1, basefont => 1, bgsound => 1,
7405     embed => 1, hr => 1, iframe => 1, image => 1,
7406     img => 1, input => 1, isindex => 1, noembed => 1,
7407     noframes => 1, param => 1, select => 1, spacer => 1,
7408     table => 1, textarea => 1, wbr => 1,
7409     noscript => 0, ## TODO: if scripting is enabled
7410     }->{$token->{tag_name}}) {
7411 wakaba 1.79 !!!cp ('t429');
7412 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7413     text => $token->{tag_name}, token => $token);
7414 wakaba 1.52 ## Ignore the token
7415     !!!next-token;
7416 wakaba 1.126 next B;
7417 wakaba 1.52
7418     ## ISSUE: Issue on HTML5 new elements in spec
7419    
7420     } else {
7421     ## Step 1
7422     my $node_i = -1;
7423     my $node = $self->{open_elements}->[$node_i];
7424 wakaba 1.51
7425 wakaba 1.52 ## Step 2
7426     S2: {
7427 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7428 wakaba 1.52 ## Step 1
7429     ## generate implied end tags
7430 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7431 wakaba 1.79 !!!cp ('t430');
7432 wakaba 1.151 ## NOTE: |<ruby><rt></ruby>|.
7433     ## ISSUE: <ruby><rt></rt> will also take this code path,
7434     ## which seems wrong.
7435 wakaba 1.86 pop @{$self->{open_elements}};
7436 wakaba 1.151 $node_i++;
7437 wakaba 1.52 }
7438    
7439     ## Step 2
7440 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7441     ne $token->{tag_name}) {
7442 wakaba 1.79 !!!cp ('t431');
7443 wakaba 1.58 ## NOTE: <x><y></x>
7444 wakaba 1.122 !!!parse-error (type => 'not closed',
7445 wakaba 1.153 text => $self->{open_elements}->[-1]->[0]
7446 wakaba 1.122 ->manakai_local_name,
7447     token => $token);
7448 wakaba 1.79 } else {
7449     !!!cp ('t432');
7450 wakaba 1.52 }
7451    
7452     ## Step 3
7453 wakaba 1.151 splice @{$self->{open_elements}}, $node_i if $node_i < 0;
7454 wakaba 1.51
7455 wakaba 1.1 !!!next-token;
7456 wakaba 1.52 last S2;
7457 wakaba 1.1 } else {
7458 wakaba 1.52 ## Step 3
7459 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
7460 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
7461 wakaba 1.123 ($node->[1] & SPECIAL_EL or
7462     $node->[1] & SCOPING_EL)) {
7463 wakaba 1.79 !!!cp ('t433');
7464 wakaba 1.153 !!!parse-error (type => 'unmatched end tag',
7465     text => $token->{tag_name}, token => $token);
7466 wakaba 1.52 ## Ignore the token
7467     !!!next-token;
7468     last S2;
7469     }
7470 wakaba 1.79
7471     !!!cp ('t434');
7472 wakaba 1.1 }
7473 wakaba 1.52
7474     ## Step 4
7475     $node_i--;
7476     $node = $self->{open_elements}->[$node_i];
7477    
7478     ## Step 5;
7479     redo S2;
7480     } # S2
7481 wakaba 1.126 next B;
7482 wakaba 1.1 }
7483     }
7484 wakaba 1.126 next B;
7485     } continue { # B
7486     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7487     ## NOTE: The code below is executed in cases where it does not have
7488     ## to be, but it is harmless even in those cases.
7489     ## has an element in scope
7490     INSCOPE: {
7491     for (reverse 0..$#{$self->{open_elements}}) {
7492     my $node = $self->{open_elements}->[$_];
7493     if ($node->[1] & FOREIGN_EL) {
7494     last INSCOPE;
7495     } elsif ($node->[1] & SCOPING_EL) {
7496     last;
7497     }
7498     }
7499    
7500     ## NOTE: No foreign element in scope.
7501     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7502     } # INSCOPE
7503     }
7504 wakaba 1.1 } # B
7505    
7506     ## Stop parsing # MUST
7507    
7508     ## TODO: script stuffs
7509 wakaba 1.3 } # _tree_construct_main
7510    
## Replaces the children of a node with the result of parsing a string
## as HTML (the |innerHTML| setter).
##
## Invoked as a class method:
##
##   Whatpm::HTML->set_inner_html ($node, $string, $onerror, $get_wrapper);
##
##   $node        - The node to modify.  Its |node_type| has to be 9
##                  (document) or 1 (element).
##   $string      - The character string to parse.  It is accessed via
##                  a reference to the caller's argument and is not copied.
##   $onerror     - Optional code reference invoked for each parse error
##                  with a hash (|type|, |line|, |column|, ...).  For an
##                  element node, errors are reported by |warn| when
##                  this argument is omitted.
##   $get_wrapper - Optional code reference; defaults to a sub returning
##                  its first argument.  At the moment it is only
##                  forwarded to |parse_char_string| in the document
##                  case (see the TODO in the element case).
##
## Dies if |$node| is neither a document nor an element.
##
## NOTE: Prototypes are ignored for method calls; the prototype only
## documents that up to five arguments (including the class) are read.
sub set_inner_html ($$$;$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];
  my $get_wrapper = $_[2] || sub ($) { return $_[0] };

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: Iterate over a copy, since the child list is modified
    ## while the children are removed.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_char_string ($$s => $node, $onerror, $get_wrapper);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## TODO: Support for $get_wrapper

    ## Step 1 # MUST
    ## A temporary document and parser are used; the resulting nodes
    ## are moved into |$node| in Step 11.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Code points reported as 'control char' errors in addition to
    ## the ranges tested in the input callback below.  Built once here
    ## rather than once per input character.
    my $is_noncharacter = {
      0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
      0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
      0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
      0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
      0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
      0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
      0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
      0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
      0x10FFFE => 1, 0x10FFFF => 1,
    };

    ## Step 8 # MUST
    my $i = 0; # index of the next character of |$$s| to read
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    ## Input callback: sets |next_char| to the code point of the next
    ## input character (-1 at the end of the string), keeps the last
    ## three characters in |prev_char| and tracks line and column.
    $p->{set_next_char} = sub {
      my $self = shift;

      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      if ($i >= length $$s) {
        $self->{next_char} = -1;
        return;
      }
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR LF and a lone CR are both reported as a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_char} <= 0x0008 or
               (0x000E <= $self->{next_char} and
                $self->{next_char} <= 0x001F) or
               (0x007F <= $self->{next_char} and
                $self->{next_char} <= 0x009F) or
               (0xD800 <= $self->{next_char} and
                $self->{next_char} <= 0xDFFF) or
               (0xFDD0 <= $self->{next_char} and
                $self->{next_char} <= 0xFDDF) or
               $is_noncharacter->{$self->{next_char}}) {
        !!!cp ('i4.1');
        ## The character is reported but left unchanged.
        if ($self->{next_char} < 0x10000) {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U+%04X', $self->{next_char}));
        } else {
          !!!parse-error (type => 'control char',
                          text => (sprintf 'U-%08X', $self->{next_char}));
        }
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## Default error handler: prefers the position recorded in the
    ## token, if any, over the current input position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model depends on the context element.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns ($HTML_NS, [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## The form element pointer is set to the nearest HTML |form|
    ## element among |$node| itself and its ancestors, if any.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq $HTML_NS) {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## NOTE: The |!!!next-token| macro operates on a variable
      ## named |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Both closures refer to |$p| while being stored in |$p|; delete
    ## them to break the reference loops so that the parser is freed.
    delete $p->{parse_error}; # delete loop
    delete $p->{set_next_char}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
7714    
7715     } # tree construction stage
7716 wakaba 1.1
## Subclass of |Error| (the module loaded at the top of this file).
## NOTE(review): presumably thrown to request that parsing be restarted;
## confirm against the code that throws and catches it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';
7719    
7720 wakaba 1.1 1;
7721 wakaba 1.164 # $Date: 2008/09/13 04:19:56 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24