/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.133 - (hide annotations) (download) (as text)
Sat May 17 04:54:11 2008 UTC (16 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.132: +100 -25 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	17 May 2008 04:53:41 -0000
2008-05-17  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src (parse_byte_string): HTML5 encoding sniffing
	algorithm, except for the actual sniffing, is implemented
	with new framework with Message::Charset::Info.

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.133 our $VERSION=do{my @r=(q$Revision: 1.132 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.63 use Error qw(:try);
5 wakaba 1.1
6 wakaba 1.18 ## ISSUE:
7     ## var doc = implementation.createDocument (null, null, null);
8     ## doc.write ('');
9     ## alert (doc.compatMode);
10 wakaba 1.1
11 wakaba 1.70 ## TODO: 1252 parse error (revision 1264)
12     ## TODO: 8859-11 = 874 (revision 1271)
13    
## Namespace URIs used throughout this module.
my $HTML_NS  = 'http://www.w3.org/1999/xhtml';
my $MML_NS   = 'http://www.w3.org/1998/Math/MathML';
my $SVG_NS   = 'http://www.w3.org/2000/svg';
my $XLINK_NS = 'http://www.w3.org/1999/xlink';
my $XML_NS   = 'http://www.w3.org/XML/1998/namespace';
my $XMLNS_NS = 'http://www.w3.org/2000/xmlns/';
20    
## Element category flags.  Each constant occupies exactly one bit so
## that categories can be combined and tested with bitwise operators.
sub A_EL ()                    { 1 << 0 }
sub ADDRESS_EL ()              { 1 << 1 }
sub BODY_EL ()                 { 1 << 2 }
sub BUTTON_EL ()               { 1 << 3 }
sub CAPTION_EL ()              { 1 << 4 }
sub DD_EL ()                   { 1 << 5 }
sub DIV_EL ()                  { 1 << 6 }
sub DT_EL ()                   { 1 << 7 }
sub FORM_EL ()                 { 1 << 8 }
sub FORMATTING_EL ()           { 1 << 9 }
sub FRAMESET_EL ()             { 1 << 10 }
sub HEADING_EL ()              { 1 << 11 }
sub HTML_EL ()                 { 1 << 12 }
sub LI_EL ()                   { 1 << 13 }
sub NOBR_EL ()                 { 1 << 14 }
sub OPTION_EL ()               { 1 << 15 }
sub OPTGROUP_EL ()             { 1 << 16 }
sub P_EL ()                    { 1 << 17 }
sub SELECT_EL ()               { 1 << 18 }
sub TABLE_EL ()                { 1 << 19 }
sub TABLE_CELL_EL ()           { 1 << 20 }
sub TABLE_ROW_EL ()            { 1 << 21 }
sub TABLE_ROW_GROUP_EL ()      { 1 << 22 }
sub MISC_SCOPING_EL ()         { 1 << 23 }
sub MISC_SPECIAL_EL ()         { 1 << 24 }
sub FOREIGN_EL ()              { 1 << 25 }
sub FOREIGN_FLOW_CONTENT_EL () { 1 << 26 }
sub MML_AXML_EL ()             { 1 << 27 }
49 wakaba 1.123
## Composite element category masks.  Each one is the bitwise OR of
## the single-bit categories (or of another composite) listed in its
## body.

sub TABLE_ROWS_EL () { TABLE_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL }

sub END_TAG_OPTIONAL_EL () { DD_EL | DT_EL | LI_EL | P_EL }

sub ALL_END_TAG_OPTIONAL_EL () {
  END_TAG_OPTIONAL_EL | BODY_EL | HTML_EL |
  TABLE_CELL_EL | TABLE_ROW_EL | TABLE_ROW_GROUP_EL
}

sub SCOPING_EL () {
  BUTTON_EL | CAPTION_EL | HTML_EL |
  TABLE_EL | TABLE_CELL_EL | MISC_SCOPING_EL
}

sub TABLE_SCOPING_EL () { HTML_EL | TABLE_EL }

sub TABLE_ROWS_SCOPING_EL () { HTML_EL | TABLE_ROW_GROUP_EL }

sub TABLE_ROW_SCOPING_EL () { HTML_EL | TABLE_ROW_EL }

sub SPECIAL_EL () {
  ADDRESS_EL | BODY_EL | DIV_EL | END_TAG_OPTIONAL_EL |
  FORM_EL | FRAMESET_EL | HEADING_EL |
  OPTION_EL | OPTGROUP_EL | SELECT_EL |
  TABLE_ROW_EL | TABLE_ROW_GROUP_EL | MISC_SPECIAL_EL
}
111    
## Maps an HTML element local name to its element category flags.
## Entries are grouped by category; the resulting hash has one key per
## element name.
my $el_category = {
  ## Elements that carry their own dedicated category bit(s).
  a        => A_EL | FORMATTING_EL,
  nobr     => NOBR_EL | FORMATTING_EL,
  address  => ADDRESS_EL,
  body     => BODY_EL,
  button   => BUTTON_EL,
  caption  => CAPTION_EL,
  dd       => DD_EL,
  div      => DIV_EL,
  dt       => DT_EL,
  form     => FORM_EL,
  frameset => FRAMESET_EL,
  html     => HTML_EL,
  li       => LI_EL,
  optgroup => OPTGROUP_EL,
  option   => OPTION_EL,
  p        => P_EL,
  select   => SELECT_EL,
  table    => TABLE_EL,
  'tr'     => TABLE_ROW_EL,

  ## Elements sharing a category.
  (map { ($_ => HEADING_EL) } qw(h1 h2 h3 h4 h5 h6)),
  (map { ($_ => TABLE_ROW_GROUP_EL) } qw(tbody tfoot thead)),
  (map { ($_ => TABLE_CELL_EL) } qw(td th)),
  (map { ($_ => FORMATTING_EL) } qw(
    b big em font i s small strike strong tt u
  )),
  (map { ($_ => MISC_SCOPING_EL) } qw(applet marquee object)),
  (map { ($_ => MISC_SPECIAL_EL) } qw(
    area base basefont bgsound blockquote br center col colgroup dir
    dl embed fieldset frame head hr iframe img input isindex
    link listing menu meta noembed noframes noscript ol param plaintext
    pre script spacer style textarea title ul wbr
  )),
};
196    
## Maps a namespace URI and then a local name to the category flags of
## elements outside the HTML namespace.
my $el_category_f = {
  $MML_NS => {
    'annotation-xml' => MML_AXML_EL,
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(mi mo mn ms mtext)),
  },
  $SVG_NS => {
    (map { ($_ => FOREIGN_FLOW_CONTENT_EL) } qw(foreignObject desc title)),
  },
  ## NOTE: In addition, FOREIGN_EL is set to non-HTML elements.
};
213    
## Maps an all-lowercase SVG attribute name to its mixed-case
## spelling.  Every key is simply the lowercased form of its value, so
## the table is derived from the list of mixed-case names.
my $svg_attr_name = +{
  map { (lc ($_) => $_) } qw(
    attributeType baseFrequency baseProfile calcMode clipPathUnits
    contentScriptType contentStyleType diffuseConstant edgeMode
    externalResourcesRequired feColorMatrix feComposite feGaussianBlur
    feMorphology feTile filterRes filterUnits glyphRef
    gradientTransform gradientUnits kernelMatrix kernelUnitLength
    keyPoints keySplines keyTimes lengthAdjust limitingConeAngle
    markerHeight markerUnits markerWidth maskContentUnits maskUnits
    numOctaves pathLength patternContentUnits patternTransform
    patternUnits pointsAtX pointsAtY pointsAtZ preserveAlpha
    preserveAspectRatio primitiveUnits refX refY repeatCount repeatDur
    requiredExtensions specularConstant specularExponent spreadMethod
    startOffset stdDeviation stitchTiles surfaceScale systemLanguage
    tableValues targetX targetY textLength viewBox viewTarget
    xChannelSelector yChannelSelector zoomAndPan
  )
};
281    
## Maps a qualified attribute name, as written in the source, to
## |[namespace URI, [prefix, local name]]|.
my $foreign_attr_xname = {
  (map { ("xlink:$_" => [$XLINK_NS, ['xlink', $_]]) }
       qw(actuate arcrole href role show title type)),
  (map { ("xml:$_" => [$XML_NS, ['xml', $_]]) } qw(base lang space)),
  'xmlns' => [$XMLNS_NS, [undef, 'xmlns']],
  'xmlns:xlink' => [$XMLNS_NS, ['xmlns', 'xlink']],
};
296    
297     ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
298    
## Maps each code point in the range 0x80..0x9F to its replacement
## code point.  The replacements are listed in code point order,
## starting at 0x80, eight per row.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80-
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88-
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90-
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98-
  );
  my $code = 0x80;
  +{ map { ($code++ => $_) } @replacement };
}; # $c1_entity_char
333 wakaba 1.1
## Decodes a byte string and parses the result as an HTML document.
##
## Arguments, after the invocant (a parser object, or a class name on
## which |new| is invoked):
##   - the charset name supplied by the caller, or |undef|;
##   - the byte string, or a reference to it;
##   - any further arguments, which are passed through unchanged to
##     |parse_char_string|.
##
## Returns what |parse_char_string| returns.  If the |change_encoding|
## callback installed here throws |Whatpm::HTML::RestartParser|, the
## bytes are decoded again with the requested charset and parsed anew.
sub parse_byte_string ($$$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $charset_name = shift;
  my $bytes_s = ref $_[0] ? $_[0] : \($_[0]);

  ## HTML5 encoding sniffing algorithm
  require Message::Charset::Info;
  my ($charset, $e, $e_status);

  ## Looks the named charset up and asks it for a Perl encoding,
  ## updating |$charset|, |$e| and |$e_status|.  Returns |$e|.
  my $select_charset = sub {
    $charset = Message::Charset::Info->get_by_iana_name ($_[0]);
    ($e, $e_status) = $charset->get_perl_encoding
        (allow_error_reporting => 1, allow_fallback => 1);
    return $e;
  };

  SNIFFING: {
    ## Step 1: the charset name given by the caller.
    ## ISSUE: Unsupported encoding is not ignored according to the spec.
    if (defined $charset_name and $select_charset->($charset_name)) {
      $self->{confident} = 1;
      last SNIFFING;
    }

    ## Step 2
    # wait

    ## Step 3: byte order mark at the very beginning of the input.
    my $head = substr ($$bytes_s, 0, 3);
    for my $bom (["\xFE\xFF" => 'utf-16be'],
                 ["\xFF\xFE" => 'utf-16le'],
                 ["\xEF\xBB\xBF" => 'utf-8']) {
      next unless index ($head, $bom->[0]) == 0;
      $select_charset->($bom->[1]);
      $self->{confident} = 1;
      last SNIFFING;
    }

    ## Step 4
    ## TODO: <meta charset>

    ## Step 5
    ## TODO: from history

    ## Step 6: guess from the first 1024 bytes.
    require Whatpm::Charset::UniversalCharDet;
    $charset_name = Whatpm::Charset::UniversalCharDet->detect_byte_string
        (substr ($$bytes_s, 0, 1024));
    ## ISSUE: Unsupported encoding is not ignored according to the spec.
    if (defined $charset_name and $select_charset->($charset_name)) {
      $self->{confident} = 0;
      last SNIFFING;
    }

    ## Step 7: default
    ## TODO: Make this configurable.
    ## NOTE: We choose |windows-1252| here, since |utf-8| should be
    ## detectable in the step 6.
    $select_charset->('windows-1252');
    $self->{confident} = 0;
  } # SNIFFING

  ## Both branches are still empty: nothing is done yet with the kind
  ## of encoding implementation that was obtained.
  if ($e_status & Message::Charset::Info::FALLBACK_ENCODING_IMPL ()) {

  } elsif (not ($e_status &
                Message::Charset::Info::ERROR_REPORTING_ENCODING_IMPL())) {

  }

  my $s = \ $e->decode ($$bytes_s);
  $self->{input_encoding} = $charset->get_iana_name;

  $self->{change_encoding} = sub {
    my $self = shift;
    my $charset_name = lc shift;
    my $token = shift;
    ## TODO: if $charset_name is supported
    ## TODO: normalize charset name

    ## "Change the encoding" algorithm:

    ## Step 1
    if ($charset_name eq 'utf-16') { ## ISSUE: UTF-16BE -> UTF-8? UTF-16LE -> UTF-8?
      $charset_name = 'utf-8';
    }

    ## Step 2: the requested encoding is already in use.
    if (defined $self->{input_encoding} and
        $self->{input_encoding} eq $charset_name) {
      $self->{confident} = 1;
      return;
    }

    !!!parse-error (type => 'charset label detected:'.$self->{input_encoding}.
        ':'.$charset_name, level => 'w', token => $token);

    ## Step 3
    # if (can) {
      ## change the encoding on the fly.
      #$self->{confident} = 1;
      #return;
    # }

    ## Step 4: abandon this parse; the handler below starts over.
    Whatpm::HTML::RestartParser->throw (charset => $charset_name);
  }; # $self->{change_encoding}

  ## Everything after the byte string goes to |parse_char_string|.
  my @args = @_[1..$#_];
  my $return;
  try {
    $return = $self->parse_char_string ($s, @args);
  } catch Whatpm::HTML::RestartParser with {
    my $charset_name = shift->{charset};
    $s = \ (Encode::decode ($charset_name, $$bytes_s));
    $self->{input_encoding} = $charset_name; ## TODO: normalize
    $self->{confident} = 1;
    $return = $self->parse_char_string ($s, @args);
  };
  return $return;
} # parse_byte_string
479    
480 wakaba 1.71 ## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM
481     ## and the HTML layer MUST ignore it. However, we do strip the BOM in
482     ## the encoding layer and the HTML layer does not ignore any U+FEFF,
483     ## because the core part of our HTML parser expects a string of character,
484     ## not a string of bytes or code units or anything which might contain a BOM.
485     ## Therefore, any parser interface that accepts a string of bytes,
486     ## such as |parse_byte_string| in this module, must ensure that it does
487     ## strip the BOM and never strip any ZWNBSP.
488    
## |parse_char_string| is another name for |parse_string|.
*parse_char_string = \&parse_string;

## Parses a string of characters as an HTML document.
##
## Arguments, after the invocant (a parser object, or a class name on
## which |new| is invoked):
##   - the character string, or a reference to it;
##   - the document object to fill; its child node list is emptied
##     first;
##   - optionally an error handler, called with |line|, |column| and
##     the error's own name/value pairs.  Without one, errors are
##     reported by |warn|.
##
## Returns the document object.
sub parse_string ($$$;$) {
  my $self = ref $_[0] ? shift : shift->new;
  my $s = ref $_[0] ? $_[0] : \($_[0]);
  $self->{document} = $_[1];
  @{$self->{document}->child_nodes} = ();

  ## NOTE: |set_inner_html| copies most of this method's code

  $self->{confident} = 1 unless exists $self->{confident};
  $self->{document}->input_encoding ($self->{input_encoding})
      if defined $self->{input_encoding};

  my $i = 0; # index into |$$s| of the next character to read
  $self->{line_prev} = $self->{line} = 1;
  $self->{column_prev} = $self->{column} = 0;
  $self->{set_next_char} = sub {
    my $self = shift;

    ## Keep the last three characters, most recent first.
    pop @{$self->{prev_char}};
    unshift @{$self->{prev_char}}, $self->{next_char};

    ## End of input is represented by -1.
    if ($i >= length $$s) {
      $self->{next_char} = -1;
      return;
    }
    $self->{next_char} = ord substr $$s, $i++, 1;

    ($self->{line_prev}, $self->{column_prev})
        = ($self->{line}, $self->{column});
    $self->{column}++;

    if ($self->{next_char} == 0x000A) { # LF
      !!!cp ('j1');
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} == 0x000D) { # CR
      !!!cp ('j2');
      ## CR LF and a lone CR are both turned into a single LF.
      $i++ if substr ($$s, $i, 1) eq "\x0A";
      $self->{next_char} = 0x000A; # LF # MUST
      $self->{line}++;
      $self->{column} = 0;
    } elsif ($self->{next_char} > 0x10FFFF) {
      !!!cp ('j3');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} == 0x0000) { # NULL
      !!!cp ('j4');
      !!!parse-error (type => 'NULL');
      $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_char} <= 0x0008 or
             (0x000E <= $self->{next_char} and $self->{next_char} <= 0x001F) or
             (0x007F <= $self->{next_char} and $self->{next_char} <= 0x009F) or
             (0xD800 <= $self->{next_char} and $self->{next_char} <= 0xDFFF) or
             (0xFDD0 <= $self->{next_char} and $self->{next_char} <= 0xFDDF) or
             ## U+nFFFE and U+nFFFF for n = 0..0x10.  Values above
             ## 0x10FFFF were handled by an earlier branch, so testing
             ## the low 16 bits selects exactly those 34 code points,
             ## without building a lookup hash for every character.
             ($self->{next_char} & 0xFFFE) == 0xFFFE) {
      ## NOTE(review): the Unicode noncharacter range is
      ## U+FDD0..U+FDEF, while only U+FDD0..U+FDDF is tested above --
      ## confirm against the specification being implemented.
      !!!cp ('j5');
      !!!parse-error (type => 'control char', level => $self->{must_level});
      ## TODO: error type documentation
    }
  };
  $self->{prev_char} = [-1, -1, -1];
  $self->{next_char} = -1;

  ## Default error handler: print a warning with the position taken
  ## from the token if there is one, and from the options otherwise.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    my $line = $opt{token} ? $opt{token}->{line} : $opt{line};
    my $column = $opt{token} ? $opt{token}->{column} : $opt{column};
    warn "Parse error ($opt{type}) at line $line column $column\n";
  };
  $self->{parse_error} = sub {
    ## The current position comes first, so that a |line| or |column|
    ## given by the reporter overrides it.
    $onerror->(line => $self->{line}, column => $self->{column}, @_);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  ## The |parse_error| closure refers to |$self|, which refers back to
  ## the closure.
  delete $self->{parse_error}; # remove loop

  return $self->{document};
} # parse_string
579    
## Creates a parser object whose callbacks are all placeholders.
##
## Argument: the class name.  Returns the new object, blessed into
## that class.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_char} = sub {
    ## The parser is taken from the argument list, as the closure
    ## installed by |parse_string| does, rather than captured from the
    ## enclosing scope.  Capturing |$self| here would make the object
    ## refer to itself through this closure, so that it could never be
    ## freed.
    my $parser = shift;
    $parser->{next_char} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  $self->{change_encoding} = sub {
    # if ($_[0] is a supported encoding) {
    #   run "change the encoding" algorithm;
    #   throw Whatpm::HTML::RestartParser (charset => $new_encoding);
    # }
  };
  $self->{application_cache_selection} = sub {
    #
  };
  return $self;
} # new
600    
## Content model flags: which kinds of markup are recognized in data.
sub CM_ENTITY ()         { 1 << 0 } # & markup in data
sub CM_LIMITED_MARKUP () { 1 << 1 } # < markup in data (limited)
sub CM_FULL_MARKUP ()    { 1 << 2 } # < markup in data (any)

## Content models, expressed as combinations of the flags above.
sub PLAINTEXT_CONTENT_MODEL () { 0 }
sub CDATA_CONTENT_MODEL ()     { CM_LIMITED_MARKUP }
sub RCDATA_CONTENT_MODEL ()    { CM_LIMITED_MARKUP | CM_ENTITY }
sub PCDATA_CONTENT_MODEL ()    { CM_FULL_MARKUP | CM_ENTITY }
609    
## Tokenizer states.  Each state has its own number.
use constant {
  DATA_STATE                                    => 0,
  ENTITY_DATA_STATE                             => 1,
  TAG_OPEN_STATE                                => 2,
  CLOSE_TAG_OPEN_STATE                          => 3,
  TAG_NAME_STATE                                => 4,
  BEFORE_ATTRIBUTE_NAME_STATE                   => 5,
  ATTRIBUTE_NAME_STATE                          => 6,
  AFTER_ATTRIBUTE_NAME_STATE                    => 7,
  BEFORE_ATTRIBUTE_VALUE_STATE                  => 8,
  ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE           => 9,
  ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE           => 10,
  ATTRIBUTE_VALUE_UNQUOTED_STATE                => 11,
  ENTITY_IN_ATTRIBUTE_VALUE_STATE               => 12,
  MARKUP_DECLARATION_OPEN_STATE                 => 13,
  COMMENT_START_STATE                           => 14,
  COMMENT_START_DASH_STATE                      => 15,
  COMMENT_STATE                                 => 16,
  COMMENT_END_STATE                             => 17,
  COMMENT_END_DASH_STATE                        => 18,
  BOGUS_COMMENT_STATE                           => 19,
  DOCTYPE_STATE                                 => 20,
  BEFORE_DOCTYPE_NAME_STATE                     => 21,
  DOCTYPE_NAME_STATE                            => 22,
  AFTER_DOCTYPE_NAME_STATE                      => 23,
  BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE        => 24,
  DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE => 25,
  DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE => 26,
  AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE         => 27,
  BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE        => 28,
  DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE => 29,
  DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE => 30,
  AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE         => 31,
  BOGUS_DOCTYPE_STATE                           => 32,
  AFTER_ATTRIBUTE_VALUE_QUOTED_STATE            => 33,
  SELF_CLOSING_START_TAG_STATE                  => 34,
  CDATA_BLOCK_STATE                             => 35,
};
646 wakaba 1.57
## Token types, stored in the |type| member of a token.
use constant {
  DOCTYPE_TOKEN     => 1,
  COMMENT_TOKEN     => 2,
  START_TAG_TOKEN   => 3,
  END_TAG_TOKEN     => 4,
  END_OF_FILE_TOKEN => 5,
  CHARACTER_TOKEN   => 6,
};
653    
## Insertion mode group flags, one bit each.
sub AFTER_HTML_IMS ()        { 1 << 2 }
sub HEAD_IMS ()              { 1 << 3 }
sub BODY_IMS ()              { 1 << 4 }
sub BODY_TABLE_IMS ()        { 1 << 5 }
sub TABLE_IMS ()             { 1 << 6 }
sub ROW_IMS ()               { 1 << 7 }
sub BODY_AFTER_IMS ()        { 1 << 8 }
sub FRAME_IMS ()             { 1 << 9 }
sub SELECT_IMS ()            { 1 << 10 }
sub IN_FOREIGN_CONTENT_IM () { 1 << 11 }
  ## NOTE: "in foreign content" insertion mode is special; it is combined
  ## with the secondary insertion mode.  In this parser, they are stored
  ## together in the bit-or'ed form.

## NOTE: "initial" and "before html" insertion modes have no constants.

## NOTE: "after after body" insertion mode.
sub AFTER_HTML_BODY_IM () { AFTER_HTML_IMS | BODY_AFTER_IMS }

## NOTE: "after after frameset" insertion mode.
sub AFTER_HTML_FRAMESET_IM () { AFTER_HTML_IMS | FRAME_IMS }

## Individual insertion modes: the group flag(s) combined with a
## number in the two lowest bits that tells the members of a group
## apart.
sub IN_HEAD_IM ()            { HEAD_IMS | 0 }
sub IN_HEAD_NOSCRIPT_IM ()   { HEAD_IMS | 1 }
sub AFTER_HEAD_IM ()         { HEAD_IMS | 2 }
sub BEFORE_HEAD_IM ()        { HEAD_IMS | 3 }
sub IN_BODY_IM ()            { BODY_IMS }
sub IN_CELL_IM ()            { BODY_IMS | BODY_TABLE_IMS | 1 }
sub IN_CAPTION_IM ()         { BODY_IMS | BODY_TABLE_IMS | 2 }
sub IN_ROW_IM ()             { TABLE_IMS | ROW_IMS | 1 }
sub IN_TABLE_BODY_IM ()      { TABLE_IMS | ROW_IMS | 2 }
sub IN_TABLE_IM ()           { TABLE_IMS }
sub AFTER_BODY_IM ()         { BODY_AFTER_IMS }
sub IN_FRAMESET_IM ()        { FRAME_IMS | 1 }
sub AFTER_FRAMESET_IM ()     { FRAME_IMS | 2 }
sub IN_SELECT_IM ()          { SELECT_IMS | 1 }
sub IN_SELECT_IN_TABLE_IM () { SELECT_IMS | 2 }
sub IN_COLUMN_GROUP_IM ()    { 2 }
692    
693 wakaba 1.1 ## Implementations MUST act as if state machine in the spec
694    
## Resets the tokenizer members of the parser object and reads the
## first input character.
sub _initialize_tokenizer ($) {
  my $self = shift;

  ## Initial state and content model.
  $self->{state} = DATA_STATE;
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## These members are kept, with an undefined value.
  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);
  delete $self->{self_closing};

  $self->{char} = [];
  # $self->{next_char}
  !!!next-input-character;
  $self->{token} = [];
  # $self->{escape}
} # _initialize_tokenizer
710    
711     ## A token has:
712 wakaba 1.55 ## ->{type} == DOCTYPE_TOKEN, START_TAG_TOKEN, END_TAG_TOKEN, COMMENT_TOKEN,
713     ## CHARACTER_TOKEN, or END_OF_FILE_TOKEN
714     ## ->{name} (DOCTYPE_TOKEN)
715     ## ->{tag_name} (START_TAG_TOKEN, END_TAG_TOKEN)
716     ## ->{public_identifier} (DOCTYPE_TOKEN)
717     ## ->{system_identifier} (DOCTYPE_TOKEN)
718 wakaba 1.75 ## ->{quirks} == 1 or 0 (DOCTYPE_TOKEN): "force-quirks" flag
719 wakaba 1.55 ## ->{attributes} isa HASH (START_TAG_TOKEN, END_TAG_TOKEN)
720 wakaba 1.66 ## ->{name}
721     ## ->{value}
722     ## ->{has_reference} == 1 or 0
723 wakaba 1.55 ## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN)
724 wakaba 1.125 ## NOTE: The "self-closing flag" is held as |$self->{self_closing}|.
725     ## |->{self_closing}| is used to save the value of |$self->{self_closing}|
726     ## while the token is pushed back to the stack.
727    
728     ## ISSUE: "When a DOCTYPE token is created, its
729     ## <i>self-closing flag</i> must be unset (its other state is that it
730     ## be set), and its attributes list must be empty.": Wrong subject?
731 wakaba 1.1
732     ## Emitted token MUST immediately be handled by the tree construction state.
733    
734     ## Before each step, UA MAY check to see if either one of the scripts in
735     ## "list of scripts that will execute as soon as possible" or the first
736     ## script in the "list of scripts that will execute asynchronously",
737     ## has completed loading. If one has, then it MUST be executed
738     ## and removed from the list.
739    
740 wakaba 1.59 ## NOTE: HTML5 "Writing HTML documents" section, applied to
741     ## documents and not to user agents and conformance checkers,
742     ## contains some requirements that are not detected by the
743     ## parsing algorithm:
744     ## - Some requirements on character encoding declarations. ## TODO
745     ## - "Elements MUST NOT contain content that their content model disallows."
746     ## ... Some are parse error, some are not (will be reported by c.c.).
747     ## - Polytheistic slash SHOULD NOT be used. (Applied only to atheists.) ## TODO
748     ## - Text (in elements, attributes, and comments) SHOULD NOT contain
749     ## control characters other than space characters. ## TODO: (what is control character? C0, C1 and DEL? Unicode control character?)
750    
751     ## TODO: HTML5 poses authors two SHOULD-level requirements that cannot
752     ## be detected by the HTML5 parsing algorithm:
753     ## - Text,
754    
755 wakaba 1.1 sub _get_next_token ($) {
756     my $self = shift;
757 wakaba 1.125
758     if ($self->{self_closing}) {
759     !!!parse-error (type => 'nestc', token => $self->{current_token});
760     ## NOTE: The |self_closing| flag is only set by start tag token.
761     ## In addition, when a start tag token is emitted, it is always set to
762     ## |current_token|.
763     delete $self->{self_closing};
764     }
765    
766 wakaba 1.1 if (@{$self->{token}}) {
767 wakaba 1.125 $self->{self_closing} = $self->{token}->[0]->{self_closing};
768 wakaba 1.1 return shift @{$self->{token}};
769     }
770    
771     A: {
772 wakaba 1.57 if ($self->{state} == DATA_STATE) {
773 wakaba 1.76 if ($self->{next_char} == 0x0026) { # &
774 wakaba 1.72 if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA
775     not $self->{escape}) {
776 wakaba 1.77 !!!cp (1);
777 wakaba 1.57 $self->{state} = ENTITY_DATA_STATE;
778 wakaba 1.1 !!!next-input-character;
779     redo A;
780     } else {
781 wakaba 1.77 !!!cp (2);
782 wakaba 1.1 #
783     }
784 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
785 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
786 wakaba 1.13 unless ($self->{escape}) {
787 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
788     $self->{prev_char}->[1] == 0x0021 and # !
789     $self->{prev_char}->[2] == 0x003C) { # <
790 wakaba 1.77 !!!cp (3);
791 wakaba 1.13 $self->{escape} = 1;
792 wakaba 1.77 } else {
793     !!!cp (4);
794 wakaba 1.13 }
795 wakaba 1.77 } else {
796     !!!cp (5);
797 wakaba 1.13 }
798     }
799    
800     #
801 wakaba 1.76 } elsif ($self->{next_char} == 0x003C) { # <
802 wakaba 1.40 if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA
803     (($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA
804 wakaba 1.13 not $self->{escape})) {
805 wakaba 1.77 !!!cp (6);
806 wakaba 1.57 $self->{state} = TAG_OPEN_STATE;
807 wakaba 1.1 !!!next-input-character;
808     redo A;
809     } else {
810 wakaba 1.77 !!!cp (7);
811 wakaba 1.1 #
812     }
813 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
814 wakaba 1.13 if ($self->{escape} and
815 wakaba 1.40 ($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA
816 wakaba 1.76 if ($self->{prev_char}->[0] == 0x002D and # -
817     $self->{prev_char}->[1] == 0x002D) { # -
818 wakaba 1.77 !!!cp (8);
819 wakaba 1.13 delete $self->{escape};
820 wakaba 1.77 } else {
821     !!!cp (9);
822 wakaba 1.13 }
823 wakaba 1.77 } else {
824     !!!cp (10);
825 wakaba 1.13 }
826    
827     #
828 wakaba 1.76 } elsif ($self->{next_char} == -1) {
829 wakaba 1.77 !!!cp (11);
830 wakaba 1.112 !!!emit ({type => END_OF_FILE_TOKEN,
831     line => $self->{line}, column => $self->{column}});
832 wakaba 1.1 last A; ## TODO: ok?
833 wakaba 1.77 } else {
834     !!!cp (12);
835 wakaba 1.1 }
836     # Anything else
837 wakaba 1.55 my $token = {type => CHARACTER_TOKEN,
838 wakaba 1.112 data => chr $self->{next_char},
839 wakaba 1.120 line => $self->{line}, column => $self->{column},
840 wakaba 1.118 };
841 wakaba 1.1 ## Stay in the data state
842     !!!next-input-character;
843    
844     !!!emit ($token);
845    
846     redo A;
847 wakaba 1.57 } elsif ($self->{state} == ENTITY_DATA_STATE) {
848 wakaba 1.1 ## (cannot happen in CDATA state)
849 wakaba 1.112
850 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
851 wakaba 1.1
852 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity (0, -1);
853 wakaba 1.1
854 wakaba 1.57 $self->{state} = DATA_STATE;
855 wakaba 1.1 # next-input-character is already done
856    
857     unless (defined $token) {
858 wakaba 1.77 !!!cp (13);
859 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '&',
860 wakaba 1.120 line => $l, column => $c,
861 wakaba 1.118 });
862 wakaba 1.1 } else {
863 wakaba 1.77 !!!cp (14);
864 wakaba 1.1 !!!emit ($token);
865     }
866    
867     redo A;
868 wakaba 1.57 } elsif ($self->{state} == TAG_OPEN_STATE) {
869 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
870 wakaba 1.76 if ($self->{next_char} == 0x002F) { # /
871 wakaba 1.77 !!!cp (15);
872 wakaba 1.1 !!!next-input-character;
873 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
874 wakaba 1.1 redo A;
875     } else {
876 wakaba 1.77 !!!cp (16);
877 wakaba 1.1 ## reconsume
878 wakaba 1.57 $self->{state} = DATA_STATE;
879 wakaba 1.1
880 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
881 wakaba 1.120 line => $self->{line_prev},
882     column => $self->{column_prev},
883 wakaba 1.118 });
884 wakaba 1.1
885     redo A;
886     }
887 wakaba 1.40 } elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA
888 wakaba 1.76 if ($self->{next_char} == 0x0021) { # !
889 wakaba 1.77 !!!cp (17);
890 wakaba 1.57 $self->{state} = MARKUP_DECLARATION_OPEN_STATE;
891 wakaba 1.1 !!!next-input-character;
892     redo A;
893 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
894 wakaba 1.77 !!!cp (18);
895 wakaba 1.57 $self->{state} = CLOSE_TAG_OPEN_STATE;
896 wakaba 1.1 !!!next-input-character;
897     redo A;
898 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
899     $self->{next_char} <= 0x005A) { # A..Z
900 wakaba 1.77 !!!cp (19);
901 wakaba 1.1 $self->{current_token}
902 wakaba 1.55 = {type => START_TAG_TOKEN,
903 wakaba 1.112 tag_name => chr ($self->{next_char} + 0x0020),
904     line => $self->{line_prev},
905     column => $self->{column_prev}};
906 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
907 wakaba 1.1 !!!next-input-character;
908     redo A;
909 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
910     $self->{next_char} <= 0x007A) { # a..z
911 wakaba 1.77 !!!cp (20);
912 wakaba 1.55 $self->{current_token} = {type => START_TAG_TOKEN,
913 wakaba 1.112 tag_name => chr ($self->{next_char}),
914     line => $self->{line_prev},
915     column => $self->{column_prev}};
916 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
917 wakaba 1.1 !!!next-input-character;
918     redo A;
919 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
920 wakaba 1.77 !!!cp (21);
921 wakaba 1.115 !!!parse-error (type => 'empty start tag',
922     line => $self->{line_prev},
923     column => $self->{column_prev});
924 wakaba 1.57 $self->{state} = DATA_STATE;
925 wakaba 1.1 !!!next-input-character;
926    
927 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<>',
928 wakaba 1.120 line => $self->{line_prev},
929     column => $self->{column_prev},
930 wakaba 1.118 });
931 wakaba 1.1
932     redo A;
933 wakaba 1.76 } elsif ($self->{next_char} == 0x003F) { # ?
934 wakaba 1.77 !!!cp (22);
935 wakaba 1.115 !!!parse-error (type => 'pio',
936     line => $self->{line_prev},
937     column => $self->{column_prev});
938 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
939 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
940 wakaba 1.120 line => $self->{line_prev},
941     column => $self->{column_prev},
942 wakaba 1.118 };
943 wakaba 1.76 ## $self->{next_char} is intentionally left as is
944 wakaba 1.1 redo A;
945     } else {
946 wakaba 1.77 !!!cp (23);
947 wakaba 1.3 !!!parse-error (type => 'bare stago');
948 wakaba 1.57 $self->{state} = DATA_STATE;
949 wakaba 1.1 ## reconsume
950    
951 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '<',
952 wakaba 1.120 line => $self->{line_prev},
953     column => $self->{column_prev},
954 wakaba 1.118 });
955 wakaba 1.1
956     redo A;
957     }
958     } else {
959 wakaba 1.40 die "$0: $self->{content_model} in tag open";
960 wakaba 1.1 }
961 wakaba 1.57 } elsif ($self->{state} == CLOSE_TAG_OPEN_STATE) {
962 wakaba 1.113 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1); # "<"of"</"
963 wakaba 1.40 if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA
964 wakaba 1.23 if (defined $self->{last_emitted_start_tag_name}) {
965 wakaba 1.112
966 wakaba 1.30 ## NOTE: <http://krijnhoetmer.nl/irc-logs/whatwg/20070626#l-564>
967 wakaba 1.23 my @next_char;
968     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
969 wakaba 1.76 push @next_char, $self->{next_char};
970 wakaba 1.23 my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
971     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
972 wakaba 1.76 if ($self->{next_char} == $c or $self->{next_char} == $C) {
973 wakaba 1.77 !!!cp (24);
974 wakaba 1.23 !!!next-input-character;
975     next TAGNAME;
976     } else {
977 wakaba 1.77 !!!cp (25);
978 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
979 wakaba 1.23 !!!back-next-input-character (@next_char);
980 wakaba 1.57 $self->{state} = DATA_STATE;
981 wakaba 1.23
982 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
983 wakaba 1.120 line => $l, column => $c,
984 wakaba 1.118 });
985 wakaba 1.23
986     redo A;
987     }
988     }
989 wakaba 1.76 push @next_char, $self->{next_char};
990 wakaba 1.23
991 wakaba 1.76 unless ($self->{next_char} == 0x0009 or # HT
992     $self->{next_char} == 0x000A or # LF
993     $self->{next_char} == 0x000B or # VT
994     $self->{next_char} == 0x000C or # FF
995     $self->{next_char} == 0x0020 or # SP
996     $self->{next_char} == 0x003E or # >
997     $self->{next_char} == 0x002F or # /
998     $self->{next_char} == -1) {
999 wakaba 1.77 !!!cp (26);
1000 wakaba 1.76 $self->{next_char} = shift @next_char; # reconsume
1001 wakaba 1.1 !!!back-next-input-character (@next_char);
1002 wakaba 1.57 $self->{state} = DATA_STATE;
1003 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1004 wakaba 1.120 line => $l, column => $c,
1005 wakaba 1.118 });
1006 wakaba 1.1 redo A;
1007 wakaba 1.23 } else {
1008 wakaba 1.77 !!!cp (27);
1009 wakaba 1.76 $self->{next_char} = shift @next_char;
1010 wakaba 1.23 !!!back-next-input-character (@next_char);
1011     # and consume...
1012 wakaba 1.1 }
1013 wakaba 1.23 } else {
1014     ## No start tag token has ever been emitted
1015 wakaba 1.77 !!!cp (28);
1016 wakaba 1.23 # next-input-character is already done
1017 wakaba 1.57 $self->{state} = DATA_STATE;
1018 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1019 wakaba 1.120 line => $l, column => $c,
1020 wakaba 1.118 });
1021 wakaba 1.1 redo A;
1022     }
1023     }
1024    
1025 wakaba 1.76 if (0x0041 <= $self->{next_char} and
1026     $self->{next_char} <= 0x005A) { # A..Z
1027 wakaba 1.77 !!!cp (29);
1028 wakaba 1.112 $self->{current_token}
1029     = {type => END_TAG_TOKEN,
1030     tag_name => chr ($self->{next_char} + 0x0020),
1031     line => $l, column => $c};
1032 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1033 wakaba 1.1 !!!next-input-character;
1034     redo A;
1035 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
1036     $self->{next_char} <= 0x007A) { # a..z
1037 wakaba 1.77 !!!cp (30);
1038 wakaba 1.55 $self->{current_token} = {type => END_TAG_TOKEN,
1039 wakaba 1.112 tag_name => chr ($self->{next_char}),
1040     line => $l, column => $c};
1041 wakaba 1.57 $self->{state} = TAG_NAME_STATE;
1042 wakaba 1.1 !!!next-input-character;
1043     redo A;
1044 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1045 wakaba 1.77 !!!cp (31);
1046 wakaba 1.115 !!!parse-error (type => 'empty end tag',
1047     line => $self->{line_prev}, ## "<" in "</>"
1048     column => $self->{column_prev} - 1);
1049 wakaba 1.57 $self->{state} = DATA_STATE;
1050 wakaba 1.1 !!!next-input-character;
1051     redo A;
1052 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1053 wakaba 1.77 !!!cp (32);
1054 wakaba 1.3 !!!parse-error (type => 'bare etago');
1055 wakaba 1.57 $self->{state} = DATA_STATE;
1056 wakaba 1.1 # reconsume
1057    
1058 wakaba 1.112 !!!emit ({type => CHARACTER_TOKEN, data => '</',
1059 wakaba 1.120 line => $l, column => $c,
1060 wakaba 1.118 });
1061 wakaba 1.1
1062     redo A;
1063     } else {
1064 wakaba 1.77 !!!cp (33);
1065 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
1066 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1067 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1068 wakaba 1.120 line => $self->{line_prev}, # "<" of "</"
1069     column => $self->{column_prev} - 1,
1070 wakaba 1.118 };
1071 wakaba 1.76 ## $self->{next_char} is intentionally left as is
1072 wakaba 1.1 redo A;
1073     }
1074 wakaba 1.57 } elsif ($self->{state} == TAG_NAME_STATE) {
1075 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1076     $self->{next_char} == 0x000A or # LF
1077     $self->{next_char} == 0x000B or # VT
1078     $self->{next_char} == 0x000C or # FF
1079     $self->{next_char} == 0x0020) { # SP
1080 wakaba 1.77 !!!cp (34);
1081 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1082 wakaba 1.1 !!!next-input-character;
1083     redo A;
1084 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1085 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1086 wakaba 1.77 !!!cp (35);
1087 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1088 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1089 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1090 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1091     # ## NOTE: This should never be reached.
1092     # !!! cp (36);
1093     # !!! parse-error (type => 'end tag attribute');
1094     #} else {
1095 wakaba 1.77 !!!cp (37);
1096 wakaba 1.78 #}
1097 wakaba 1.1 } else {
1098     die "$0: $self->{current_token}->{type}: Unknown token type";
1099     }
1100 wakaba 1.57 $self->{state} = DATA_STATE;
1101 wakaba 1.1 !!!next-input-character;
1102    
1103     !!!emit ($self->{current_token}); # start tag or end tag
1104    
1105     redo A;
1106 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1107     $self->{next_char} <= 0x005A) { # A..Z
1108 wakaba 1.77 !!!cp (38);
1109 wakaba 1.76 $self->{current_token}->{tag_name} .= chr ($self->{next_char} + 0x0020);
1110 wakaba 1.1 # start tag or end tag
1111     ## Stay in this state
1112     !!!next-input-character;
1113     redo A;
1114 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1115 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1116 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1117 wakaba 1.77 !!!cp (39);
1118 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1119 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1120 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1121 wakaba 1.78 #if ($self->{current_token}->{attributes}) {
1122     # ## NOTE: This state should never be reached.
1123     # !!! cp (40);
1124     # !!! parse-error (type => 'end tag attribute');
1125     #} else {
1126 wakaba 1.77 !!!cp (41);
1127 wakaba 1.78 #}
1128 wakaba 1.1 } else {
1129     die "$0: $self->{current_token}->{type}: Unknown token type";
1130     }
1131 wakaba 1.57 $self->{state} = DATA_STATE;
1132 wakaba 1.1 # reconsume
1133    
1134     !!!emit ($self->{current_token}); # start tag or end tag
1135    
1136     redo A;
1137 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1138 wakaba 1.125 !!!cp (42);
1139     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1140 wakaba 1.1 !!!next-input-character;
1141     redo A;
1142     } else {
1143 wakaba 1.77 !!!cp (44);
1144 wakaba 1.76 $self->{current_token}->{tag_name} .= chr $self->{next_char};
1145 wakaba 1.1 # start tag or end tag
1146     ## Stay in the state
1147     !!!next-input-character;
1148     redo A;
1149     }
1150 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1151 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1152     $self->{next_char} == 0x000A or # LF
1153     $self->{next_char} == 0x000B or # VT
1154     $self->{next_char} == 0x000C or # FF
1155     $self->{next_char} == 0x0020) { # SP
1156 wakaba 1.77 !!!cp (45);
1157 wakaba 1.1 ## Stay in the state
1158     !!!next-input-character;
1159     redo A;
1160 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1161 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1162 wakaba 1.77 !!!cp (46);
1163 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1164 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1165 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1166 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1167 wakaba 1.77 !!!cp (47);
1168 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1169 wakaba 1.77 } else {
1170     !!!cp (48);
1171 wakaba 1.1 }
1172     } else {
1173     die "$0: $self->{current_token}->{type}: Unknown token type";
1174     }
1175 wakaba 1.57 $self->{state} = DATA_STATE;
1176 wakaba 1.1 !!!next-input-character;
1177    
1178     !!!emit ($self->{current_token}); # start tag or end tag
1179    
1180     redo A;
1181 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1182     $self->{next_char} <= 0x005A) { # A..Z
1183 wakaba 1.77 !!!cp (49);
1184 wakaba 1.119 $self->{current_attribute}
1185     = {name => chr ($self->{next_char} + 0x0020),
1186     value => '',
1187     line => $self->{line}, column => $self->{column}};
1188 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1189 wakaba 1.1 !!!next-input-character;
1190     redo A;
1191 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1192 wakaba 1.125 !!!cp (50);
1193     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1194 wakaba 1.1 !!!next-input-character;
1195     redo A;
1196 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1197 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1198 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1199 wakaba 1.77 !!!cp (52);
1200 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1201 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1202 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1203 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1204 wakaba 1.77 !!!cp (53);
1205 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1206 wakaba 1.77 } else {
1207     !!!cp (54);
1208 wakaba 1.1 }
1209     } else {
1210     die "$0: $self->{current_token}->{type}: Unknown token type";
1211     }
1212 wakaba 1.57 $self->{state} = DATA_STATE;
1213 wakaba 1.1 # reconsume
1214    
1215     !!!emit ($self->{current_token}); # start tag or end tag
1216    
1217     redo A;
1218     } else {
1219 wakaba 1.72 if ({
1220     0x0022 => 1, # "
1221     0x0027 => 1, # '
1222     0x003D => 1, # =
1223 wakaba 1.76 }->{$self->{next_char}}) {
1224 wakaba 1.77 !!!cp (55);
1225 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1226 wakaba 1.77 } else {
1227     !!!cp (56);
1228 wakaba 1.72 }
1229 wakaba 1.119 $self->{current_attribute}
1230     = {name => chr ($self->{next_char}),
1231     value => '',
1232     line => $self->{line}, column => $self->{column}};
1233 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1234 wakaba 1.1 !!!next-input-character;
1235     redo A;
1236     }
1237 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_NAME_STATE) {
1238 wakaba 1.1 my $before_leave = sub {
1239     if (exists $self->{current_token}->{attributes} # start tag or end tag
1240     ->{$self->{current_attribute}->{name}}) { # MUST
1241 wakaba 1.77 !!!cp (57);
1242 wakaba 1.120 !!!parse-error (type => 'duplicate attribute:'.$self->{current_attribute}->{name}, line => $self->{current_attribute}->{line}, column => $self->{current_attribute}->{column});
1243 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
1244     } else {
1245 wakaba 1.77 !!!cp (58);
1246 wakaba 1.1 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
1247     = $self->{current_attribute};
1248     }
1249     }; # $before_leave
1250    
1251 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1252     $self->{next_char} == 0x000A or # LF
1253     $self->{next_char} == 0x000B or # VT
1254     $self->{next_char} == 0x000C or # FF
1255     $self->{next_char} == 0x0020) { # SP
1256 wakaba 1.77 !!!cp (59);
1257 wakaba 1.1 $before_leave->();
1258 wakaba 1.57 $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
1259 wakaba 1.1 !!!next-input-character;
1260     redo A;
1261 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1262 wakaba 1.77 !!!cp (60);
1263 wakaba 1.1 $before_leave->();
1264 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1265 wakaba 1.1 !!!next-input-character;
1266     redo A;
1267 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1268 wakaba 1.1 $before_leave->();
1269 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1270 wakaba 1.77 !!!cp (61);
1271 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1272 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1273 wakaba 1.77 !!!cp (62);
1274 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1275 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1276 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1277 wakaba 1.1 }
1278     } else {
1279     die "$0: $self->{current_token}->{type}: Unknown token type";
1280     }
1281 wakaba 1.57 $self->{state} = DATA_STATE;
1282 wakaba 1.1 !!!next-input-character;
1283    
1284     !!!emit ($self->{current_token}); # start tag or end tag
1285    
1286     redo A;
1287 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1288     $self->{next_char} <= 0x005A) { # A..Z
1289 wakaba 1.77 !!!cp (63);
1290 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char} + 0x0020);
1291 wakaba 1.1 ## Stay in the state
1292     !!!next-input-character;
1293     redo A;
1294 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1295 wakaba 1.125 !!!cp (64);
1296 wakaba 1.1 $before_leave->();
1297 wakaba 1.125 $self->{state} = SELF_CLOSING_START_TAG_STATE;
1298 wakaba 1.1 !!!next-input-character;
1299     redo A;
1300 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1301 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1302 wakaba 1.1 $before_leave->();
1303 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1304 wakaba 1.77 !!!cp (66);
1305 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1306 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1307 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1308 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1309 wakaba 1.77 !!!cp (67);
1310 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1311 wakaba 1.77 } else {
1312 wakaba 1.78 ## NOTE: This state should never be reached.
1313 wakaba 1.77 !!!cp (68);
1314 wakaba 1.1 }
1315     } else {
1316     die "$0: $self->{current_token}->{type}: Unknown token type";
1317     }
1318 wakaba 1.57 $self->{state} = DATA_STATE;
1319 wakaba 1.1 # reconsume
1320    
1321     !!!emit ($self->{current_token}); # start tag or end tag
1322    
1323     redo A;
1324     } else {
1325 wakaba 1.76 if ($self->{next_char} == 0x0022 or # "
1326     $self->{next_char} == 0x0027) { # '
1327 wakaba 1.77 !!!cp (69);
1328 wakaba 1.72 !!!parse-error (type => 'bad attribute name');
1329 wakaba 1.77 } else {
1330     !!!cp (70);
1331 wakaba 1.72 }
1332 wakaba 1.76 $self->{current_attribute}->{name} .= chr ($self->{next_char});
1333 wakaba 1.1 ## Stay in the state
1334     !!!next-input-character;
1335     redo A;
1336     }
1337 wakaba 1.57 } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1338 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1339     $self->{next_char} == 0x000A or # LF
1340     $self->{next_char} == 0x000B or # VT
1341     $self->{next_char} == 0x000C or # FF
1342     $self->{next_char} == 0x0020) { # SP
1343 wakaba 1.77 !!!cp (71);
1344 wakaba 1.1 ## Stay in the state
1345     !!!next-input-character;
1346     redo A;
1347 wakaba 1.76 } elsif ($self->{next_char} == 0x003D) { # =
1348 wakaba 1.77 !!!cp (72);
1349 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_VALUE_STATE;
1350 wakaba 1.1 !!!next-input-character;
1351     redo A;
1352 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1353 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1354 wakaba 1.77 !!!cp (73);
1355 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1356 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1357 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1358 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1359 wakaba 1.77 !!!cp (74);
1360 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1361 wakaba 1.77 } else {
1362 wakaba 1.78 ## NOTE: This state should never be reached.
1363 wakaba 1.77 !!!cp (75);
1364 wakaba 1.1 }
1365     } else {
1366     die "$0: $self->{current_token}->{type}: Unknown token type";
1367     }
1368 wakaba 1.57 $self->{state} = DATA_STATE;
1369 wakaba 1.1 !!!next-input-character;
1370    
1371     !!!emit ($self->{current_token}); # start tag or end tag
1372    
1373     redo A;
1374 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
1375     $self->{next_char} <= 0x005A) { # A..Z
1376 wakaba 1.77 !!!cp (76);
1377 wakaba 1.119 $self->{current_attribute}
1378     = {name => chr ($self->{next_char} + 0x0020),
1379     value => '',
1380     line => $self->{line}, column => $self->{column}};
1381 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1382 wakaba 1.1 !!!next-input-character;
1383     redo A;
1384 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1385 wakaba 1.125 !!!cp (77);
1386     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1387 wakaba 1.1 !!!next-input-character;
1388     redo A;
1389 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1390 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1391 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1392 wakaba 1.77 !!!cp (79);
1393 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1394 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1395 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1396 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1397 wakaba 1.77 !!!cp (80);
1398 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1399 wakaba 1.77 } else {
1400 wakaba 1.78 ## NOTE: This state should never be reached.
1401 wakaba 1.77 !!!cp (81);
1402 wakaba 1.1 }
1403     } else {
1404     die "$0: $self->{current_token}->{type}: Unknown token type";
1405     }
1406 wakaba 1.57 $self->{state} = DATA_STATE;
1407 wakaba 1.1 # reconsume
1408    
1409     !!!emit ($self->{current_token}); # start tag or end tag
1410    
1411     redo A;
1412     } else {
1413 wakaba 1.77 !!!cp (82);
1414 wakaba 1.119 $self->{current_attribute}
1415     = {name => chr ($self->{next_char}),
1416     value => '',
1417     line => $self->{line}, column => $self->{column}};
1418 wakaba 1.57 $self->{state} = ATTRIBUTE_NAME_STATE;
1419 wakaba 1.1 !!!next-input-character;
1420     redo A;
1421     }
1422 wakaba 1.57 } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1423 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1424     $self->{next_char} == 0x000A or # LF
1425     $self->{next_char} == 0x000B or # VT
1426     $self->{next_char} == 0x000C or # FF
1427     $self->{next_char} == 0x0020) { # SP
1428 wakaba 1.77 !!!cp (83);
1429 wakaba 1.1 ## Stay in the state
1430     !!!next-input-character;
1431     redo A;
1432 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
1433 wakaba 1.77 !!!cp (84);
1434 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE;
1435 wakaba 1.1 !!!next-input-character;
1436     redo A;
1437 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1438 wakaba 1.77 !!!cp (85);
1439 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1440 wakaba 1.1 ## reconsume
1441     redo A;
1442 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
1443 wakaba 1.77 !!!cp (86);
1444 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE;
1445 wakaba 1.1 !!!next-input-character;
1446     redo A;
1447 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1448 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1449 wakaba 1.77 !!!cp (87);
1450 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1451 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1452 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1453 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1454 wakaba 1.77 !!!cp (88);
1455 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1456 wakaba 1.77 } else {
1457 wakaba 1.78 ## NOTE: This state should never be reached.
1458 wakaba 1.77 !!!cp (89);
1459 wakaba 1.1 }
1460     } else {
1461     die "$0: $self->{current_token}->{type}: Unknown token type";
1462     }
1463 wakaba 1.57 $self->{state} = DATA_STATE;
1464 wakaba 1.1 !!!next-input-character;
1465    
1466     !!!emit ($self->{current_token}); # start tag or end tag
1467    
1468     redo A;
1469 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1470 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1471 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1472 wakaba 1.77 !!!cp (90);
1473 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1474 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1475 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1476 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1477 wakaba 1.77 !!!cp (91);
1478 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1479 wakaba 1.77 } else {
1480 wakaba 1.78 ## NOTE: This state should never be reached.
1481 wakaba 1.77 !!!cp (92);
1482 wakaba 1.1 }
1483     } else {
1484     die "$0: $self->{current_token}->{type}: Unknown token type";
1485     }
1486 wakaba 1.57 $self->{state} = DATA_STATE;
1487 wakaba 1.1 ## reconsume
1488    
1489     !!!emit ($self->{current_token}); # start tag or end tag
1490    
1491     redo A;
1492     } else {
1493 wakaba 1.76 if ($self->{next_char} == 0x003D) { # =
1494 wakaba 1.77 !!!cp (93);
1495 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1496 wakaba 1.77 } else {
1497     !!!cp (94);
1498 wakaba 1.72 }
1499 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1500 wakaba 1.57 $self->{state} = ATTRIBUTE_VALUE_UNQUOTED_STATE;
1501 wakaba 1.1 !!!next-input-character;
1502     redo A;
1503     }
1504 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE) {
1505 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
1506 wakaba 1.77 !!!cp (95);
1507 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1508 wakaba 1.1 !!!next-input-character;
1509     redo A;
1510 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1511 wakaba 1.77 !!!cp (96);
1512 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1513     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1514 wakaba 1.1 !!!next-input-character;
1515     redo A;
1516 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1517 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1518 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1519 wakaba 1.77 !!!cp (97);
1520 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1521 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1522 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1523 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1524 wakaba 1.77 !!!cp (98);
1525 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1526 wakaba 1.77 } else {
1527 wakaba 1.78 ## NOTE: This state should never be reached.
1528 wakaba 1.77 !!!cp (99);
1529 wakaba 1.1 }
1530     } else {
1531     die "$0: $self->{current_token}->{type}: Unknown token type";
1532     }
1533 wakaba 1.57 $self->{state} = DATA_STATE;
1534 wakaba 1.1 ## reconsume
1535    
1536     !!!emit ($self->{current_token}); # start tag or end tag
1537    
1538     redo A;
1539     } else {
1540 wakaba 1.77 !!!cp (100);
1541 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1542 wakaba 1.1 ## Stay in the state
1543     !!!next-input-character;
1544     redo A;
1545     }
1546 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE) {
1547 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
1548 wakaba 1.77 !!!cp (101);
1549 wakaba 1.72 $self->{state} = AFTER_ATTRIBUTE_VALUE_QUOTED_STATE;
1550 wakaba 1.1 !!!next-input-character;
1551     redo A;
1552 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1553 wakaba 1.77 !!!cp (102);
1554 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1555     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1556 wakaba 1.1 !!!next-input-character;
1557     redo A;
1558 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1559 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1560 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1561 wakaba 1.77 !!!cp (103);
1562 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1563 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1564 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1565 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1566 wakaba 1.77 !!!cp (104);
1567 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1568 wakaba 1.77 } else {
1569 wakaba 1.78 ## NOTE: This state should never be reached.
1570 wakaba 1.77 !!!cp (105);
1571 wakaba 1.1 }
1572     } else {
1573     die "$0: $self->{current_token}->{type}: Unknown token type";
1574     }
1575 wakaba 1.57 $self->{state} = DATA_STATE;
1576 wakaba 1.1 ## reconsume
1577    
1578     !!!emit ($self->{current_token}); # start tag or end tag
1579    
1580     redo A;
1581     } else {
1582 wakaba 1.77 !!!cp (106);
1583 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1584 wakaba 1.1 ## Stay in the state
1585     !!!next-input-character;
1586     redo A;
1587     }
1588 wakaba 1.57 } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1589 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1590     $self->{next_char} == 0x000A or # LF
1591     $self->{next_char} == 0x000B or # VT
1592     $self->{next_char} == 0x000C or # FF
1593     $self->{next_char} == 0x0020) { # SP
1594 wakaba 1.77 !!!cp (107);
1595 wakaba 1.57 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1596 wakaba 1.1 !!!next-input-character;
1597     redo A;
1598 wakaba 1.76 } elsif ($self->{next_char} == 0x0026) { # &
1599 wakaba 1.77 !!!cp (108);
1600 wakaba 1.57 $self->{last_attribute_value_state} = $self->{state};
1601     $self->{state} = ENTITY_IN_ATTRIBUTE_VALUE_STATE;
1602 wakaba 1.1 !!!next-input-character;
1603     redo A;
1604 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1605 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1606 wakaba 1.77 !!!cp (109);
1607 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1608 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1609 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1610 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1611 wakaba 1.77 !!!cp (110);
1612 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1613 wakaba 1.77 } else {
1614 wakaba 1.78 ## NOTE: This state should never be reached.
1615 wakaba 1.77 !!!cp (111);
1616 wakaba 1.1 }
1617     } else {
1618     die "$0: $self->{current_token}->{type}: Unknown token type";
1619     }
1620 wakaba 1.57 $self->{state} = DATA_STATE;
1621 wakaba 1.1 !!!next-input-character;
1622    
1623     !!!emit ($self->{current_token}); # start tag or end tag
1624    
1625     redo A;
1626 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1627 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1628 wakaba 1.55 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1629 wakaba 1.77 !!!cp (112);
1630 wakaba 1.1 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1631 wakaba 1.55 } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1632 wakaba 1.40 $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1633 wakaba 1.1 if ($self->{current_token}->{attributes}) {
1634 wakaba 1.77 !!!cp (113);
1635 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1636 wakaba 1.77 } else {
1637 wakaba 1.78 ## NOTE: This state should never be reached.
1638 wakaba 1.77 !!!cp (114);
1639 wakaba 1.1 }
1640     } else {
1641     die "$0: $self->{current_token}->{type}: Unknown token type";
1642     }
1643 wakaba 1.57 $self->{state} = DATA_STATE;
1644 wakaba 1.1 ## reconsume
1645    
1646     !!!emit ($self->{current_token}); # start tag or end tag
1647    
1648     redo A;
1649     } else {
1650 wakaba 1.72 if ({
1651     0x0022 => 1, # "
1652     0x0027 => 1, # '
1653     0x003D => 1, # =
1654 wakaba 1.76 }->{$self->{next_char}}) {
1655 wakaba 1.77 !!!cp (115);
1656 wakaba 1.72 !!!parse-error (type => 'bad attribute value');
1657 wakaba 1.77 } else {
1658     !!!cp (116);
1659 wakaba 1.72 }
1660 wakaba 1.76 $self->{current_attribute}->{value} .= chr ($self->{next_char});
1661 wakaba 1.1 ## Stay in the state
1662     !!!next-input-character;
1663     redo A;
1664     }
1665 wakaba 1.57 } elsif ($self->{state} == ENTITY_IN_ATTRIBUTE_VALUE_STATE) {
1666 wakaba 1.72 my $token = $self->_tokenize_attempt_to_consume_an_entity
1667     (1,
1668     $self->{last_attribute_value_state}
1669     == ATTRIBUTE_VALUE_DOUBLE_QUOTED_STATE ? 0x0022 : # "
1670     $self->{last_attribute_value_state}
1671     == ATTRIBUTE_VALUE_SINGLE_QUOTED_STATE ? 0x0027 : # '
1672     -1);
1673 wakaba 1.1
1674     unless (defined $token) {
1675 wakaba 1.77 !!!cp (117);
1676 wakaba 1.1 $self->{current_attribute}->{value} .= '&';
1677     } else {
1678 wakaba 1.77 !!!cp (118);
1679 wakaba 1.1 $self->{current_attribute}->{value} .= $token->{data};
1680 wakaba 1.66 $self->{current_attribute}->{has_reference} = $token->{has_reference};
1681 wakaba 1.1 ## ISSUE: spec says "append the returned character token to the current attribute's value"
1682     }
1683    
1684     $self->{state} = $self->{last_attribute_value_state};
1685     # next-input-character is already done
1686     redo A;
1687 wakaba 1.72 } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1688 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
1689     $self->{next_char} == 0x000A or # LF
1690     $self->{next_char} == 0x000B or # VT
1691     $self->{next_char} == 0x000C or # FF
1692     $self->{next_char} == 0x0020) { # SP
1693 wakaba 1.77 !!!cp (118);
1694 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1695     !!!next-input-character;
1696     redo A;
1697 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1698 wakaba 1.72 if ($self->{current_token}->{type} == START_TAG_TOKEN) {
1699 wakaba 1.77 !!!cp (119);
1700 wakaba 1.72 $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1701     } elsif ($self->{current_token}->{type} == END_TAG_TOKEN) {
1702     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1703     if ($self->{current_token}->{attributes}) {
1704 wakaba 1.77 !!!cp (120);
1705 wakaba 1.72 !!!parse-error (type => 'end tag attribute');
1706 wakaba 1.77 } else {
1707 wakaba 1.78 ## NOTE: This state should never be reached.
1708 wakaba 1.77 !!!cp (121);
1709 wakaba 1.72 }
1710     } else {
1711     die "$0: $self->{current_token}->{type}: Unknown token type";
1712     }
1713     $self->{state} = DATA_STATE;
1714     !!!next-input-character;
1715    
1716     !!!emit ($self->{current_token}); # start tag or end tag
1717    
1718     redo A;
1719 wakaba 1.76 } elsif ($self->{next_char} == 0x002F) { # /
1720 wakaba 1.125 !!!cp (122);
1721     $self->{state} = SELF_CLOSING_START_TAG_STATE;
1722 wakaba 1.72 !!!next-input-character;
1723 wakaba 1.125 redo A;
1724     } else {
1725     !!!cp ('124.1');
1726     !!!parse-error (type => 'no space between attributes');
1727     $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1728     ## reconsume
1729     redo A;
1730     }
1731     } elsif ($self->{state} == SELF_CLOSING_START_TAG_STATE) {
1732     if ($self->{next_char} == 0x003E) { # >
1733     if ($self->{current_token}->{type} == END_TAG_TOKEN) {
1734     !!!cp ('124.2');
1735     !!!parse-error (type => 'nestc', token => $self->{current_token});
1736     ## TODO: Different type than slash in start tag
1737     $self->{content_model} = PCDATA_CONTENT_MODEL; # MUST
1738     if ($self->{current_token}->{attributes}) {
1739     !!!cp ('124.4');
1740     !!!parse-error (type => 'end tag attribute');
1741     } else {
1742     !!!cp ('124.5');
1743     }
1744     ## TODO: Test |<title></title/>|
1745 wakaba 1.72 } else {
1746 wakaba 1.125 !!!cp ('124.3');
1747     $self->{self_closing} = 1;
1748 wakaba 1.72 }
1749 wakaba 1.125
1750     $self->{state} = DATA_STATE;
1751     !!!next-input-character;
1752    
1753     !!!emit ($self->{current_token}); # start tag or end tag
1754    
1755 wakaba 1.72 redo A;
1756     } else {
1757 wakaba 1.125 !!!cp ('124.4');
1758     !!!parse-error (type => 'nestc');
1759     ## TODO: This error type is wrong.
1760 wakaba 1.72 $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1761 wakaba 1.125 ## Reconsume.
1762 wakaba 1.72 redo A;
1763     }
1764 wakaba 1.57 } elsif ($self->{state} == BOGUS_COMMENT_STATE) {
1765 wakaba 1.1 ## (only happens in the PCDATA state)
1766    
1767 wakaba 1.112 ## NOTE: Set by the previous state
1768     #my $token = {type => COMMENT_TOKEN, data => ''};
1769 wakaba 1.1
1770     BC: {
1771 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
1772 wakaba 1.77 !!!cp (124);
1773 wakaba 1.57 $self->{state} = DATA_STATE;
1774 wakaba 1.1 !!!next-input-character;
1775    
1776 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1777 wakaba 1.1
1778     redo A;
1779 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1780 wakaba 1.77 !!!cp (125);
1781 wakaba 1.57 $self->{state} = DATA_STATE;
1782 wakaba 1.1 ## reconsume
1783    
1784 wakaba 1.112 !!!emit ($self->{current_token}); # comment
1785 wakaba 1.1
1786     redo A;
1787     } else {
1788 wakaba 1.77 !!!cp (126);
1789 wakaba 1.112 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
1790 wakaba 1.1 !!!next-input-character;
1791     redo BC;
1792     }
1793     } # BC
1794 wakaba 1.77
1795     die "$0: _get_next_token: unexpected case [BC]";
1796 wakaba 1.57 } elsif ($self->{state} == MARKUP_DECLARATION_OPEN_STATE) {
1797 wakaba 1.1 ## (only happens in the PCDATA state)
1798    
1799 wakaba 1.120 my ($l, $c) = ($self->{line_prev}, $self->{column_prev} - 1);
1800 wakaba 1.112
1801 wakaba 1.1 my @next_char;
1802 wakaba 1.76 push @next_char, $self->{next_char};
1803 wakaba 1.1
1804 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1805 wakaba 1.1 !!!next-input-character;
1806 wakaba 1.76 push @next_char, $self->{next_char};
1807     if ($self->{next_char} == 0x002D) { # -
1808 wakaba 1.77 !!!cp (127);
1809 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1810 wakaba 1.120 line => $l, column => $c,
1811 wakaba 1.118 };
1812 wakaba 1.57 $self->{state} = COMMENT_START_STATE;
1813 wakaba 1.1 !!!next-input-character;
1814     redo A;
1815 wakaba 1.77 } else {
1816     !!!cp (128);
1817 wakaba 1.1 }
1818 wakaba 1.76 } elsif ($self->{next_char} == 0x0044 or # D
1819     $self->{next_char} == 0x0064) { # d
1820 wakaba 1.1 !!!next-input-character;
1821 wakaba 1.76 push @next_char, $self->{next_char};
1822     if ($self->{next_char} == 0x004F or # O
1823     $self->{next_char} == 0x006F) { # o
1824 wakaba 1.1 !!!next-input-character;
1825 wakaba 1.76 push @next_char, $self->{next_char};
1826     if ($self->{next_char} == 0x0043 or # C
1827     $self->{next_char} == 0x0063) { # c
1828 wakaba 1.1 !!!next-input-character;
1829 wakaba 1.76 push @next_char, $self->{next_char};
1830     if ($self->{next_char} == 0x0054 or # T
1831     $self->{next_char} == 0x0074) { # t
1832 wakaba 1.1 !!!next-input-character;
1833 wakaba 1.76 push @next_char, $self->{next_char};
1834     if ($self->{next_char} == 0x0059 or # Y
1835     $self->{next_char} == 0x0079) { # y
1836 wakaba 1.1 !!!next-input-character;
1837 wakaba 1.76 push @next_char, $self->{next_char};
1838     if ($self->{next_char} == 0x0050 or # P
1839     $self->{next_char} == 0x0070) { # p
1840 wakaba 1.1 !!!next-input-character;
1841 wakaba 1.76 push @next_char, $self->{next_char};
1842     if ($self->{next_char} == 0x0045 or # E
1843     $self->{next_char} == 0x0065) { # e
1844 wakaba 1.77 !!!cp (129);
1845     ## TODO: What a stupid code this is!
1846 wakaba 1.57 $self->{state} = DOCTYPE_STATE;
1847 wakaba 1.112 $self->{current_token} = {type => DOCTYPE_TOKEN,
1848     quirks => 1,
1849 wakaba 1.120 line => $l, column => $c,
1850 wakaba 1.118 };
1851 wakaba 1.1 !!!next-input-character;
1852     redo A;
1853 wakaba 1.77 } else {
1854     !!!cp (130);
1855 wakaba 1.1 }
1856 wakaba 1.77 } else {
1857     !!!cp (131);
1858 wakaba 1.1 }
1859 wakaba 1.77 } else {
1860     !!!cp (132);
1861 wakaba 1.1 }
1862 wakaba 1.77 } else {
1863     !!!cp (133);
1864 wakaba 1.1 }
1865 wakaba 1.77 } else {
1866     !!!cp (134);
1867 wakaba 1.1 }
1868 wakaba 1.77 } else {
1869     !!!cp (135);
1870 wakaba 1.1 }
1871 wakaba 1.127 } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and
1872     $self->{open_elements}->[-1]->[1] & FOREIGN_EL and
1873     $self->{next_char} == 0x005B) { # [
1874     !!!next-input-character;
1875     push @next_char, $self->{next_char};
1876     if ($self->{next_char} == 0x0043) { # C
1877     !!!next-input-character;
1878     push @next_char, $self->{next_char};
1879     if ($self->{next_char} == 0x0044) { # D
1880     !!!next-input-character;
1881     push @next_char, $self->{next_char};
1882     if ($self->{next_char} == 0x0041) { # A
1883     !!!next-input-character;
1884     push @next_char, $self->{next_char};
1885     if ($self->{next_char} == 0x0054) { # T
1886     !!!next-input-character;
1887     push @next_char, $self->{next_char};
1888     if ($self->{next_char} == 0x0041) { # A
1889     !!!next-input-character;
1890     push @next_char, $self->{next_char};
1891     if ($self->{next_char} == 0x005B) { # [
1892     !!!cp (135.1);
1893     $self->{state} = CDATA_BLOCK_STATE;
1894     !!!next-input-character;
1895     redo A;
1896     } else {
1897     !!!cp (135.2);
1898     }
1899     } else {
1900     !!!cp (135.3);
1901     }
1902     } else {
1903     !!!cp (135.4);
1904     }
1905     } else {
1906     !!!cp (135.5);
1907     }
1908     } else {
1909     !!!cp (135.6);
1910     }
1911     } else {
1912     !!!cp (135.7);
1913     }
1914 wakaba 1.77 } else {
1915     !!!cp (136);
1916 wakaba 1.1 }
1917    
1918 wakaba 1.30 !!!parse-error (type => 'bogus comment');
1919 wakaba 1.76 $self->{next_char} = shift @next_char;
1920 wakaba 1.1 !!!back-next-input-character (@next_char);
1921 wakaba 1.57 $self->{state} = BOGUS_COMMENT_STATE;
1922 wakaba 1.112 $self->{current_token} = {type => COMMENT_TOKEN, data => '',
1923 wakaba 1.120 line => $l, column => $c,
1924 wakaba 1.118 };
1925 wakaba 1.1 redo A;
1926    
1927     ## ISSUE: typos in spec: chacacters, is is a parse error
1928     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1929 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_STATE) {
1930 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1931 wakaba 1.77 !!!cp (137);
1932 wakaba 1.57 $self->{state} = COMMENT_START_DASH_STATE;
1933 wakaba 1.23 !!!next-input-character;
1934     redo A;
1935 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1936 wakaba 1.77 !!!cp (138);
1937 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1938 wakaba 1.57 $self->{state} = DATA_STATE;
1939 wakaba 1.23 !!!next-input-character;
1940    
1941     !!!emit ($self->{current_token}); # comment
1942    
1943     redo A;
1944 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1945 wakaba 1.77 !!!cp (139);
1946 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1947 wakaba 1.57 $self->{state} = DATA_STATE;
1948 wakaba 1.23 ## reconsume
1949    
1950     !!!emit ($self->{current_token}); # comment
1951    
1952     redo A;
1953     } else {
1954 wakaba 1.77 !!!cp (140);
1955 wakaba 1.23 $self->{current_token}->{data} # comment
1956 wakaba 1.76 .= chr ($self->{next_char});
1957 wakaba 1.57 $self->{state} = COMMENT_STATE;
1958 wakaba 1.23 !!!next-input-character;
1959     redo A;
1960     }
1961 wakaba 1.57 } elsif ($self->{state} == COMMENT_START_DASH_STATE) {
1962 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1963 wakaba 1.77 !!!cp (141);
1964 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
1965 wakaba 1.23 !!!next-input-character;
1966     redo A;
1967 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
1968 wakaba 1.77 !!!cp (142);
1969 wakaba 1.23 !!!parse-error (type => 'bogus comment');
1970 wakaba 1.57 $self->{state} = DATA_STATE;
1971 wakaba 1.23 !!!next-input-character;
1972    
1973     !!!emit ($self->{current_token}); # comment
1974    
1975     redo A;
1976 wakaba 1.76 } elsif ($self->{next_char} == -1) {
1977 wakaba 1.77 !!!cp (143);
1978 wakaba 1.23 !!!parse-error (type => 'unclosed comment');
1979 wakaba 1.57 $self->{state} = DATA_STATE;
1980 wakaba 1.23 ## reconsume
1981    
1982     !!!emit ($self->{current_token}); # comment
1983    
1984     redo A;
1985     } else {
1986 wakaba 1.77 !!!cp (144);
1987 wakaba 1.23 $self->{current_token}->{data} # comment
1988 wakaba 1.76 .= '-' . chr ($self->{next_char});
1989 wakaba 1.57 $self->{state} = COMMENT_STATE;
1990 wakaba 1.23 !!!next-input-character;
1991     redo A;
1992     }
1993 wakaba 1.57 } elsif ($self->{state} == COMMENT_STATE) {
1994 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
1995 wakaba 1.77 !!!cp (145);
1996 wakaba 1.57 $self->{state} = COMMENT_END_DASH_STATE;
1997 wakaba 1.1 !!!next-input-character;
1998     redo A;
1999 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2000 wakaba 1.77 !!!cp (146);
2001 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2002 wakaba 1.57 $self->{state} = DATA_STATE;
2003 wakaba 1.1 ## reconsume
2004    
2005     !!!emit ($self->{current_token}); # comment
2006    
2007     redo A;
2008     } else {
2009 wakaba 1.77 !!!cp (147);
2010 wakaba 1.76 $self->{current_token}->{data} .= chr ($self->{next_char}); # comment
2011 wakaba 1.1 ## Stay in the state
2012     !!!next-input-character;
2013     redo A;
2014     }
2015 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_DASH_STATE) {
2016 wakaba 1.76 if ($self->{next_char} == 0x002D) { # -
2017 wakaba 1.77 !!!cp (148);
2018 wakaba 1.57 $self->{state} = COMMENT_END_STATE;
2019 wakaba 1.1 !!!next-input-character;
2020     redo A;
2021 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2022 wakaba 1.77 !!!cp (149);
2023 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2024 wakaba 1.57 $self->{state} = DATA_STATE;
2025 wakaba 1.1 ## reconsume
2026    
2027     !!!emit ($self->{current_token}); # comment
2028    
2029     redo A;
2030     } else {
2031 wakaba 1.77 !!!cp (150);
2032 wakaba 1.76 $self->{current_token}->{data} .= '-' . chr ($self->{next_char}); # comment
2033 wakaba 1.57 $self->{state} = COMMENT_STATE;
2034 wakaba 1.1 !!!next-input-character;
2035     redo A;
2036     }
2037 wakaba 1.57 } elsif ($self->{state} == COMMENT_END_STATE) {
2038 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2039 wakaba 1.77 !!!cp (151);
2040 wakaba 1.57 $self->{state} = DATA_STATE;
2041 wakaba 1.1 !!!next-input-character;
2042    
2043     !!!emit ($self->{current_token}); # comment
2044    
2045     redo A;
2046 wakaba 1.76 } elsif ($self->{next_char} == 0x002D) { # -
2047 wakaba 1.77 !!!cp (152);
2048 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2049     line => $self->{line_prev},
2050     column => $self->{column_prev});
2051 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
2052     ## Stay in the state
2053     !!!next-input-character;
2054     redo A;
2055 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2056 wakaba 1.77 !!!cp (153);
2057 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
2058 wakaba 1.57 $self->{state} = DATA_STATE;
2059 wakaba 1.1 ## reconsume
2060    
2061     !!!emit ($self->{current_token}); # comment
2062    
2063     redo A;
2064     } else {
2065 wakaba 1.77 !!!cp (154);
2066 wakaba 1.114 !!!parse-error (type => 'dash in comment',
2067     line => $self->{line_prev},
2068     column => $self->{column_prev});
2069 wakaba 1.76 $self->{current_token}->{data} .= '--' . chr ($self->{next_char}); # comment
2070 wakaba 1.57 $self->{state} = COMMENT_STATE;
2071 wakaba 1.1 !!!next-input-character;
2072     redo A;
2073     }
2074 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_STATE) {
2075 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2076     $self->{next_char} == 0x000A or # LF
2077     $self->{next_char} == 0x000B or # VT
2078     $self->{next_char} == 0x000C or # FF
2079     $self->{next_char} == 0x0020) { # SP
2080 wakaba 1.77 !!!cp (155);
2081 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2082 wakaba 1.1 !!!next-input-character;
2083     redo A;
2084     } else {
2085 wakaba 1.77 !!!cp (156);
2086 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
2087 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2088 wakaba 1.1 ## reconsume
2089     redo A;
2090     }
2091 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2092 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2093     $self->{next_char} == 0x000A or # LF
2094     $self->{next_char} == 0x000B or # VT
2095     $self->{next_char} == 0x000C or # FF
2096     $self->{next_char} == 0x0020) { # SP
2097 wakaba 1.77 !!!cp (157);
2098 wakaba 1.1 ## Stay in the state
2099     !!!next-input-character;
2100     redo A;
2101 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2102 wakaba 1.77 !!!cp (158);
2103 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2104 wakaba 1.57 $self->{state} = DATA_STATE;
2105 wakaba 1.1 !!!next-input-character;
2106    
2107 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2108 wakaba 1.1
2109     redo A;
2110 wakaba 1.77 } elsif ($self->{next_char} == -1) {
2111     !!!cp (159);
2112 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
2113 wakaba 1.57 $self->{state} = DATA_STATE;
2114 wakaba 1.1 ## reconsume
2115    
2116 wakaba 1.112 !!!emit ($self->{current_token}); # DOCTYPE (quirks)
2117 wakaba 1.1
2118     redo A;
2119     } else {
2120 wakaba 1.77 !!!cp (160);
2121 wakaba 1.112 $self->{current_token}->{name} = chr $self->{next_char};
2122     delete $self->{current_token}->{quirks};
2123 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
2124 wakaba 1.57 $self->{state} = DOCTYPE_NAME_STATE;
2125 wakaba 1.1 !!!next-input-character;
2126     redo A;
2127     }
2128 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2129 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
2130 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2131     $self->{next_char} == 0x000A or # LF
2132     $self->{next_char} == 0x000B or # VT
2133     $self->{next_char} == 0x000C or # FF
2134     $self->{next_char} == 0x0020) { # SP
2135 wakaba 1.77 !!!cp (161);
2136 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2137 wakaba 1.1 !!!next-input-character;
2138     redo A;
2139 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2140 wakaba 1.77 !!!cp (162);
2141 wakaba 1.57 $self->{state} = DATA_STATE;
2142 wakaba 1.1 !!!next-input-character;
2143    
2144     !!!emit ($self->{current_token}); # DOCTYPE
2145    
2146     redo A;
2147 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2148 wakaba 1.77 !!!cp (163);
2149 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2150 wakaba 1.57 $self->{state} = DATA_STATE;
2151 wakaba 1.1 ## reconsume
2152    
2153 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2154 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2155 wakaba 1.1
2156     redo A;
2157     } else {
2158 wakaba 1.77 !!!cp (164);
2159 wakaba 1.1 $self->{current_token}->{name}
2160 wakaba 1.76 .= chr ($self->{next_char}); # DOCTYPE
2161 wakaba 1.1 ## Stay in the state
2162     !!!next-input-character;
2163     redo A;
2164     }
2165 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2166 wakaba 1.76 if ($self->{next_char} == 0x0009 or # HT
2167     $self->{next_char} == 0x000A or # LF
2168     $self->{next_char} == 0x000B or # VT
2169     $self->{next_char} == 0x000C or # FF
2170     $self->{next_char} == 0x0020) { # SP
2171 wakaba 1.77 !!!cp (165);
2172 wakaba 1.1 ## Stay in the state
2173     !!!next-input-character;
2174     redo A;
2175 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2176 wakaba 1.77 !!!cp (166);
2177 wakaba 1.57 $self->{state} = DATA_STATE;
2178 wakaba 1.1 !!!next-input-character;
2179    
2180     !!!emit ($self->{current_token}); # DOCTYPE
2181    
2182     redo A;
2183 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2184 wakaba 1.77 !!!cp (167);
2185 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2186 wakaba 1.57 $self->{state} = DATA_STATE;
2187 wakaba 1.1 ## reconsume
2188    
2189 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2190 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2191    
2192     redo A;
2193 wakaba 1.76 } elsif ($self->{next_char} == 0x0050 or # P
2194     $self->{next_char} == 0x0070) { # p
2195 wakaba 1.18 !!!next-input-character;
2196 wakaba 1.76 if ($self->{next_char} == 0x0055 or # U
2197     $self->{next_char} == 0x0075) { # u
2198 wakaba 1.18 !!!next-input-character;
2199 wakaba 1.76 if ($self->{next_char} == 0x0042 or # B
2200     $self->{next_char} == 0x0062) { # b
2201 wakaba 1.18 !!!next-input-character;
2202 wakaba 1.76 if ($self->{next_char} == 0x004C or # L
2203     $self->{next_char} == 0x006C) { # l
2204 wakaba 1.18 !!!next-input-character;
2205 wakaba 1.76 if ($self->{next_char} == 0x0049 or # I
2206     $self->{next_char} == 0x0069) { # i
2207 wakaba 1.18 !!!next-input-character;
2208 wakaba 1.76 if ($self->{next_char} == 0x0043 or # C
2209     $self->{next_char} == 0x0063) { # c
2210 wakaba 1.77 !!!cp (168);
2211 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2212 wakaba 1.18 !!!next-input-character;
2213     redo A;
2214 wakaba 1.77 } else {
2215     !!!cp (169);
2216 wakaba 1.18 }
2217 wakaba 1.77 } else {
2218     !!!cp (170);
2219 wakaba 1.18 }
2220 wakaba 1.77 } else {
2221     !!!cp (171);
2222 wakaba 1.18 }
2223 wakaba 1.77 } else {
2224     !!!cp (172);
2225 wakaba 1.18 }
2226 wakaba 1.77 } else {
2227     !!!cp (173);
2228 wakaba 1.18 }
2229    
2230     #
2231 wakaba 1.76 } elsif ($self->{next_char} == 0x0053 or # S
2232     $self->{next_char} == 0x0073) { # s
2233 wakaba 1.18 !!!next-input-character;
2234 wakaba 1.76 if ($self->{next_char} == 0x0059 or # Y
2235     $self->{next_char} == 0x0079) { # y
2236 wakaba 1.18 !!!next-input-character;
2237 wakaba 1.76 if ($self->{next_char} == 0x0053 or # S
2238     $self->{next_char} == 0x0073) { # s
2239 wakaba 1.18 !!!next-input-character;
2240 wakaba 1.76 if ($self->{next_char} == 0x0054 or # T
2241     $self->{next_char} == 0x0074) { # t
2242 wakaba 1.18 !!!next-input-character;
2243 wakaba 1.76 if ($self->{next_char} == 0x0045 or # E
2244     $self->{next_char} == 0x0065) { # e
2245 wakaba 1.18 !!!next-input-character;
2246 wakaba 1.76 if ($self->{next_char} == 0x004D or # M
2247     $self->{next_char} == 0x006D) { # m
2248 wakaba 1.77 !!!cp (174);
2249 wakaba 1.57 $self->{state} = BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2250 wakaba 1.18 !!!next-input-character;
2251     redo A;
2252 wakaba 1.77 } else {
2253     !!!cp (175);
2254 wakaba 1.18 }
2255 wakaba 1.77 } else {
2256     !!!cp (176);
2257 wakaba 1.18 }
2258 wakaba 1.77 } else {
2259     !!!cp (177);
2260 wakaba 1.18 }
2261 wakaba 1.77 } else {
2262     !!!cp (178);
2263 wakaba 1.18 }
2264 wakaba 1.77 } else {
2265     !!!cp (179);
2266 wakaba 1.18 }
2267    
2268     #
2269     } else {
2270 wakaba 1.77 !!!cp (180);
2271 wakaba 1.18 !!!next-input-character;
2272     #
2273     }
2274    
2275     !!!parse-error (type => 'string after DOCTYPE name');
2276 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2277 wakaba 1.73
2278 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2279 wakaba 1.18 # next-input-character is already done
2280     redo A;
2281 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2282 wakaba 1.18 if ({
2283     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2284     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2285 wakaba 1.76 }->{$self->{next_char}}) {
2286 wakaba 1.77 !!!cp (181);
2287 wakaba 1.18 ## Stay in the state
2288     !!!next-input-character;
2289     redo A;
2290 wakaba 1.76 } elsif ($self->{next_char} eq 0x0022) { # "
2291 wakaba 1.77 !!!cp (182);
2292 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2293 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE;
2294 wakaba 1.18 !!!next-input-character;
2295     redo A;
2296 wakaba 1.76 } elsif ($self->{next_char} eq 0x0027) { # '
2297 wakaba 1.77 !!!cp (183);
2298 wakaba 1.18 $self->{current_token}->{public_identifier} = ''; # DOCTYPE
2299 wakaba 1.57 $self->{state} = DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE;
2300 wakaba 1.18 !!!next-input-character;
2301     redo A;
2302 wakaba 1.76 } elsif ($self->{next_char} eq 0x003E) { # >
2303 wakaba 1.77 !!!cp (184);
2304 wakaba 1.18 !!!parse-error (type => 'no PUBLIC literal');
2305    
2306 wakaba 1.57 $self->{state} = DATA_STATE;
2307 wakaba 1.18 !!!next-input-character;
2308    
2309 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2310 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2311    
2312     redo A;
2313 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2314 wakaba 1.77 !!!cp (185);
2315 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2316    
2317 wakaba 1.57 $self->{state} = DATA_STATE;
2318 wakaba 1.18 ## reconsume
2319    
2320 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2321 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2322    
2323     redo A;
2324     } else {
2325 wakaba 1.77 !!!cp (186);
2326 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC');
2327 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2328 wakaba 1.73
2329 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2330 wakaba 1.18 !!!next-input-character;
2331     redo A;
2332     }
2333 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2334 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2335 wakaba 1.77 !!!cp (187);
2336 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2337 wakaba 1.18 !!!next-input-character;
2338     redo A;
2339 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2340 wakaba 1.77 !!!cp (188);
2341 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2342    
2343     $self->{state} = DATA_STATE;
2344     !!!next-input-character;
2345    
2346 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2347 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2348    
2349     redo A;
2350 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2351 wakaba 1.77 !!!cp (189);
2352 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2353    
2354 wakaba 1.57 $self->{state} = DATA_STATE;
2355 wakaba 1.18 ## reconsume
2356    
2357 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2358 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2359    
2360     redo A;
2361     } else {
2362 wakaba 1.77 !!!cp (190);
2363 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2364 wakaba 1.76 .= chr $self->{next_char};
2365 wakaba 1.18 ## Stay in the state
2366     !!!next-input-character;
2367     redo A;
2368     }
2369 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED_STATE) {
2370 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2371 wakaba 1.77 !!!cp (191);
2372 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE;
2373 wakaba 1.18 !!!next-input-character;
2374     redo A;
2375 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2376 wakaba 1.77 !!!cp (192);
2377 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2378    
2379     $self->{state} = DATA_STATE;
2380     !!!next-input-character;
2381    
2382 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2383 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2384    
2385     redo A;
2386 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2387 wakaba 1.77 !!!cp (193);
2388 wakaba 1.18 !!!parse-error (type => 'unclosed PUBLIC literal');
2389    
2390 wakaba 1.57 $self->{state} = DATA_STATE;
2391 wakaba 1.18 ## reconsume
2392    
2393 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2394 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2395    
2396     redo A;
2397     } else {
2398 wakaba 1.77 !!!cp (194);
2399 wakaba 1.18 $self->{current_token}->{public_identifier} # DOCTYPE
2400 wakaba 1.76 .= chr $self->{next_char};
2401 wakaba 1.18 ## Stay in the state
2402     !!!next-input-character;
2403     redo A;
2404     }
2405 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2406 wakaba 1.18 if ({
2407     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2408     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2409 wakaba 1.76 }->{$self->{next_char}}) {
2410 wakaba 1.77 !!!cp (195);
2411 wakaba 1.18 ## Stay in the state
2412     !!!next-input-character;
2413     redo A;
2414 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2415 wakaba 1.77 !!!cp (196);
2416 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2417 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2418 wakaba 1.18 !!!next-input-character;
2419     redo A;
2420 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2421 wakaba 1.77 !!!cp (197);
2422 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2423 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2424 wakaba 1.18 !!!next-input-character;
2425     redo A;
2426 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2427 wakaba 1.77 !!!cp (198);
2428 wakaba 1.57 $self->{state} = DATA_STATE;
2429 wakaba 1.18 !!!next-input-character;
2430    
2431     !!!emit ($self->{current_token}); # DOCTYPE
2432    
2433     redo A;
2434 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2435 wakaba 1.77 !!!cp (199);
2436 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2437    
2438 wakaba 1.57 $self->{state} = DATA_STATE;
2439 wakaba 1.26 ## reconsume
2440 wakaba 1.18
2441 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2442 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2443    
2444     redo A;
2445     } else {
2446 wakaba 1.77 !!!cp (200);
2447 wakaba 1.18 !!!parse-error (type => 'string after PUBLIC literal');
2448 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2449 wakaba 1.73
2450 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2451 wakaba 1.18 !!!next-input-character;
2452     redo A;
2453     }
2454 wakaba 1.57 } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2455 wakaba 1.18 if ({
2456     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2457     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2458 wakaba 1.76 }->{$self->{next_char}}) {
2459 wakaba 1.77 !!!cp (201);
2460 wakaba 1.18 ## Stay in the state
2461     !!!next-input-character;
2462     redo A;
2463 wakaba 1.76 } elsif ($self->{next_char} == 0x0022) { # "
2464 wakaba 1.77 !!!cp (202);
2465 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2466 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE;
2467 wakaba 1.18 !!!next-input-character;
2468     redo A;
2469 wakaba 1.76 } elsif ($self->{next_char} == 0x0027) { # '
2470 wakaba 1.77 !!!cp (203);
2471 wakaba 1.18 $self->{current_token}->{system_identifier} = ''; # DOCTYPE
2472 wakaba 1.57 $self->{state} = DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE;
2473 wakaba 1.18 !!!next-input-character;
2474     redo A;
2475 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2476 wakaba 1.77 !!!cp (204);
2477 wakaba 1.18 !!!parse-error (type => 'no SYSTEM literal');
2478 wakaba 1.57 $self->{state} = DATA_STATE;
2479 wakaba 1.18 !!!next-input-character;
2480    
2481 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2482 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2483    
2484     redo A;
2485 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2486 wakaba 1.77 !!!cp (205);
2487 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2488    
2489 wakaba 1.57 $self->{state} = DATA_STATE;
2490 wakaba 1.26 ## reconsume
2491 wakaba 1.18
2492 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2493 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2494    
2495     redo A;
2496     } else {
2497 wakaba 1.77 !!!cp (206);
2498 wakaba 1.30 !!!parse-error (type => 'string after SYSTEM');
2499 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2500 wakaba 1.73
2501 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2502 wakaba 1.18 !!!next-input-character;
2503     redo A;
2504     }
2505 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED_STATE) {
2506 wakaba 1.76 if ($self->{next_char} == 0x0022) { # "
2507 wakaba 1.77 !!!cp (207);
2508 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2509 wakaba 1.18 !!!next-input-character;
2510     redo A;
2511 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2512 wakaba 1.77 !!!cp (208);
2513 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2514    
2515     $self->{state} = DATA_STATE;
2516     !!!next-input-character;
2517    
2518 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2519 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2520    
2521     redo A;
2522 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2523 wakaba 1.77 !!!cp (209);
2524 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2525    
2526 wakaba 1.57 $self->{state} = DATA_STATE;
2527 wakaba 1.18 ## reconsume
2528    
2529 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2530 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2531    
2532     redo A;
2533     } else {
2534 wakaba 1.77 !!!cp (210);
2535 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2536 wakaba 1.76 .= chr $self->{next_char};
2537 wakaba 1.18 ## Stay in the state
2538     !!!next-input-character;
2539     redo A;
2540     }
2541 wakaba 1.57 } elsif ($self->{state} == DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED_STATE) {
2542 wakaba 1.76 if ($self->{next_char} == 0x0027) { # '
2543 wakaba 1.77 !!!cp (211);
2544 wakaba 1.57 $self->{state} = AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE;
2545 wakaba 1.18 !!!next-input-character;
2546     redo A;
2547 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2548 wakaba 1.77 !!!cp (212);
2549 wakaba 1.69 !!!parse-error (type => 'unclosed PUBLIC literal');
2550    
2551     $self->{state} = DATA_STATE;
2552     !!!next-input-character;
2553    
2554 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2555 wakaba 1.69 !!!emit ($self->{current_token}); # DOCTYPE
2556    
2557     redo A;
2558 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2559 wakaba 1.77 !!!cp (213);
2560 wakaba 1.18 !!!parse-error (type => 'unclosed SYSTEM literal');
2561    
2562 wakaba 1.57 $self->{state} = DATA_STATE;
2563 wakaba 1.18 ## reconsume
2564    
2565 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2566 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
2567    
2568     redo A;
2569     } else {
2570 wakaba 1.77 !!!cp (214);
2571 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
2572 wakaba 1.76 .= chr $self->{next_char};
2573 wakaba 1.18 ## Stay in the state
2574     !!!next-input-character;
2575     redo A;
2576     }
2577 wakaba 1.57 } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2578 wakaba 1.18 if ({
2579     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
2580     #0x000D => 1, # HT, LF, VT, FF, SP, CR
2581 wakaba 1.76 }->{$self->{next_char}}) {
2582 wakaba 1.77 !!!cp (215);
2583 wakaba 1.18 ## Stay in the state
2584     !!!next-input-character;
2585     redo A;
2586 wakaba 1.76 } elsif ($self->{next_char} == 0x003E) { # >
2587 wakaba 1.77 !!!cp (216);
2588 wakaba 1.57 $self->{state} = DATA_STATE;
2589 wakaba 1.18 !!!next-input-character;
2590    
2591     !!!emit ($self->{current_token}); # DOCTYPE
2592    
2593     redo A;
2594 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2595 wakaba 1.77 !!!cp (217);
2596 wakaba 1.18 !!!parse-error (type => 'unclosed DOCTYPE');
2597    
2598 wakaba 1.57 $self->{state} = DATA_STATE;
2599 wakaba 1.26 ## reconsume
2600 wakaba 1.18
2601 wakaba 1.75 $self->{current_token}->{quirks} = 1;
2602 wakaba 1.18 !!!emit ($self->{current_token}); # DOCTYPE
2603    
2604     redo A;
2605     } else {
2606 wakaba 1.77 !!!cp (218);
2607 wakaba 1.18 !!!parse-error (type => 'string after SYSTEM literal');
2608 wakaba 1.75 #$self->{current_token}->{quirks} = 1;
2609 wakaba 1.73
2610 wakaba 1.57 $self->{state} = BOGUS_DOCTYPE_STATE;
2611 wakaba 1.1 !!!next-input-character;
2612     redo A;
2613     }
2614 wakaba 1.57 } elsif ($self->{state} == BOGUS_DOCTYPE_STATE) {
2615 wakaba 1.76 if ($self->{next_char} == 0x003E) { # >
2616 wakaba 1.77 !!!cp (219);
2617 wakaba 1.57 $self->{state} = DATA_STATE;
2618 wakaba 1.1 !!!next-input-character;
2619    
2620     !!!emit ($self->{current_token}); # DOCTYPE
2621    
2622     redo A;
2623 wakaba 1.76 } elsif ($self->{next_char} == -1) {
2624 wakaba 1.77 !!!cp (220);
2625 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
2626 wakaba 1.57 $self->{state} = DATA_STATE;
2627 wakaba 1.1 ## reconsume
2628    
2629     !!!emit ($self->{current_token}); # DOCTYPE
2630    
2631     redo A;
2632     } else {
2633 wakaba 1.77 !!!cp (221);
2634 wakaba 1.1 ## Stay in the state
2635     !!!next-input-character;
2636     redo A;
2637     }
2638 wakaba 1.127 } elsif ($self->{state} == CDATA_BLOCK_STATE) {
2639     my $s = '';
2640    
2641     my ($l, $c) = ($self->{line}, $self->{column});
2642    
2643     CS: while ($self->{next_char} != -1) {
2644     if ($self->{next_char} == 0x005D) { # ]
2645     !!!next-input-character;
2646     if ($self->{next_char} == 0x005D) { # ]
2647     !!!next-input-character;
2648     MDC: {
2649     if ($self->{next_char} == 0x003E) { # >
2650     !!!cp (221.1);
2651     !!!next-input-character;
2652     last CS;
2653     } elsif ($self->{next_char} == 0x005D) { # ]
2654     !!!cp (221.2);
2655     $s .= ']';
2656     !!!next-input-character;
2657     redo MDC;
2658     } else {
2659     !!!cp (221.3);
2660     $s .= ']]';
2661     #
2662     }
2663     } # MDC
2664     } else {
2665     !!!cp (221.4);
2666     $s .= ']';
2667     #
2668     }
2669     } else {
2670     !!!cp (221.5);
2671     #
2672     }
2673     $s .= chr $self->{next_char};
2674     !!!next-input-character;
2675     } # CS
2676    
2677     $self->{state} = DATA_STATE;
2678     ## next-input-character done or EOF, which is reconsumed.
2679    
2680     if (length $s) {
2681     !!!cp (221.6);
2682     !!!emit ({type => CHARACTER_TOKEN, data => $s,
2683     line => $l, column => $c});
2684     } else {
2685     !!!cp (221.7);
2686     }
2687    
2688     redo A;
2689    
2690     ## ISSUE: "text tokens" in spec.
2691     ## TODO: Streaming support
2692 wakaba 1.1 } else {
2693     die "$0: $self->{state}: Unknown state";
2694     }
2695     } # A
2696    
2697     die "$0: _get_next_token: unexpected case";
2698     } # _get_next_token
2699    
## Try to consume a character reference from the input stream.
##
## Arguments:
##   $self       - the parser object; reads and advances $self->{next_char}.
##   $in_attr    - true when the reference occurs in an attribute value.
##                 It only affects named references that lack the ";":
##                 if further name characters followed the matched name,
##                 the text is returned literally instead of being replaced.
##   $additional - a code point that, like white space, "<", "&" and EOF,
##                 means "this is not a reference; consume nothing".
##
## Returns undef when no token is produced, otherwise a CHARACTER_TOKEN
## hash reference (with |has_reference| set when a reference was replaced).
## NOTE(review): presumably called just after the "&" has been consumed,
## since the reported position is taken from {line_prev}/{column_prev} --
## confirm against the callers.
2700 wakaba 1.72 sub _tokenize_attempt_to_consume_an_entity ($$$) {
2701     my ($self, $in_attr, $additional) = @_;
2702 wakaba 1.20
## Position attached to every error and token produced below.
2703 wakaba 1.112 my ($l, $c) = ($self->{line_prev}, $self->{column_prev});
2704    
2705 wakaba 1.20 if ({
2706     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
2707     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
2708 wakaba 1.72 $additional => 1,
2709 wakaba 1.76 }->{$self->{next_char}}) {
2710 wakaba 1.78 !!!cp (1001);
2711 wakaba 1.20 ## Don't consume
2712     ## No error
2713     return undef;
2714 wakaba 1.76 } elsif ($self->{next_char} == 0x0023) { # #
## Numeric character reference ("&#...").
2715 wakaba 1.1 !!!next-input-character;
2716 wakaba 1.76 if ($self->{next_char} == 0x0078 or # x
2717     $self->{next_char} == 0x0058) { # X
## Hexadecimal form.  $code stays undefined until the first hex digit
## has been seen; each |redo X| consumes one more character.
2718 wakaba 1.26 my $code;
2719 wakaba 1.1 X: {
2720 wakaba 1.76 my $x_char = $self->{next_char};
2721 wakaba 1.1 !!!next-input-character;
2722 wakaba 1.76 if (0x0030 <= $self->{next_char} and
2723     $self->{next_char} <= 0x0039) { # 0..9
2724 wakaba 1.78 !!!cp (1002);
2725 wakaba 1.26 $code ||= 0;
2726     $code *= 0x10;
2727 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2728 wakaba 1.1 redo X;
2729 wakaba 1.76 } elsif (0x0061 <= $self->{next_char} and
2730     $self->{next_char} <= 0x0066) { # a..f
2731 wakaba 1.78 !!!cp (1003);
2732 wakaba 1.26 $code ||= 0;
2733     $code *= 0x10;
2734 wakaba 1.76 $code += $self->{next_char} - 0x0060 + 9;
2735 wakaba 1.1 redo X;
2736 wakaba 1.76 } elsif (0x0041 <= $self->{next_char} and
2737     $self->{next_char} <= 0x0046) { # A..F
2738 wakaba 1.78 !!!cp (1004);
2739 wakaba 1.26 $code ||= 0;
2740     $code *= 0x10;
2741 wakaba 1.76 $code += $self->{next_char} - 0x0040 + 9;
2742 wakaba 1.1 redo X;
2743 wakaba 1.26 } elsif (not defined $code) { # no hexadecimal digit
## "&#x" without any digit: report it, hand the x/X and the current
## character back (presumably re-queued by the macro) and make "#" the
## next character again.
2744 wakaba 1.78 !!!cp (1005);
2745 wakaba 1.112 !!!parse-error (type => 'bare hcro', line => $l, column => $c);
2746 wakaba 1.76 !!!back-next-input-character ($x_char, $self->{next_char});
2747     $self->{next_char} = 0x0023; # #
2748 wakaba 1.1 return undef;
2749 wakaba 1.76 } elsif ($self->{next_char} == 0x003B) { # ;
2750 wakaba 1.78 !!!cp (1006);
2751 wakaba 1.1 !!!next-input-character;
2752     } else {
2753 wakaba 1.78 !!!cp (1007);
2754 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2755 wakaba 1.1 }
2756    
## Replace code points that must not be produced: U+0000 and surrogates
## and anything above U+10FFFF become U+FFFD, CR becomes LF, and
## U+0080..U+009F are mapped through $c1_entity_char.
2757 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2758 wakaba 1.78 !!!cp (1008);
2759 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2760 wakaba 1.26 $code = 0xFFFD;
2761     } elsif ($code > 0x10FFFF) {
2762 wakaba 1.78 !!!cp (1009);
2763 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2764 wakaba 1.26 $code = 0xFFFD;
2765     } elsif ($code == 0x000D) {
2766 wakaba 1.78 !!!cp (1010);
2767 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2768 wakaba 1.26 $code = 0x000A;
2769     } elsif (0x80 <= $code and $code <= 0x9F) {
2770 wakaba 1.78 !!!cp (1011);
2771 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2772 wakaba 1.26 $code = $c1_entity_char->{$code};
2773 wakaba 1.1 }
2774    
2775 wakaba 1.66 return {type => CHARACTER_TOKEN, data => chr $code,
2776 wakaba 1.118 has_reference => 1,
2777 wakaba 1.120 line => $l, column => $c,
2778 wakaba 1.118 };
2779 wakaba 1.1 } # X
2780 wakaba 1.76 } elsif (0x0030 <= $self->{next_char} and
2781     $self->{next_char} <= 0x0039) { # 0..9
## Decimal form: accumulate all consecutive digits.
2782     my $code = $self->{next_char} - 0x0030;
2783 wakaba 1.1 !!!next-input-character;
2784    
2785 wakaba 1.76 while (0x0030 <= $self->{next_char} and
2786     $self->{next_char} <= 0x0039) { # 0..9
2787 wakaba 1.78 !!!cp (1012);
2788 wakaba 1.1 $code *= 10;
2789 wakaba 1.76 $code += $self->{next_char} - 0x0030;
2790 wakaba 1.1
2791     !!!next-input-character;
2792     }
2793    
2794 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2795 wakaba 1.78 !!!cp (1013);
2796 wakaba 1.1 !!!next-input-character;
2797     } else {
2798 wakaba 1.78 !!!cp (1014);
2799 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2800 wakaba 1.1 }
2801    
## Same replacement rules as in the hexadecimal branch.
2802 wakaba 1.26 if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {
2803 wakaba 1.78 !!!cp (1015);
2804 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U+%04X', $code), line => $l, column => $c);
2805 wakaba 1.26 $code = 0xFFFD;
2806     } elsif ($code > 0x10FFFF) {
2807 wakaba 1.78 !!!cp (1016);
2808 wakaba 1.112 !!!parse-error (type => (sprintf 'invalid character reference:U-%08X', $code), line => $l, column => $c);
2809 wakaba 1.26 $code = 0xFFFD;
2810     } elsif ($code == 0x000D) {
2811 wakaba 1.78 !!!cp (1017);
2812 wakaba 1.112 !!!parse-error (type => 'CR character reference', line => $l, column => $c);
2813 wakaba 1.26 $code = 0x000A;
2814 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
2815 wakaba 1.78 !!!cp (1018);
2816 wakaba 1.112 !!!parse-error (type => (sprintf 'C1 character reference:U+%04X', $code), line => $l, column => $c);
2817 wakaba 1.4 $code = $c1_entity_char->{$code};
2818 wakaba 1.1 }
2819    
2820 wakaba 1.112 return {type => CHARACTER_TOKEN, data => chr $code, has_reference => 1,
2821 wakaba 1.120 line => $l, column => $c,
2822 wakaba 1.118 };
2823 wakaba 1.1 } else {
## "&#" followed by neither x/X nor a digit: hand the current character
## back and make "#" the next character again.
2824 wakaba 1.78 !!!cp (1019);
2825 wakaba 1.112 !!!parse-error (type => 'bare nero', line => $l, column => $c);
2826 wakaba 1.76 !!!back-next-input-character ($self->{next_char});
2827     $self->{next_char} = 0x0023; # #
2828 wakaba 1.1 return undef;
2829     }
2830 wakaba 1.76 } elsif ((0x0041 <= $self->{next_char} and
2831     $self->{next_char} <= 0x005A) or
2832     (0x0061 <= $self->{next_char} and
2833     $self->{next_char} <= 0x007A)) {
## Named character reference.
##   $entity_name - every name character consumed so far.
##   $value       - replacement text of the latest matching name followed
##                  by any characters consumed after it (or simply the
##                  consumed characters when nothing has matched).
##   $match       - 1: a name ending in ";" matched; negative: a name
##                  without ";" matched (-1, doubled for each further
##                  non-matching character); 0: nothing matched.
2834     my $entity_name = chr $self->{next_char};
2835 wakaba 1.1 !!!next-input-character;
2836    
2837     my $value = $entity_name;
2838 wakaba 1.37 my $match = 0;
2839 wakaba 1.16 require Whatpm::_NamedEntityList;
2840     our $EntityChar;
2841 wakaba 1.1
2842 wakaba 1.128 while (length $entity_name < 30 and
2843 wakaba 1.1 ## NOTE: Some number greater than the maximum length of entity name
2844 wakaba 1.76 ((0x0041 <= $self->{next_char} and # a
2845     $self->{next_char} <= 0x005A) or # x
2846     (0x0061 <= $self->{next_char} and # a
2847     $self->{next_char} <= 0x007A) or # z
2848     (0x0030 <= $self->{next_char} and # 0
2849     $self->{next_char} <= 0x0039) or # 9
2850     $self->{next_char} == 0x003B)) { # ;
2851     $entity_name .= chr $self->{next_char};
2852 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
2853 wakaba 1.76 if ($self->{next_char} == 0x003B) { # ;
2854 wakaba 1.78 !!!cp (1020);
2855 wakaba 1.26 $value = $EntityChar->{$entity_name};
2856 wakaba 1.16 $match = 1;
2857     !!!next-input-character;
2858     last;
2859 wakaba 1.37 } else {
2860 wakaba 1.78 !!!cp (1021);
2861 wakaba 1.26 $value = $EntityChar->{$entity_name};
2862     $match = -1;
2863 wakaba 1.37 !!!next-input-character;
2864 wakaba 1.16 }
2865 wakaba 1.1 } else {
2866 wakaba 1.78 !!!cp (1022);
2867 wakaba 1.76 $value .= chr $self->{next_char};
2868 wakaba 1.37 $match *= 2;
2869     !!!next-input-character;
2870 wakaba 1.1 }
2871     }
2872    
2873 wakaba 1.16 if ($match > 0) {
2874 wakaba 1.78 !!!cp (1023);
2875 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2876 wakaba 1.120 line => $l, column => $c,
2877 wakaba 1.118 };
2878 wakaba 1.16 } elsif ($match < 0) {
2879 wakaba 1.112 !!!parse-error (type => 'no refc', line => $l, column => $c);
2880 wakaba 1.37 if ($in_attr and $match < -1) {
## In an attribute, a ";"-less match followed by more name characters
## is kept as literal text.
2881 wakaba 1.78 !!!cp (1024);
2882 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$entity_name,
2883 wakaba 1.120 line => $l, column => $c,
2884 wakaba 1.118 };
2885 wakaba 1.37 } else {
2886 wakaba 1.78 !!!cp (1025);
2887 wakaba 1.112 return {type => CHARACTER_TOKEN, data => $value, has_reference => 1,
2888 wakaba 1.120 line => $l, column => $c,
2889 wakaba 1.118 };
2890 wakaba 1.37 }
2891 wakaba 1.1 } else {
2892 wakaba 1.78 !!!cp (1026);
2893 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2894 wakaba 1.66 ## NOTE: "No characters are consumed" in the spec.
2895 wakaba 1.112 return {type => CHARACTER_TOKEN, data => '&'.$value,
2896 wakaba 1.120 line => $l, column => $c,
2897 wakaba 1.118 };
2898 wakaba 1.1 }
2899     } else {
2900 wakaba 1.78 !!!cp (1027);
2901 wakaba 1.1 ## no characters are consumed
2902 wakaba 1.112 !!!parse-error (type => 'bare ero', line => $l, column => $c);
2903 wakaba 1.1 return undef;
2904     }
2905     } # _tokenize_attempt_to_consume_an_entity
2906    
## Put the target document into the state required while the tree is
## being built: strict error checking is switched off and the document
## is flagged as an HTML document.
##
## NOTE: $self->{document} MUST be specified before this method is called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};

  $document->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $document->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
2915    
## Undo the document setting changed for tree construction: strict error
## checking is switched back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};

  $document->strict_error_checking (1);
  ## TODO: Turn mutation events on
} # _terminate_tree_constructor
2921    
2922     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
2923    
2924 wakaba 1.3 { # tree construction stage
2925     my $token;
2926    
## Run the tree construction stage: fetch the first token, reset the
## per-parse state and then run the three insertion mode handlers in
## order ("initial", "before html", then "before head" and later modes).
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: The point at which an interactive UA makes $self->{document}
  ## available to the user, and the point at which it begins accepting
  ## user input, are not defined.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Reset the state used by the tree construction stage.
  $self->{$_} = undef for qw(form_element head_element inner_html_node);
  $self->{open_elements} = [];

  ## NOTE: The "initial" insertion mode.
  $self->_tree_construction_initial; # MUST

  ## NOTE: The "before html" insertion mode.
  $self->_tree_construction_root_element;
  $self->{insertion_mode} = BEFORE_HEAD_IM;

  ## NOTE: The "before head" insertion mode and so on.
  $self->_tree_construction_main;
} # _construct_tree
2955    
## Process tokens in the "initial" insertion mode.
##
## Reads the current token from the enclosing scope's $token.  Leading
## white space character tokens and comments are handled here and the
## handler stays in this mode; any other token ends the mode:
##
##   - A DOCTYPE token creates a document type node, appends it to the
##     document, selects the compatibility mode ("quirks" / "limited
##     quirks" / unchanged) from the name and identifiers, and advances
##     to the next token.
##   - Anything else reports "no DOCTYPE", selects quirks mode and
##     returns with the token left for the next insertion mode.
##
## Returns nothing meaningful; dies on an unknown token type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## NOTE: "initial" insertion mode

  INITIAL: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        !!!cp ('t1');
        !!!parse-error (type => 'not HTML5', token => $token);
      } elsif ($doctype_name ne 'HTML') {
        !!!cp ('t2');
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        !!!parse-error (type => 'not HTML5', token => $token);
      } else {
        !!!cp ('t3');
      }

      my $doctype = $self->{document}->create_document_type_definition
        ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      ## NOTE: Default value for both |public_id| and |system_id| attributes
      ## are empty strings, so that we don't set any value in missing cases.
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if ($token->{quirks} or $doctype_name ne 'HTML') {
        !!!cp ('t4');
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        my $pubid = $token->{public_identifier};
        ## ASCII-only upper-casing, so that the comparison below is
        ## ASCII case-insensitive.  (The replacement range used to be
        ## written |A-z|, which worked only because tr/// ignores the
        ## excess characters of a longer replacement list.)
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED RELAXED 1.0//EN" => 1,
          "-//SOFTQUAD SOFTWARE//DTD HOTMETAL PRO 6.0::19990601::EXTENSIONS TO HTML 4.0//EN" => 1,
          "-//SOFTQUAD//DTD HOTMETAL PRO 4.0::19971010::EXTENSIONS TO HTML 4.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          !!!cp ('t5');
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          if (defined $token->{system_identifier}) {
            !!!cp ('t6');
            $self->{document}->manakai_compat_mode ('quirks');
          } else {
            !!!cp ('t7');
            $self->{document}->manakai_compat_mode ('limited quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          !!!cp ('t8');
          $self->{document}->manakai_compat_mode ('limited quirks');
        } else {
          !!!cp ('t9');
        }
      } else {
        !!!cp ('t10');
      }
      ## The system identifier check runs after (and can override) the
      ## mode chosen from the public identifier above.
      if (defined $token->{system_identifier}) {
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          ## TODO: Check the spec: PUBLIC "(limited quirks)" "(quirks)"
          $self->{document}->manakai_compat_mode ('quirks');
          !!!cp ('t11');
        } else {
          !!!cp ('t12');
        }
      } else {
        !!!cp ('t13');
      }

      ## Go to the "before html" insertion mode.
      !!!next-token;
      return;
    } elsif ({
      START_TAG_TOKEN, 1,
      END_TAG_TOKEN, 1,
      END_OF_FILE_TOKEN, 1,
    }->{$token->{type}}) {
      !!!cp ('t14');
      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      !!!ack-later;
      return;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      ## Leading white space is stripped from the token in place; only
      ## what remains (if anything) is treated as "no DOCTYPE" content.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token

        unless (length $token->{data}) {
          !!!cp ('t15');
          ## Stay in the insertion mode.
          !!!next-token;
          redo INITIAL;
        } else {
          !!!cp ('t16');
        }
      } else {
        !!!cp ('t17');
      }

      !!!parse-error (type => 'no DOCTYPE', token => $token);
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the "before html" insertion mode.
      ## reprocess
      return;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t18');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the insertion mode.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token type";
    }
  } # INITIAL

  die "$0: _tree_construction_initial: This should be never reached";
} # _tree_construction_initial
3160    
## Process tokens in the "before html" insertion mode.
##
## Reads the current token from the enclosing scope's $token.  DOCTYPE
## tokens (an error), comments and leading white space are handled here
## and the handler stays in this mode.  Otherwise the root |html| element
## is created -- from the <html> start tag when there is one, implied
## for any other token -- appended to the document and pushed onto the
## stack of open elements, and the handler returns so that the caller
## can switch to the "before head" insertion mode.
##
## $self->{application_cache_selection} is invoked exactly once per
## created root element: with the value of the |manifest| attribute when
## the <html> start tag carries a true one, with undef otherwise.
##
## Returns nothing meaningful; dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  ## NOTE: "before html" insertion mode.

  B: {
    if ($token->{type} == DOCTYPE_TOKEN) {
      !!!cp ('t19');
      !!!parse-error (type => 'in html:#DOCTYPE', token => $token);
      ## Ignore the token
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == COMMENT_TOKEN) {
      !!!cp ('t20');
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the insertion mode.
      !!!next-token;
      redo B;
    } elsif ($token->{type} == CHARACTER_TOKEN) {
      ## Leading white space is stripped from the token in place.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token.

        unless (length $token->{data}) {
          !!!cp ('t21');
          ## Stay in the insertion mode.
          !!!next-token;
          redo B;
        } else {
          !!!cp ('t22');
        }
      } else {
        !!!cp ('t23');
      }

      ## NOTE: The application cache selection callback is not invoked
      ## here; the common code below invokes it once, after the implied
      ## root element has been created.  (Calling it here as well made it
      ## run twice for documents that start with character data.)

      #
    } elsif ($token->{type} == START_TAG_TOKEN) {
      if ($token->{tag_name} eq 'html') {
        my $root_element;
        !!!create-element ($root_element, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
        $self->{document}->append_child ($root_element);
        push @{$self->{open_elements}},
            [$root_element, $el_category->{html}];

        if ($token->{attributes}->{manifest}) {
          !!!cp ('t24');
          $self->{application_cache_selection}
              ->($token->{attributes}->{manifest}->{value});
          ## ISSUE: Spec is unclear on relative references.
          ## According to Hixie (#whatwg 2008-03-19), it should be
          ## resolved against the base URI of the document in HTML
          ## or xml:base of the element in XHTML.
        } else {
          !!!cp ('t25');
          $self->{application_cache_selection}->(undef);
        }

        !!!nack ('t25c');

        !!!next-token;
        return; ## Go to the "before head" insertion mode.
      } else {
        !!!cp ('t25.1');
        #
      }
    } elsif ({
      END_TAG_TOKEN, 1,
      END_OF_FILE_TOKEN, 1,
    }->{$token->{type}}) {
      !!!cp ('t26');
      #
    } else {
      die "$0: $token->{type}: Unknown token type";
    }

    ## Any token other than an <html> start tag: create an implied root
    ## element and leave the token to be reprocessed by the next mode.
    my $root_element;
    !!!create-element ($root_element, $HTML_NS, 'html',, $token);
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, $el_category->{html}];

    $self->{application_cache_selection}->(undef);

    ## NOTE: Reprocess the token.
    !!!ack-later;
    return; ## Go to the "before head" insertion mode.

    ## ISSUE: There is an issue in the spec
  } # B

  die "$0: _tree_construction_root_element: This should never be reached";
} # _tree_construction_root_element
3255    
## Resets |$self->{insertion_mode}| according to the stack of open
## elements ("reset the insertion mode appropriately").
##
## Starting with the current node (the last entry of
## |$self->{open_elements}|) the stack is walked towards its first entry
## until an entry determines the mode.  Each entry is used as a
## |[node, category flags]| pair.  When the first entry is reached and
## |$self->{inner_html_node}| is defined, that node is examined in its
## place unless it is flagged as a table cell.
##
## Returns nothing; the only effect is the new insertion mode.
##
## NOTE(review): with a table cell as |inner_html_node|, |$node| stays the
## first stack entry, so presumably Step 15 rather than Step 16 decides
## the mode - confirm that this matches the targeted spec revision.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node}) {
        if ($self->{inner_html_node}->[1] & TABLE_CELL_EL) {
          !!!cp ('t27');
          #
        } else {
          !!!cp ('t28');
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..14
    my $new_mode;
    if ($node->[1] & FOREIGN_EL) {
      ## NOTE: Strictly speaking, the line below only applies to MathML and
      ## SVG elements.  Currently the HTML syntax supports only MathML and
      ## SVG elements as foreigners.
      $new_mode = $self->{insertion_mode} | IN_FOREIGN_CONTENT_IM;
      ## ISSUE: What is set as the secondary insertion mode?
    } else {
      $new_mode = {
        select => IN_SELECT_IM,
        ## NOTE: |option| and |optgroup| do not set
        ## insertion mode to "in select" by themselves.
        td => IN_CELL_IM,
        th => IN_CELL_IM,
        tr => IN_ROW_IM,
        tbody => IN_TABLE_BODY_IM,
        thead => IN_TABLE_BODY_IM,
        tfoot => IN_TABLE_BODY_IM,
        caption => IN_CAPTION_IM,
        colgroup => IN_COLUMN_GROUP_IM,
        table => IN_TABLE_IM,
        head => IN_BODY_IM, # not in head!
        body => IN_BODY_IM,
        frameset => IN_FRAMESET_IM,
      }->{$node->[0]->manakai_local_name};
    }
    ## Assign and return as two statements, so that returning does not
    ## depend on the numeric value of the selected mode being true.
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 15
    if ($node->[1] & HTML_EL) {
      unless (defined $self->{head_element}) {
        !!!cp ('t29');
        $self->{insertion_mode} = BEFORE_HEAD_IM;
      } else {
        ## ISSUE: Can this state be reached?
        !!!cp ('t30');
        $self->{insertion_mode} = AFTER_HEAD_IM;
      }
      return;
    } else {
      !!!cp ('t31');
    }

    ## Step 16
    if ($last) {
      $self->{insertion_mode} = IN_BODY_IM;
      return;
    }

    ## Step 17: Move on to the entry above |$node| in the stack.
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 18
    redo S3;
  } # S3

  die "$0: _reset_insertion_mode: This line should never be reached";
} # _reset_insertion_mode
3338    
3339     sub _tree_construction_main ($) {
3340     my $self = shift;
3341    
3342 wakaba 1.1 my $active_formatting_elements = [];
3343    
## Reconstructs the active formatting elements.
##
## The list of active formatting elements is rewound from its end until
## its first entry, a marker or an entry that is on the stack of open
## elements is met.  From there on every entry up to the end of the list
## is replaced by a clone made with |clone_node (0)|; each clone is handed
## to the callback for insertion and pushed onto the stack of open
## elements.
##
## Argument: a code reference that is called with each cloned node.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert_node = shift;

  ## Step 1: An empty list needs no work.
  return unless @$active_formatting_elements;

  ## Step 3: Begin at the last entry.  |$pos| is a negative index, i.e.
  ## it is counted from the end of the list.
  my $pos = -1;
  my $entry = $active_formatting_elements->[$pos];

  ## Step 2: No work either if that entry is a marker or is on the stack
  ## of open elements.
  return if $entry->[0] eq '#marker';
  for my $open (@{$self->{open_elements}}) {
    if ($entry->[0] eq $open->[0]) {
      !!!cp ('t32');
      return;
    }
  }

  S4: {
    ## Step 4: The first entry of the list ends the rewinding.
    last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5: Go back one entry.
    $pos--;
    $entry = $active_formatting_elements->[$pos];

    ## Step 6: Keep rewinding unless the entry is a marker or is on the
    ## stack of open elements.
    if ($entry->[0] eq '#marker') {
      !!!cp ('t33_1');
      #
    } else {
      my $is_open = 0;
      OE: for my $open (@{$self->{open_elements}}) {
        if ($entry->[0] eq $open->[0]) {
          !!!cp ('t33');
          $is_open = 1;
          last OE;
        }
      }
      unless ($is_open) {
        ## NOTE: <!DOCTYPE HTML><p><b><i><u></p> <p>X
        !!!cp ('t35');
        redo S4;
      }
      !!!cp ('t34');
    }

    ## Step 7: Step forward again to the first entry to be recreated.
    $pos++;
    $entry = $active_formatting_elements->[$pos];
  } # S4

  S7: {
    ## Step 8: Clone the element; the category flags are carried over.
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9: Insert the clone and make it the current node.
    $insert_node->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10: The list entry now refers to the clone.
    $active_formatting_elements->[$pos] = $self->{open_elements}->[-1];

    ## Step 11: Continue until the last entry of the list has been done.
    if (not $clone->[0] eq $active_formatting_elements->[-1]->[0]) {
      !!!cp ('t36');
      ## Step 7'
      $pos++;
      $entry = $active_formatting_elements->[$pos];

      redo S7;
    }

    !!!cp ('t37');
  } # S7
}; # $reconstruct_active_formatting_elements
3423    
## Clears the list of active formatting elements up to the last marker:
## the last |#marker| entry and every entry after it are removed.  The
## list is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $pos = $#$active_formatting_elements;
  while ($pos >= 0) {
    if ($active_formatting_elements->[$pos]->[0] eq '#marker') {
      !!!cp ('t38');
      ## Two-argument |splice| drops everything from |$pos| to the end.
      splice @$active_formatting_elements, $pos;
      return;
    }
    $pos--;
  }

  !!!cp ('t39');
}; # $clear_up_to_marker
3435    
3436 wakaba 1.96 my $insert;
3437    
## Generic CDATA / RCDATA element parsing for the start tag held in the
## current |$token|.
##
## Creates the element, inserts it through |$insert|, switches the
## tokenizer's content model, collects the following character tokens into
## one text node and finally expects the matching end tag.  A different
## token at that point is reported through the parse-error macro.  In
## either case the next token is fetched before returning.
##
## Argument: the content model flag, |CDATA_CONTENT_MODEL| or
## |RCDATA_CONTENT_MODEL|.  When the matching end tag is missing, any
## other flag value makes the closure die.
my $parse_rcdata = sub ($) {
  my ($content_model_flag) = @_;

  ## Step 1: Create the element for the start tag.
  my $start_tag_name = $token->{tag_name};
  my $el;
  !!!create-element ($el, $HTML_NS, $start_tag_name, $token->{attributes}, $token);

  ## Step 2: Insert it.
  $insert->($el);

  ## Step 3: Switch the content model.
  $self->{content_model} = $content_model_flag; # CDATA or RCDATA
  delete $self->{escape}; # MUST

  ## Step 4: Gather the character data.
  my $text = '';
  !!!nack ('t40.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) { # or until stop tokenizing
    !!!cp ('t40');
    $text .= $token->{data};
    !!!next-token;
  }

  ## Step 5: A text node is only created for non-empty data.
  if (length $text) {
    !!!cp ('t41');
    my $text_node = $self->{document}->create_text_node ($text);
    $el->append_child ($text_node);
  }

  ## Step 6: Back to the PCDATA content model.
  $self->{content_model} = PCDATA_CONTENT_MODEL;

  ## Step 7: The matching end tag is expected now.
  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq $start_tag_name) {
    !!!cp ('t42');
    ## Ignore the token
  } else {
    ## NOTE: An end-of-file token.
    if ($content_model_flag == CDATA_CONTENT_MODEL) {
      !!!cp ('t43');
      !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
    } elsif ($content_model_flag == RCDATA_CONTENT_MODEL) {
      !!!cp ('t44');
      !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
    } else {
      die "$0: $content_model_flag in parse_rcdata";
    }
  }
  !!!next-token;
}; # $parse_rcdata
3492 wakaba 1.1
## Handles a |script| start tag held in the current |$token|.
##
## Creates the |script| element, reads its content in the CDATA content
## model, appends the collected text to the element and expects the
## |script| end tag; another token at that point is reported through the
## parse-error macro.  Unless |$self->{inner_html_node}| is defined, the
## element is then inserted through |$insert|.  The next token is fetched
## before returning.
##
## Script execution related steps are not implemented (see the TODOs).
my $script_start_tag = sub () {
  my $script_el;
  !!!create-element ($script_el, $HTML_NS, 'script', $token->{attributes}, $token);
  ## TODO: mark as "parser-inserted"

  ## The content of the element is tokenized as CDATA.
  $self->{content_model} = CDATA_CONTENT_MODEL;
  delete $self->{escape}; # MUST

  my $script_text = '';
  !!!nack ('t45.1');
  !!!next-token;
  while ($token->{type} == CHARACTER_TOKEN) {
    !!!cp ('t45');
    $script_text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $script_text) {
    !!!cp ('t46');
    $script_el->manakai_append_text ($script_text);
  }

  $self->{content_model} = PCDATA_CONTENT_MODEL;

  if ($token->{type} == END_TAG_TOKEN and
      $token->{tag_name} eq 'script') {
    !!!cp ('t47');
    ## Ignore the token
  } else {
    !!!cp ('t48');
    !!!parse-error (type => 'in CDATA:#'.$token->{type}, token => $token);
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (defined $self->{inner_html_node}) {
    !!!cp ('t49');
    ## TODO: mark as "already executed"
  } else {
    !!!cp ('t50');
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    $insert->($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
3544    
3545 wakaba 1.102 ## NOTE: $open_tables->[-1]->[0] is the "current table" element node.
3546     ## NOTE: $open_tables->[-1]->[1] is the "tainted" flag.
3547     my $open_tables = [[$self->{open_elements}->[0]->[0]]];
3548    
## Handles the end tag of a formatting element by means of the adoption
## agency algorithm (AAA).
##
## Argument: the end tag token.  Its |tag_name| selects the formatting
## element, and the token is attached to every parse error reported here.
##
## The algorithm is repeated (|redo FET|) until one of its early exits is
## taken: no matching active formatting element, the element is not on the
## stack of open elements or not in scope, or there is no furthest block.
## Each exit fetches the next token before returning.
my $formatting_end_tag = sub {
  my $end_tag_token = shift;
  my $tag_name = $end_tag_token->{tag_name};

  ## NOTE: The adoption agency algorithm (AAA).

  FET: {
    ## Step 1: Find the formatting element, i.e. the last entry after the
    ## last marker of the list of active formatting elements whose local
    ## name is the tag name.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq '#marker') {
        !!!cp ('t52');
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0]->manakai_local_name
                   eq $tag_name) {
        !!!cp ('t51');
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!cp ('t53');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name, token => $end_tag_token);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          !!!cp ('t54');
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!cp ('t55');
          ## The tag name is taken from the end tag token passed in, as
          ## for the other errors of this closure.
          !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                          token => $end_tag_token);
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ($node->[1] & SCOPING_EL) {
        !!!cp ('t56');
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!cp ('t57');
      !!!parse-error (type => 'unmatched end tag:'.$tag_name,
                      token => $end_tag_token);
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last entry.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!cp ('t58');
      !!!parse-error (type => 'not closed',
                      value => $self->{open_elements}->[-1]->[0]
                          ->manakai_local_name,
                      token => $end_tag_token);
    }

    ## Step 2: Find the furthest block.  The stack is scanned from the
    ## current node up to the formatting element, so that the qualifying
    ## entry nearest to the formatting element is the one that is kept.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not ($node->[1] & FORMATTING_EL) and
          #not $phrasing_category->{$node->[1]} and
          ($node->[1] & SPECIAL_EL or
           $node->[1] & SCOPING_EL)) { ## Scoping is redundant, maybe
        !!!cp ('t59');
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        !!!cp ('t60');
        last OE;
      }
    } # OE

    ## Step 3: Without a furthest block the formatting element and all
    ## entries after it are popped off the stack, and the element is
    ## removed from the list of active formatting elements.
    unless (defined $furthest_block) { # MUST
      !!!cp ('t61');
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      !!!cp ('t62');
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): if the formatting element is the first entry of the
    ## list, index -1 selects the last entry here - confirm the intent.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2: An entry that is not an active formatting element is
      ## dropped from the stack and the entry above it is tried.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            !!!cp ('t63');
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        !!!cp ('t64');
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5: A node that already has children is replaced by a clone
      ## both in the list and on the stack.
      if ($node->[0]->has_child_nodes ()) {
        !!!cp ('t65');
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8: Insert the last node into the common ancestor, or foster
    ## parent it if the common ancestor is flagged with TABLE_ROWS_EL.
    if ($common_ancestor_node->[1] & TABLE_ROWS_EL) {
      my $foster_parent_element;
      my $next_sibling;
      OE: for (reverse 0..$#{$self->{open_elements}}) {
        if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
          my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
          if (defined $parent and $parent->node_type == 1) {
            !!!cp ('t65.1');
            $foster_parent_element = $parent;
            $next_sibling = $self->{open_elements}->[$_]->[0];
          } else {
            !!!cp ('t65.2');
            $foster_parent_element
                = $self->{open_elements}->[$_ - 1]->[0];
          }
          last OE;
        }
      } # OE
      $foster_parent_element = $self->{open_elements}->[0]->[0]
          unless defined $foster_parent_element;
      $foster_parent_element->insert_before ($last_node->[0], $next_sibling);
      $open_tables->[-1]->[1] = 1; # tainted
    } else {
      !!!cp ('t65.3');
      $common_ancestor_node->[0]->append_child ($last_node->[0]);
    }

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10: Move the children of the furthest block to the clone.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12: In the list of active formatting elements, remove the
    ## formatting element and insert the clone after the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t66');
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        !!!cp ('t67');
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13: On the stack of open elements, remove the formatting
    ## element and insert the clone immediately below the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        !!!cp ('t68');
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        !!!cp ('t69');
        $i = $_;
      }
    } # OE
    ## Length 0: the clone is inserted; an entry below the furthest block
    ## must stay on the stack rather than being overwritten.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
3781    
## Default insertion callback: appends the node given as the first
## argument to the current node, i.e. to the node of the last entry of the
## stack of open elements.  It is also the initial value of |$insert|.
$insert = my $insert_to_current = sub {
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($_[0]);
}; # $insert_to_current
3785    
## Insertion callback with foster parenting.
##
## If the current node is flagged with TABLE_ROWS_EL, the child is not
## appended to it.  The stack of open elements is searched from the end
## for an entry flagged with TABLE_EL instead:
##   - if that table has a parent node of node type 1, the child is
##     inserted into that parent, just before the table;
##   - otherwise the child is appended to the node of the stack entry
##     just above the table;
##   - without any table entry the node of the first stack entry is used.
## The current table is marked as tainted afterwards.
##
## For any other current node the child is simply appended to it.
##
## Argument: the node to insert.
my $insert_to_foster = sub {
  my $new_child = shift;
  my $open_elements = $self->{open_elements};

  if ($open_elements->[-1]->[1] & TABLE_ROWS_EL) {
    # MUST
    my $foster_parent;
    my $reference_node; # stays undefined when the child is to be appended
    OE: for my $pos (reverse 0..$#$open_elements) {
      my $candidate = $open_elements->[$pos];
      next OE unless $candidate->[1] & TABLE_EL;

      my $table_parent = $candidate->[0]->parent_node;
      if (defined $table_parent and $table_parent->node_type == 1) {
        !!!cp ('t70');
        $foster_parent = $table_parent;
        $reference_node = $candidate->[0];
      } else {
        !!!cp ('t71');
        $foster_parent = $open_elements->[$pos - 1]->[0];
      }
      last OE;
    } # OE
    $foster_parent = $open_elements->[0]->[0]
        unless defined $foster_parent;
    $foster_parent->insert_before ($new_child, $reference_node);
    $open_tables->[-1]->[1] = 1; # tainted
  } else {
    !!!cp ('t72');
    $open_elements->[-1]->[0]->append_child ($new_child);
  }
}; # $insert_to_foster
3817    
3818 wakaba 1.126 B: while (1) {
3819 wakaba 1.55 if ($token->{type} == DOCTYPE_TOKEN) {
3820 wakaba 1.79 !!!cp ('t73');
3821 wakaba 1.113 !!!parse-error (type => 'DOCTYPE in the middle', token => $token);
3822 wakaba 1.52 ## Ignore the token
3823     ## Stay in the phase
3824     !!!next-token;
3825 wakaba 1.126 next B;
3826 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN and
3827 wakaba 1.52 $token->{tag_name} eq 'html') {
3828 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
3829 wakaba 1.79 !!!cp ('t79');
3830 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3831 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
3832     } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
3833 wakaba 1.79 !!!cp ('t80');
3834 wakaba 1.113 !!!parse-error (type => 'after html:html', token => $token);
3835 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
3836 wakaba 1.79 } else {
3837     !!!cp ('t81');
3838 wakaba 1.52 }
3839    
3840 wakaba 1.84 !!!cp ('t82');
3841 wakaba 1.113 !!!parse-error (type => 'not first start tag', token => $token);
3842 wakaba 1.52 my $top_el = $self->{open_elements}->[0]->[0];
3843     for my $attr_name (keys %{$token->{attributes}}) {
3844     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3845 wakaba 1.79 !!!cp ('t84');
3846 wakaba 1.52 $top_el->set_attribute_ns
3847     (undef, [undef, $attr_name],
3848     $token->{attributes}->{$attr_name}->{value});
3849     }
3850     }
3851 wakaba 1.125 !!!nack ('t84.1');
3852 wakaba 1.52 !!!next-token;
3853 wakaba 1.126 next B;
3854 wakaba 1.55 } elsif ($token->{type} == COMMENT_TOKEN) {
3855 wakaba 1.52 my $comment = $self->{document}->create_comment ($token->{data});
3856 wakaba 1.56 if ($self->{insertion_mode} & AFTER_HTML_IMS) {
3857 wakaba 1.79 !!!cp ('t85');
3858 wakaba 1.52 $self->{document}->append_child ($comment);
3859 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_BODY_IM) {
3860 wakaba 1.79 !!!cp ('t86');
3861 wakaba 1.52 $self->{open_elements}->[0]->[0]->append_child ($comment);
3862     } else {
3863 wakaba 1.79 !!!cp ('t87');
3864 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3865     }
3866     !!!next-token;
3867 wakaba 1.126 next B;
3868     } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
3869     if ($token->{type} == CHARACTER_TOKEN) {
3870     !!!cp ('t87.1');
3871     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3872     !!!next-token;
3873     next B;
3874     } elsif ($token->{type} == START_TAG_TOKEN) {
3875 wakaba 1.129 if ((not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} and
3876     $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL) or
3877 wakaba 1.126 not ($self->{open_elements}->[-1]->[1] & FOREIGN_EL) or
3878     ($token->{tag_name} eq 'svg' and
3879     $self->{open_elements}->[-1]->[1] & MML_AXML_EL)) {
3880     ## NOTE: "using the rules for secondary insertion mode"then"continue"
3881     !!!cp ('t87.2');
3882     #
3883     } elsif ({
3884 wakaba 1.130 b => 1, big => 1, blockquote => 1, body => 1, br => 1,
3885     center => 1, code => 1, dd => 1, div => 1, dl => 1, em => 1,
3886     embed => 1, font => 1, h1 => 1, h2 => 1, h3 => 1, ## No h4!
3887     h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, img => 1,
3888     li => 1, menu => 1, meta => 1, nobr => 1, p => 1, pre => 1,
3889     ruby => 1, s => 1, small => 1, span => 1, strong => 1,
3890     sub => 1, sup => 1, table => 1, tt => 1, u => 1, ul => 1,
3891     var => 1,
3892 wakaba 1.126 }->{$token->{tag_name}}) {
3893     !!!cp ('t87.2');
3894     !!!parse-error (type => 'not closed',
3895     value => $self->{open_elements}->[-1]->[0]
3896     ->manakai_local_name,
3897     token => $token);
3898    
3899     pop @{$self->{open_elements}}
3900     while $self->{open_elements}->[-1]->[1] & FOREIGN_EL;
3901    
3902 wakaba 1.130 $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
3903 wakaba 1.126 ## Reprocess.
3904     next B;
3905     } else {
3906 wakaba 1.131 my $nsuri = $self->{open_elements}->[-1]->[0]->namespace_uri;
3907     my $tag_name = $token->{tag_name};
3908     if ($nsuri eq $SVG_NS) {
3909     $tag_name = {
3910     altglyph => 'altGlyph',
3911     altglyphdef => 'altGlyphDef',
3912     altglyphitem => 'altGlyphItem',
3913     animatecolor => 'animateColor',
3914     animatemotion => 'animateMotion',
3915     animatetransform => 'animateTransform',
3916     clippath => 'clipPath',
3917     feblend => 'feBlend',
3918     fecolormatrix => 'feColorMatrix',
3919     fecomponenttransfer => 'feComponentTransfer',
3920     fecomposite => 'feComposite',
3921     feconvolvematrix => 'feConvolveMatrix',
3922     fediffuselighting => 'feDiffuseLighting',
3923     fedisplacementmap => 'feDisplacementMap',
3924     fedistantlight => 'feDistantLight',
3925     feflood => 'feFlood',
3926     fefunca => 'feFuncA',
3927     fefuncb => 'feFuncB',
3928     fefuncg => 'feFuncG',
3929     fefuncr => 'feFuncR',
3930     fegaussianblur => 'feGaussianBlur',
3931     feimage => 'feImage',
3932     femerge => 'feMerge',
3933     femergenode => 'feMergeNode',
3934     femorphology => 'feMorphology',
3935     feoffset => 'feOffset',
3936     fepointlight => 'fePointLight',
3937     fespecularlighting => 'feSpecularLighting',
3938     fespotlight => 'feSpotLight',
3939     fetile => 'feTile',
3940     feturbulence => 'feTurbulence',
3941     foreignobject => 'foreignObject',
3942     glyphref => 'glyphRef',
3943     lineargradient => 'linearGradient',
3944     radialgradient => 'radialGradient',
3945     #solidcolor => 'solidColor', ## NOTE: Commented in spec (SVG1.2)
3946     textpath => 'textPath',
3947     }->{$tag_name} || $tag_name;
3948     }
3949    
3950     ## "adjust SVG attributes" (SVG only) - done in insert-element-f
3951    
3952     ## "adjust foreign attributes" - done in insert-element-f
3953 wakaba 1.126
3954 wakaba 1.131 !!!insert-element-f ($nsuri, $tag_name, $token->{attributes}, $token);
3955 wakaba 1.126
3956     if ($self->{self_closing}) {
3957     pop @{$self->{open_elements}};
3958     !!!ack ('t87.3');
3959     } else {
3960     !!!cp ('t87.4');
3961     }
3962    
3963     !!!next-token;
3964     next B;
3965     }
3966     } elsif ($token->{type} == END_TAG_TOKEN) {
3967     ## NOTE: "using the rules for secondary insertion mode" then "continue"
3968     !!!cp ('t87.5');
3969     #
3970     } elsif ($token->{type} == END_OF_FILE_TOKEN) {
3971     ## NOTE: "using the rules for secondary insertion mode" then "continue"
3972     !!!cp ('t87.6');
3973     #
3974     ## TODO: ...
3975     } else {
3976     die "$0: $token->{type}: Unknown token type";
3977     }
3978     }
3979    
3980     if ($self->{insertion_mode} & HEAD_IMS) {
3981 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
3982 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3983 wakaba 1.99 unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
3984     !!!cp ('t88.2');
3985     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3986     } else {
3987     !!!cp ('t88.1');
3988     ## Ignore the token.
3989     !!!next-token;
3990 wakaba 1.126 next B;
3991 wakaba 1.99 }
3992 wakaba 1.52 unless (length $token->{data}) {
3993 wakaba 1.79 !!!cp ('t88');
3994 wakaba 1.52 !!!next-token;
3995 wakaba 1.126 next B;
3996 wakaba 1.1 }
3997     }
3998 wakaba 1.52
3999 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4000 wakaba 1.79 !!!cp ('t89');
4001 wakaba 1.52 ## As if <head>
4002 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4003 wakaba 1.52 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4004 wakaba 1.123 push @{$self->{open_elements}},
4005     [$self->{head_element}, $el_category->{head}];
4006 wakaba 1.52
4007     ## Reprocess in the "in head" insertion mode...
4008     pop @{$self->{open_elements}};
4009    
4010     ## Reprocess in the "after head" insertion mode...
4011 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4012 wakaba 1.79 !!!cp ('t90');
4013 wakaba 1.52 ## As if </noscript>
4014     pop @{$self->{open_elements}};
4015 wakaba 1.113 !!!parse-error (type => 'in noscript:#character', token => $token);
4016 wakaba 1.1
4017 wakaba 1.52 ## Reprocess in the "in head" insertion mode...
4018     ## As if </head>
4019     pop @{$self->{open_elements}};
4020    
4021     ## Reprocess in the "after head" insertion mode...
4022 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4023 wakaba 1.79 !!!cp ('t91');
4024 wakaba 1.52 pop @{$self->{open_elements}};
4025    
4026     ## Reprocess in the "after head" insertion mode...
4027 wakaba 1.79 } else {
4028     !!!cp ('t92');
4029 wakaba 1.1 }
4030 wakaba 1.52
4031 wakaba 1.123 ## "after head" insertion mode
4032     ## As if <body>
4033     !!!insert-element ('body',, $token);
4034     $self->{insertion_mode} = IN_BODY_IM;
4035     ## reprocess
4036 wakaba 1.126 next B;
4037 wakaba 1.123 } elsif ($token->{type} == START_TAG_TOKEN) {
4038     if ($token->{tag_name} eq 'head') {
4039     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4040     !!!cp ('t93');
4041 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
4042 wakaba 1.123 $self->{open_elements}->[-1]->[0]->append_child
4043     ($self->{head_element});
4044     push @{$self->{open_elements}},
4045     [$self->{head_element}, $el_category->{head}];
4046     $self->{insertion_mode} = IN_HEAD_IM;
4047 wakaba 1.125 !!!nack ('t93.1');
4048 wakaba 1.123 !!!next-token;
4049 wakaba 1.126 next B;
4050 wakaba 1.125 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4051     !!!cp ('t94');
4052     #
4053     } else {
4054     !!!cp ('t95');
4055     !!!parse-error (type => 'in head:head', token => $token); # or in head noscript
4056     ## Ignore the token
4057     !!!nack ('t95.1');
4058     !!!next-token;
4059 wakaba 1.126 next B;
4060 wakaba 1.125 }
4061     } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4062 wakaba 1.126 !!!cp ('t96');
4063     ## As if <head>
4064     !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4065     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4066     push @{$self->{open_elements}},
4067     [$self->{head_element}, $el_category->{head}];
4068 wakaba 1.52
4069 wakaba 1.126 $self->{insertion_mode} = IN_HEAD_IM;
4070     ## Reprocess in the "in head" insertion mode...
4071     } else {
4072     !!!cp ('t97');
4073     }
4074 wakaba 1.52
4075 wakaba 1.49 if ($token->{tag_name} eq 'base') {
4076 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4077 wakaba 1.79 !!!cp ('t98');
4078 wakaba 1.49 ## As if </noscript>
4079     pop @{$self->{open_elements}};
4080 wakaba 1.113 !!!parse-error (type => 'in noscript:base', token => $token);
4081 wakaba 1.49
4082 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4083 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4084 wakaba 1.79 } else {
4085     !!!cp ('t99');
4086 wakaba 1.49 }
4087    
4088     ## NOTE: There is a "as if in head" code clone.
4089 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4090 wakaba 1.79 !!!cp ('t100');
4091 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4092 wakaba 1.123 push @{$self->{open_elements}},
4093     [$self->{head_element}, $el_category->{head}];
4094 wakaba 1.79 } else {
4095     !!!cp ('t101');
4096 wakaba 1.49 }
4097 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4098 wakaba 1.49 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4099 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4100 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4101 wakaba 1.125 !!!nack ('t101.1');
4102 wakaba 1.49 !!!next-token;
4103 wakaba 1.126 next B;
4104 wakaba 1.49 } elsif ($token->{tag_name} eq 'link') {
4105 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4106 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4107 wakaba 1.79 !!!cp ('t102');
4108 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4109 wakaba 1.123 push @{$self->{open_elements}},
4110     [$self->{head_element}, $el_category->{head}];
4111 wakaba 1.79 } else {
4112     !!!cp ('t103');
4113 wakaba 1.25 }
4114 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4115 wakaba 1.25 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4116 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4117 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4118 wakaba 1.125 !!!ack ('t103.1');
4119 wakaba 1.1 !!!next-token;
4120 wakaba 1.126 next B;
4121 wakaba 1.34 } elsif ($token->{tag_name} eq 'meta') {
4122     ## NOTE: There is a "as if in head" code clone.
4123 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4124 wakaba 1.79 !!!cp ('t104');
4125 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4126 wakaba 1.123 push @{$self->{open_elements}},
4127     [$self->{head_element}, $el_category->{head}];
4128 wakaba 1.79 } else {
4129     !!!cp ('t105');
4130 wakaba 1.34 }
4131 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4132 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
4133 wakaba 1.34
4134     unless ($self->{confident}) {
4135     if ($token->{attributes}->{charset}) { ## TODO: And if supported
4136 wakaba 1.79 !!!cp ('t106');
4137 wakaba 1.63 $self->{change_encoding}
4138 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value},
4139     $token);
4140 wakaba 1.66
4141     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4142     ->set_user_data (manakai_has_reference =>
4143     $token->{attributes}->{charset}
4144     ->{has_reference});
4145 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
4146 wakaba 1.35 ## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to its definition.
4147 wakaba 1.63 if ($token->{attributes}->{content}->{value}
4148 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4149     [\x09-\x0D\x20]*=
4150 wakaba 1.34 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4151     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
4152 wakaba 1.79 !!!cp ('t107');
4153 wakaba 1.63 $self->{change_encoding}
4154 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3,
4155     $token);
4156 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4157     ->set_user_data (manakai_has_reference =>
4158     $token->{attributes}->{content}
4159     ->{has_reference});
4160 wakaba 1.79 } else {
4161     !!!cp ('t108');
4162 wakaba 1.63 }
4163 wakaba 1.34 }
4164 wakaba 1.66 } else {
4165     if ($token->{attributes}->{charset}) {
4166 wakaba 1.79 !!!cp ('t109');
4167 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
4168     ->set_user_data (manakai_has_reference =>
4169     $token->{attributes}->{charset}
4170     ->{has_reference});
4171     }
4172 wakaba 1.68 if ($token->{attributes}->{content}) {
4173 wakaba 1.79 !!!cp ('t110');
4174 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
4175     ->set_user_data (manakai_has_reference =>
4176     $token->{attributes}->{content}
4177     ->{has_reference});
4178     }
4179 wakaba 1.34 }
4180    
4181 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4182 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4183 wakaba 1.125 !!!ack ('t110.1');
4184 wakaba 1.34 !!!next-token;
4185 wakaba 1.126 next B;
4186 wakaba 1.49 } elsif ($token->{tag_name} eq 'title') {
4187 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4188 wakaba 1.79 !!!cp ('t111');
4189 wakaba 1.49 ## As if </noscript>
4190     pop @{$self->{open_elements}};
4191 wakaba 1.113 !!!parse-error (type => 'in noscript:title', token => $token);
4192 wakaba 1.49
4193 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4194 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4195 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4196 wakaba 1.79 !!!cp ('t112');
4197 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4198 wakaba 1.123 push @{$self->{open_elements}},
4199     [$self->{head_element}, $el_category->{head}];
4200 wakaba 1.79 } else {
4201     !!!cp ('t113');
4202 wakaba 1.25 }
4203 wakaba 1.49
4204     ## NOTE: There is a "as if in head" code clone.
4205 wakaba 1.31 my $parent = defined $self->{head_element} ? $self->{head_element}
4206     : $self->{open_elements}->[-1]->[0];
4207 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
4208 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4209 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4210 wakaba 1.126 next B;
4211 wakaba 1.25 } elsif ($token->{tag_name} eq 'style') {
4212     ## NOTE: Or (scripting is enabled and tag_name eq 'noscript' and
4213 wakaba 1.54 ## insertion mode IN_HEAD_IM)
4214 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4215 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4216 wakaba 1.79 !!!cp ('t114');
4217 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4218 wakaba 1.123 push @{$self->{open_elements}},
4219     [$self->{head_element}, $el_category->{head}];
4220 wakaba 1.79 } else {
4221     !!!cp ('t115');
4222 wakaba 1.25 }
4223 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
4224 wakaba 1.100 pop @{$self->{open_elements}} # <head>
4225 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4226 wakaba 1.126 next B;
4227 wakaba 1.25 } elsif ($token->{tag_name} eq 'noscript') {
4228 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_IM) {
4229 wakaba 1.79 !!!cp ('t116');
4230 wakaba 1.25 ## NOTE: and scripting is disabled
4231 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4232 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_NOSCRIPT_IM;
4233 wakaba 1.125 !!!nack ('t116.1');
4234 wakaba 1.1 !!!next-token;
4235 wakaba 1.126 next B;
4236 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4237 wakaba 1.79 !!!cp ('t117');
4238 wakaba 1.113 !!!parse-error (type => 'in noscript:noscript', token => $token);
4239 wakaba 1.1 ## Ignore the token
4240 wakaba 1.125 !!!nack ('t117.1');
4241 wakaba 1.41 !!!next-token;
4242 wakaba 1.126 next B;
4243 wakaba 1.1 } else {
4244 wakaba 1.79 !!!cp ('t118');
4245 wakaba 1.25 #
4246 wakaba 1.1 }
4247 wakaba 1.49 } elsif ($token->{tag_name} eq 'script') {
4248 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4249 wakaba 1.79 !!!cp ('t119');
4250 wakaba 1.49 ## As if </noscript>
4251     pop @{$self->{open_elements}};
4252 wakaba 1.113 !!!parse-error (type => 'in noscript:script', token => $token);
4253 wakaba 1.49
4254 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4255 wakaba 1.49 ## Reprocess in the "in head" insertion mode...
4256 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_HEAD_IM) {
4257 wakaba 1.79 !!!cp ('t120');
4258 wakaba 1.113 !!!parse-error (type => 'after head:'.$token->{tag_name}, token => $token);
4259 wakaba 1.123 push @{$self->{open_elements}},
4260     [$self->{head_element}, $el_category->{head}];
4261 wakaba 1.79 } else {
4262     !!!cp ('t121');
4263 wakaba 1.25 }
4264 wakaba 1.49
4265 wakaba 1.25 ## NOTE: There is a "as if in head" code clone.
4266 wakaba 1.100 $script_start_tag->();
4267     pop @{$self->{open_elements}} # <head>
4268 wakaba 1.54 if $self->{insertion_mode} == AFTER_HEAD_IM;
4269 wakaba 1.126 next B;
4270 wakaba 1.49 } elsif ($token->{tag_name} eq 'body' or
4271 wakaba 1.25 $token->{tag_name} eq 'frameset') {
4272 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4273 wakaba 1.79 !!!cp ('t122');
4274 wakaba 1.49 ## As if </noscript>
4275     pop @{$self->{open_elements}};
4276 wakaba 1.113 !!!parse-error (type => 'in noscript:'.$token->{tag_name}, token => $token);
4277 wakaba 1.49
4278     ## Reprocess in the "in head" insertion mode...
4279     ## As if </head>
4280     pop @{$self->{open_elements}};
4281    
4282     ## Reprocess in the "after head" insertion mode...
4283 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4284 wakaba 1.79 !!!cp ('t124');
4285 wakaba 1.49 pop @{$self->{open_elements}};
4286    
4287     ## Reprocess in the "after head" insertion mode...
4288 wakaba 1.79 } else {
4289     !!!cp ('t125');
4290 wakaba 1.49 }
4291    
4292     ## "after head" insertion mode
4293 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4294 wakaba 1.54 if ($token->{tag_name} eq 'body') {
4295 wakaba 1.79 !!!cp ('t126');
4296 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4297     } elsif ($token->{tag_name} eq 'frameset') {
4298 wakaba 1.79 !!!cp ('t127');
4299 wakaba 1.54 $self->{insertion_mode} = IN_FRAMESET_IM;
4300     } else {
4301     die "$0: tag name: $self->{tag_name}";
4302     }
4303 wakaba 1.125 !!!nack ('t127.1');
4304 wakaba 1.1 !!!next-token;
4305 wakaba 1.126 next B;
4306 wakaba 1.1 } else {
4307 wakaba 1.79 !!!cp ('t128');
4308 wakaba 1.1 #
4309     }
4310 wakaba 1.49
4311 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4312 wakaba 1.79 !!!cp ('t129');
4313 wakaba 1.49 ## As if </noscript>
4314     pop @{$self->{open_elements}};
4315 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4316 wakaba 1.49
4317     ## Reprocess in the "in head" insertion mode...
4318     ## As if </head>
4319 wakaba 1.25 pop @{$self->{open_elements}};
4320 wakaba 1.49
4321     ## Reprocess in the "after head" insertion mode...
4322 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4323 wakaba 1.79 !!!cp ('t130');
4324 wakaba 1.49 ## As if </head>
4325 wakaba 1.25 pop @{$self->{open_elements}};
4326 wakaba 1.49
4327     ## Reprocess in the "after head" insertion mode...
4328 wakaba 1.79 } else {
4329     !!!cp ('t131');
4330 wakaba 1.49 }
4331    
4332     ## "after head" insertion mode
4333     ## As if <body>
4334 wakaba 1.116 !!!insert-element ('body',, $token);
4335 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4336 wakaba 1.49 ## reprocess
4337 wakaba 1.125 !!!ack-later;
4338 wakaba 1.126 next B;
4339 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4340 wakaba 1.49 if ($token->{tag_name} eq 'head') {
4341 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4342 wakaba 1.79 !!!cp ('t132');
4343 wakaba 1.50 ## As if <head>
4344 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4345 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4346 wakaba 1.123 push @{$self->{open_elements}},
4347     [$self->{head_element}, $el_category->{head}];
4348 wakaba 1.50
4349     ## Reprocess in the "in head" insertion mode...
4350     pop @{$self->{open_elements}};
4351 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4352 wakaba 1.50 !!!next-token;
4353 wakaba 1.126 next B;
4354 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4355 wakaba 1.79 !!!cp ('t133');
4356 wakaba 1.49 ## As if </noscript>
4357     pop @{$self->{open_elements}};
4358 wakaba 1.113 !!!parse-error (type => 'in noscript:/head', token => $token);
4359 wakaba 1.49
4360     ## Reprocess in the "in head" insertion mode...
4361 wakaba 1.50 pop @{$self->{open_elements}};
4362 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4363 wakaba 1.50 !!!next-token;
4364 wakaba 1.126 next B;
4365 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4366 wakaba 1.79 !!!cp ('t134');
4367 wakaba 1.49 pop @{$self->{open_elements}};
4368 wakaba 1.54 $self->{insertion_mode} = AFTER_HEAD_IM;
4369 wakaba 1.49 !!!next-token;
4370 wakaba 1.126 next B;
4371 wakaba 1.49 } else {
4372 wakaba 1.79 !!!cp ('t135');
4373 wakaba 1.49 #
4374     }
4375     } elsif ($token->{tag_name} eq 'noscript') {
4376 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4377 wakaba 1.79 !!!cp ('t136');
4378 wakaba 1.49 pop @{$self->{open_elements}};
4379 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4380 wakaba 1.49 !!!next-token;
4381 wakaba 1.126 next B;
4382 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4383 wakaba 1.79 !!!cp ('t137');
4384 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:noscript', token => $token);
4385 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4386     !!!next-token;
4387 wakaba 1.126 next B;
4388 wakaba 1.49 } else {
4389 wakaba 1.79 !!!cp ('t138');
4390 wakaba 1.49 #
4391     }
4392     } elsif ({
4393 wakaba 1.31 body => 1, html => 1,
4394     }->{$token->{tag_name}}) {
4395 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4396 wakaba 1.79 !!!cp ('t139');
4397 wakaba 1.50 ## As if <head>
4398 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4399 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4400 wakaba 1.123 push @{$self->{open_elements}},
4401     [$self->{head_element}, $el_category->{head}];
4402 wakaba 1.50
4403 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4404 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
4405 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4406 wakaba 1.79 !!!cp ('t140');
4407 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4408 wakaba 1.49 ## Ignore the token
4409     !!!next-token;
4410 wakaba 1.126 next B;
4411 wakaba 1.79 } else {
4412     !!!cp ('t141');
4413 wakaba 1.49 }
4414 wakaba 1.50
4415     #
4416 wakaba 1.49 } elsif ({
4417 wakaba 1.31 p => 1, br => 1,
4418     }->{$token->{tag_name}}) {
4419 wakaba 1.54 if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4420 wakaba 1.79 !!!cp ('t142');
4421 wakaba 1.50 ## As if <head>
4422 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4423 wakaba 1.50 $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
4424 wakaba 1.123 push @{$self->{open_elements}},
4425     [$self->{head_element}, $el_category->{head}];
4426 wakaba 1.50
4427 wakaba 1.54 $self->{insertion_mode} = IN_HEAD_IM;
4428 wakaba 1.50 ## Reprocess in the "in head" insertion mode...
4429 wakaba 1.79 } else {
4430     !!!cp ('t143');
4431 wakaba 1.50 }
4432    
4433 wakaba 1.1 #
4434 wakaba 1.25 } else {
4435 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4436 wakaba 1.79 !!!cp ('t144');
4437 wakaba 1.54 #
4438     } else {
4439 wakaba 1.79 !!!cp ('t145');
4440 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4441 wakaba 1.49 ## Ignore the token
4442     !!!next-token;
4443 wakaba 1.126 next B;
4444 wakaba 1.49 }
4445     }
4446    
4447 wakaba 1.54 if ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4448 wakaba 1.79 !!!cp ('t146');
4449 wakaba 1.49 ## As if </noscript>
4450     pop @{$self->{open_elements}};
4451 wakaba 1.113 !!!parse-error (type => 'in noscript:/'.$token->{tag_name}, token => $token);
4452 wakaba 1.49
4453     ## Reprocess in the "in head" insertion mode...
4454     ## As if </head>
4455     pop @{$self->{open_elements}};
4456    
4457     ## Reprocess in the "after head" insertion mode...
4458 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4459 wakaba 1.79 !!!cp ('t147');
4460 wakaba 1.49 ## As if </head>
4461     pop @{$self->{open_elements}};
4462    
4463     ## Reprocess in the "after head" insertion mode...
4464 wakaba 1.54 } elsif ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4465 wakaba 1.82 ## ISSUE: This case cannot be reached?
4466 wakaba 1.79 !!!cp ('t148');
4467 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4468 wakaba 1.50 ## Ignore the token ## ISSUE: An issue in the spec.
4469     !!!next-token;
4470 wakaba 1.126 next B;
4471 wakaba 1.79 } else {
4472     !!!cp ('t149');
4473 wakaba 1.1 }
4474    
4475 wakaba 1.49 ## "after head" insertion mode
4476     ## As if <body>
4477 wakaba 1.116 !!!insert-element ('body',, $token);
4478 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
4479 wakaba 1.52 ## reprocess
4480 wakaba 1.126 next B;
4481 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4482     if ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4483     !!!cp ('t149.1');
4484    
4485     ## NOTE: As if <head>
4486 wakaba 1.126 !!!create-element ($self->{head_element}, $HTML_NS, 'head',, $token);
4487 wakaba 1.104 $self->{open_elements}->[-1]->[0]->append_child
4488     ($self->{head_element});
4489 wakaba 1.123 #push @{$self->{open_elements}},
4490     # [$self->{head_element}, $el_category->{head}];
4491 wakaba 1.104 #$self->{insertion_mode} = IN_HEAD_IM;
4492     ## NOTE: Reprocess.
4493    
4494     ## NOTE: As if </head>
4495     #pop @{$self->{open_elements}};
4496     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4497     ## NOTE: Reprocess.
4498    
4499     #
4500     } elsif ($self->{insertion_mode} == IN_HEAD_IM) {
4501     !!!cp ('t149.2');
4502    
4503     ## NOTE: As if </head>
4504     pop @{$self->{open_elements}};
4505     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4506     ## NOTE: Reprocess.
4507    
4508     #
4509     } elsif ($self->{insertion_mode} == IN_HEAD_NOSCRIPT_IM) {
4510     !!!cp ('t149.3');
4511    
4512 wakaba 1.113 !!!parse-error (type => 'in noscript:#eof', token => $token);
4513 wakaba 1.104
4514     ## As if </noscript>
4515     pop @{$self->{open_elements}};
4516     #$self->{insertion_mode} = IN_HEAD_IM;
4517     ## NOTE: Reprocess.
4518    
4519     ## NOTE: As if </head>
4520     pop @{$self->{open_elements}};
4521     #$self->{insertion_mode} = IN_AFTER_HEAD_IM;
4522     ## NOTE: Reprocess.
4523    
4524     #
4525     } else {
4526     !!!cp ('t149.4');
4527     #
4528     }
4529    
4530     ## NOTE: As if <body>
4531 wakaba 1.116 !!!insert-element ('body',, $token);
4532 wakaba 1.104 $self->{insertion_mode} = IN_BODY_IM;
4533     ## NOTE: Reprocess.
4534 wakaba 1.126 next B;
4535 wakaba 1.104 } else {
4536     die "$0: $token->{type}: Unknown token type";
4537     }
4538 wakaba 1.52
4539     ## ISSUE: An issue in the spec.
4540 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_IMS) {
4541 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
4542 wakaba 1.79 !!!cp ('t150');
4543 wakaba 1.52 ## NOTE: There is a code clone of "character in body".
4544     $reconstruct_active_formatting_elements->($insert_to_current);
4545    
4546     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4547    
4548     !!!next-token;
4549 wakaba 1.126 next B;
4550 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
4551 wakaba 1.52 if ({
4552     caption => 1, col => 1, colgroup => 1, tbody => 1,
4553     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
4554     }->{$token->{tag_name}}) {
4555 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4556 wakaba 1.52 ## have an element in table scope
4557 wakaba 1.108 for (reverse 0..$#{$self->{open_elements}}) {
4558 wakaba 1.52 my $node = $self->{open_elements}->[$_];
4559 wakaba 1.123 if ($node->[1] & TABLE_CELL_EL) {
4560 wakaba 1.79 !!!cp ('t151');
4561 wakaba 1.108
4562     ## Close the cell
4563 wakaba 1.125 !!!back-token; # <x>
4564 wakaba 1.122 $token = {type => END_TAG_TOKEN,
4565     tag_name => $node->[0]->manakai_local_name,
4566 wakaba 1.114 line => $token->{line},
4567     column => $token->{column}};
4568 wakaba 1.126 next B;
4569 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4570 wakaba 1.79 !!!cp ('t152');
4571 wakaba 1.108 ## ISSUE: This case can never be reached, maybe.
4572     last;
4573 wakaba 1.52 }
4574 wakaba 1.108 }
4575    
4576     !!!cp ('t153');
4577     !!!parse-error (type => 'start tag not allowed',
4578 wakaba 1.113 value => $token->{tag_name}, token => $token);
4579 wakaba 1.108 ## Ignore the token
4580 wakaba 1.125 !!!nack ('t153.1');
4581 wakaba 1.108 !!!next-token;
4582 wakaba 1.126 next B;
4583 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4584 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4585 wakaba 1.52
4586 wakaba 1.108 ## NOTE: As if </caption>.
4587 wakaba 1.52 ## have a table element in table scope
4588     my $i;
4589 wakaba 1.108 INSCOPE: {
4590     for (reverse 0..$#{$self->{open_elements}}) {
4591     my $node = $self->{open_elements}->[$_];
4592 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4593 wakaba 1.108 !!!cp ('t155');
4594     $i = $_;
4595     last INSCOPE;
4596 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4597 wakaba 1.108 !!!cp ('t156');
4598     last;
4599     }
4600 wakaba 1.52 }
4601 wakaba 1.108
4602     !!!cp ('t157');
4603     !!!parse-error (type => 'start tag not allowed',
4604 wakaba 1.113 value => $token->{tag_name}, token => $token);
4605 wakaba 1.108 ## Ignore the token
4606 wakaba 1.125 !!!nack ('t157.1');
4607 wakaba 1.108 !!!next-token;
4608 wakaba 1.126 next B;
4609 wakaba 1.52 } # INSCOPE
4610    
4611     ## generate implied end tags
4612 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4613     & END_TAG_OPTIONAL_EL) {
4614 wakaba 1.79 !!!cp ('t158');
4615 wakaba 1.86 pop @{$self->{open_elements}};
4616 wakaba 1.52 }
4617    
4618 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4619 wakaba 1.79 !!!cp ('t159');
4620 wakaba 1.122 !!!parse-error (type => 'not closed',
4621     value => $self->{open_elements}->[-1]->[0]
4622     ->manakai_local_name,
4623     token => $token);
4624 wakaba 1.79 } else {
4625     !!!cp ('t160');
4626 wakaba 1.52 }
4627    
4628     splice @{$self->{open_elements}}, $i;
4629    
4630     $clear_up_to_marker->();
4631    
4632 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4633 wakaba 1.52
4634     ## reprocess
4635 wakaba 1.125 !!!ack-later;
4636 wakaba 1.126 next B;
4637 wakaba 1.52 } else {
4638 wakaba 1.79 !!!cp ('t161');
4639 wakaba 1.52 #
4640     }
4641     } else {
4642 wakaba 1.79 !!!cp ('t162');
4643 wakaba 1.52 #
4644     }
4645 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
4646 wakaba 1.52 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4647 wakaba 1.54 if ($self->{insertion_mode} == IN_CELL_IM) {
4648 wakaba 1.43 ## have an element in table scope
4649 wakaba 1.52 my $i;
4650 wakaba 1.43 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4651     my $node = $self->{open_elements}->[$_];
4652 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4653 wakaba 1.79 !!!cp ('t163');
4654 wakaba 1.52 $i = $_;
4655 wakaba 1.43 last INSCOPE;
4656 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4657 wakaba 1.79 !!!cp ('t164');
4658 wakaba 1.43 last INSCOPE;
4659     }
4660     } # INSCOPE
4661 wakaba 1.52 unless (defined $i) {
4662 wakaba 1.79 !!!cp ('t165');
4663 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4664 wakaba 1.43 ## Ignore the token
4665     !!!next-token;
4666 wakaba 1.126 next B;
4667 wakaba 1.43 }
4668    
4669 wakaba 1.52 ## generate implied end tags
4670 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4671     & END_TAG_OPTIONAL_EL) {
4672 wakaba 1.79 !!!cp ('t166');
4673 wakaba 1.86 pop @{$self->{open_elements}};
4674 wakaba 1.52 }
4675 wakaba 1.86
4676 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
4677     ne $token->{tag_name}) {
4678 wakaba 1.79 !!!cp ('t167');
4679 wakaba 1.122 !!!parse-error (type => 'not closed',
4680     value => $self->{open_elements}->[-1]->[0]
4681     ->manakai_local_name,
4682     token => $token);
4683 wakaba 1.79 } else {
4684     !!!cp ('t168');
4685 wakaba 1.52 }
4686    
4687     splice @{$self->{open_elements}}, $i;
4688    
4689     $clear_up_to_marker->();
4690    
4691 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4692 wakaba 1.52
4693     !!!next-token;
4694 wakaba 1.126 next B;
4695 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CAPTION_IM) {
4696 wakaba 1.79 !!!cp ('t169');
4697 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4698 wakaba 1.52 ## Ignore the token
4699     !!!next-token;
4700 wakaba 1.126 next B;
4701 wakaba 1.52 } else {
4702 wakaba 1.79 !!!cp ('t170');
4703 wakaba 1.52 #
4704     }
4705     } elsif ($token->{tag_name} eq 'caption') {
4706 wakaba 1.54 if ($self->{insertion_mode} == IN_CAPTION_IM) {
4707 wakaba 1.43 ## have a caption element in table scope
4708     my $i;
4709 wakaba 1.108 INSCOPE: {
4710     for (reverse 0..$#{$self->{open_elements}}) {
4711     my $node = $self->{open_elements}->[$_];
4712 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4713 wakaba 1.108 !!!cp ('t171');
4714     $i = $_;
4715     last INSCOPE;
4716 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4717 wakaba 1.108 !!!cp ('t172');
4718     last;
4719     }
4720 wakaba 1.43 }
4721 wakaba 1.108
4722     !!!cp ('t173');
4723     !!!parse-error (type => 'unmatched end tag',
4724 wakaba 1.113 value => $token->{tag_name}, token => $token);
4725 wakaba 1.108 ## Ignore the token
4726     !!!next-token;
4727 wakaba 1.126 next B;
4728 wakaba 1.43 } # INSCOPE
4729    
4730     ## generate implied end tags
4731 wakaba 1.123 while ($self->{open_elements}->[-1]->[1]
4732     & END_TAG_OPTIONAL_EL) {
4733 wakaba 1.79 !!!cp ('t174');
4734 wakaba 1.86 pop @{$self->{open_elements}};
4735 wakaba 1.43 }
4736 wakaba 1.52
4737 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4738 wakaba 1.79 !!!cp ('t175');
4739 wakaba 1.122 !!!parse-error (type => 'not closed',
4740     value => $self->{open_elements}->[-1]->[0]
4741     ->manakai_local_name,
4742     token => $token);
4743 wakaba 1.79 } else {
4744     !!!cp ('t176');
4745 wakaba 1.52 }
4746    
4747     splice @{$self->{open_elements}}, $i;
4748    
4749     $clear_up_to_marker->();
4750    
4751 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4752 wakaba 1.52
4753     !!!next-token;
4754 wakaba 1.126 next B;
4755 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_CELL_IM) {
4756 wakaba 1.79 !!!cp ('t177');
4757 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4758 wakaba 1.52 ## Ignore the token
4759     !!!next-token;
4760 wakaba 1.126 next B;
4761 wakaba 1.52 } else {
4762 wakaba 1.79 !!!cp ('t178');
4763 wakaba 1.52 #
4764     }
4765     } elsif ({
4766     table => 1, tbody => 1, tfoot => 1,
4767     thead => 1, tr => 1,
4768     }->{$token->{tag_name}} and
4769 wakaba 1.54 $self->{insertion_mode} == IN_CELL_IM) {
4770 wakaba 1.52 ## have an element in table scope
4771     my $i;
4772     my $tn;
4773 wakaba 1.108 INSCOPE: {
4774     for (reverse 0..$#{$self->{open_elements}}) {
4775     my $node = $self->{open_elements}->[$_];
4776 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
4777 wakaba 1.108 !!!cp ('t179');
4778     $i = $_;
4779    
4780     ## Close the cell
4781 wakaba 1.125 !!!back-token; # </x>
4782 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => $tn,
4783     line => $token->{line},
4784     column => $token->{column}};
4785 wakaba 1.126 next B;
4786 wakaba 1.123 } elsif ($node->[1] & TABLE_CELL_EL) {
4787 wakaba 1.108 !!!cp ('t180');
4788 wakaba 1.123 $tn = $node->[0]->manakai_local_name;
4789 wakaba 1.108 ## NOTE: There is exactly one |td| or |th| element
4790     ## in scope in the stack of open elements by definition.
4791 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4792 wakaba 1.108 ## ISSUE: Can this be reached?
4793     !!!cp ('t181');
4794     last;
4795     }
4796 wakaba 1.52 }
4797 wakaba 1.108
4798 wakaba 1.79 !!!cp ('t182');
4799 wakaba 1.108 !!!parse-error (type => 'unmatched end tag',
4800 wakaba 1.113 value => $token->{tag_name}, token => $token);
4801 wakaba 1.52 ## Ignore the token
4802     !!!next-token;
4803 wakaba 1.126 next B;
4804 wakaba 1.108 } # INSCOPE
4805 wakaba 1.52 } elsif ($token->{tag_name} eq 'table' and
4806 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4807 wakaba 1.113 !!!parse-error (type => 'not closed:caption', token => $token);
4808 wakaba 1.52
4809     ## As if </caption>
4810     ## have a caption element in table scope
4811     my $i;
4812     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4813     my $node = $self->{open_elements}->[$_];
4814 wakaba 1.123 if ($node->[1] & CAPTION_EL) {
4815 wakaba 1.79 !!!cp ('t184');
4816 wakaba 1.52 $i = $_;
4817     last INSCOPE;
4818 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
4819 wakaba 1.79 !!!cp ('t185');
4820 wakaba 1.52 last INSCOPE;
4821     }
4822     } # INSCOPE
4823     unless (defined $i) {
4824 wakaba 1.79 !!!cp ('t186');
4825 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:caption', token => $token);
4826 wakaba 1.52 ## Ignore the token
4827     !!!next-token;
4828 wakaba 1.126 next B;
4829 wakaba 1.52 }
4830    
4831     ## generate implied end tags
4832 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
4833 wakaba 1.79 !!!cp ('t187');
4834 wakaba 1.86 pop @{$self->{open_elements}};
4835 wakaba 1.52 }
4836    
4837 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & CAPTION_EL) {
4838 wakaba 1.79 !!!cp ('t188');
4839 wakaba 1.122 !!!parse-error (type => 'not closed',
4840     value => $self->{open_elements}->[-1]->[0]
4841     ->manakai_local_name,
4842     token => $token);
4843 wakaba 1.79 } else {
4844     !!!cp ('t189');
4845 wakaba 1.52 }
4846    
4847     splice @{$self->{open_elements}}, $i;
4848    
4849     $clear_up_to_marker->();
4850    
4851 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
4852 wakaba 1.52
4853     ## reprocess
4854 wakaba 1.126 next B;
4855 wakaba 1.52 } elsif ({
4856     body => 1, col => 1, colgroup => 1, html => 1,
4857     }->{$token->{tag_name}}) {
4858 wakaba 1.56 if ($self->{insertion_mode} & BODY_TABLE_IMS) {
4859 wakaba 1.79 !!!cp ('t190');
4860 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4861 wakaba 1.52 ## Ignore the token
4862     !!!next-token;
4863 wakaba 1.126 next B;
4864 wakaba 1.52 } else {
4865 wakaba 1.79 !!!cp ('t191');
4866 wakaba 1.52 #
4867     }
4868     } elsif ({
4869     tbody => 1, tfoot => 1,
4870     thead => 1, tr => 1,
4871     }->{$token->{tag_name}} and
4872 wakaba 1.54 $self->{insertion_mode} == IN_CAPTION_IM) {
4873 wakaba 1.79 !!!cp ('t192');
4874 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
4875 wakaba 1.52 ## Ignore the token
4876     !!!next-token;
4877 wakaba 1.126 next B;
4878 wakaba 1.52 } else {
4879 wakaba 1.79 !!!cp ('t193');
4880 wakaba 1.52 #
4881     }
4882 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
4883     for my $entry (@{$self->{open_elements}}) {
4884 wakaba 1.123 unless ($entry->[1] & ALL_END_TAG_OPTIONAL_EL) {
4885 wakaba 1.104 !!!cp ('t75');
4886 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
4887 wakaba 1.104 last;
4888     }
4889     }
4890    
4891     ## Stop parsing.
4892     last B;
4893 wakaba 1.52 } else {
4894     die "$0: $token->{type}: Unknown token type";
4895     }
4896    
4897     $insert = $insert_to_current;
4898     #
4899 wakaba 1.56 } elsif ($self->{insertion_mode} & TABLE_IMS) {
4900 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
4901 wakaba 1.95 if (not $open_tables->[-1]->[1] and # tainted
4902     $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4903     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4904 wakaba 1.52
4905 wakaba 1.95 unless (length $token->{data}) {
4906     !!!cp ('t194');
4907     !!!next-token;
4908 wakaba 1.126 next B;
4909 wakaba 1.95 } else {
4910     !!!cp ('t195');
4911     }
4912     }
4913 wakaba 1.52
4914 wakaba 1.113 !!!parse-error (type => 'in table:#character', token => $token);
4915 wakaba 1.52
4916     ## As if in body, but insert into foster parent element
4917     ## ISSUE: Spec says that "whenever a node would be inserted
4918     ## into the current node" while characters might not
4919     ## result in a new Text node.
4920     $reconstruct_active_formatting_elements->($insert_to_foster);
4921    
4922 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) {
4923 wakaba 1.52 # MUST
4924     my $foster_parent_element;
4925     my $next_sibling;
4926     my $prev_sibling;
4927     OE: for (reverse 0..$#{$self->{open_elements}}) {
4928 wakaba 1.123 if ($self->{open_elements}->[$_]->[1] & TABLE_EL) {
4929 wakaba 1.52 my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4930     if (defined $parent and $parent->node_type == 1) {
4931 wakaba 1.79 !!!cp ('t196');
4932 wakaba 1.52 $foster_parent_element = $parent;
4933     $next_sibling = $self->{open_elements}->[$_]->[0];
4934     $prev_sibling = $next_sibling->previous_sibling;
4935     } else {
4936 wakaba 1.79 !!!cp ('t197');
4937 wakaba 1.52 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4938     $prev_sibling = $foster_parent_element->last_child;
4939     }
4940     last OE;
4941     }
4942     } # OE
4943     $foster_parent_element = $self->{open_elements}->[0]->[0] and
4944     $prev_sibling = $foster_parent_element->last_child
4945     unless defined $foster_parent_element;
4946     if (defined $prev_sibling and
4947     $prev_sibling->node_type == 3) {
4948 wakaba 1.79 !!!cp ('t198');
4949 wakaba 1.52 $prev_sibling->manakai_append_text ($token->{data});
4950     } else {
4951 wakaba 1.79 !!!cp ('t199');
4952 wakaba 1.52 $foster_parent_element->insert_before
4953     ($self->{document}->create_text_node ($token->{data}),
4954     $next_sibling);
4955     }
4956 wakaba 1.95 $open_tables->[-1]->[1] = 1; # tainted
4957     } else {
4958     !!!cp ('t200');
4959     $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4960     }
4961 wakaba 1.52
4962 wakaba 1.95 !!!next-token;
4963 wakaba 1.126 next B;
4964 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
4965 wakaba 1.52 if ({
4966 wakaba 1.54 tr => ($self->{insertion_mode} != IN_ROW_IM),
4967 wakaba 1.52 th => 1, td => 1,
4968     }->{$token->{tag_name}}) {
4969 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_IM) {
4970 wakaba 1.52 ## Clear back to table context
4971 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4972     & TABLE_SCOPING_EL)) {
4973 wakaba 1.79 !!!cp ('t201');
4974 wakaba 1.52 pop @{$self->{open_elements}};
4975 wakaba 1.43 }
4976    
4977 wakaba 1.116 !!!insert-element ('tbody',, $token);
4978 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
4979 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
4980     }
4981    
4982 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
4983 wakaba 1.52 unless ($token->{tag_name} eq 'tr') {
4984 wakaba 1.79 !!!cp ('t202');
4985 wakaba 1.113 !!!parse-error (type => 'missing start tag:tr', token => $token);
4986 wakaba 1.52 }
4987 wakaba 1.43
4988 wakaba 1.52 ## Clear back to table body context
4989 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
4990     & TABLE_ROWS_SCOPING_EL)) {
4991 wakaba 1.79 !!!cp ('t203');
4992 wakaba 1.83 ## ISSUE: Can this case be reached?
4993 wakaba 1.52 pop @{$self->{open_elements}};
4994     }
4995 wakaba 1.43
4996 wakaba 1.54 $self->{insertion_mode} = IN_ROW_IM;
4997 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
4998 wakaba 1.79 !!!cp ('t204');
4999 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5000 wakaba 1.125 !!!nack ('t204');
5001 wakaba 1.52 !!!next-token;
5002 wakaba 1.126 next B;
5003 wakaba 1.52 } else {
5004 wakaba 1.79 !!!cp ('t205');
5005 wakaba 1.116 !!!insert-element ('tr',, $token);
5006 wakaba 1.52 ## reprocess in the "in row" insertion mode
5007     }
5008 wakaba 1.79 } else {
5009     !!!cp ('t206');
5010 wakaba 1.52 }
5011    
5012     ## Clear back to table row context
5013 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5014     & TABLE_ROW_SCOPING_EL)) {
5015 wakaba 1.79 !!!cp ('t207');
5016 wakaba 1.52 pop @{$self->{open_elements}};
5017 wakaba 1.43 }
5018 wakaba 1.52
5019 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5020 wakaba 1.54 $self->{insertion_mode} = IN_CELL_IM;
5021 wakaba 1.52
5022     push @$active_formatting_elements, ['#marker', ''];
5023    
5024 wakaba 1.125 !!!nack ('t207.1');
5025 wakaba 1.52 !!!next-token;
5026 wakaba 1.126 next B;
5027 wakaba 1.52 } elsif ({
5028     caption => 1, col => 1, colgroup => 1,
5029     tbody => 1, tfoot => 1, thead => 1,
5030 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5031 wakaba 1.52 }->{$token->{tag_name}}) {
5032 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5033 wakaba 1.52 ## As if </tr>
5034 wakaba 1.43 ## have an element in table scope
5035     my $i;
5036     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5037     my $node = $self->{open_elements}->[$_];
5038 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5039 wakaba 1.79 !!!cp ('t208');
5040 wakaba 1.43 $i = $_;
5041     last INSCOPE;
5042 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5043 wakaba 1.79 !!!cp ('t209');
5044 wakaba 1.43 last INSCOPE;
5045     }
5046     } # INSCOPE
5047 wakaba 1.79 unless (defined $i) {
5048 wakaba 1.125 !!!cp ('t210');
5049 wakaba 1.83 ## TODO: This type is wrong.
5050 wakaba 1.125 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name}, token => $token);
5051 wakaba 1.52 ## Ignore the token
5052 wakaba 1.125 !!!nack ('t210.1');
5053 wakaba 1.52 !!!next-token;
5054 wakaba 1.126 next B;
5055 wakaba 1.43 }
5056    
5057 wakaba 1.52 ## Clear back to table row context
5058 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5059     & TABLE_ROW_SCOPING_EL)) {
5060 wakaba 1.79 !!!cp ('t211');
5061 wakaba 1.83 ## ISSUE: Can this case be reached?
5062 wakaba 1.52 pop @{$self->{open_elements}};
5063 wakaba 1.1 }
5064 wakaba 1.43
5065 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5066 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5067 wakaba 1.52 if ($token->{tag_name} eq 'tr') {
5068 wakaba 1.79 !!!cp ('t212');
5069 wakaba 1.52 ## reprocess
5070 wakaba 1.125 !!!ack-later;
5071 wakaba 1.126 next B;
5072 wakaba 1.52 } else {
5073 wakaba 1.79 !!!cp ('t213');
5074 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5075     }
5076 wakaba 1.1 }
5077 wakaba 1.52
5078 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5079 wakaba 1.52 ## have an element in table scope
5080 wakaba 1.43 my $i;
5081     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5082     my $node = $self->{open_elements}->[$_];
5083 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5084 wakaba 1.79 !!!cp ('t214');
5085 wakaba 1.43 $i = $_;
5086     last INSCOPE;
5087 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5088 wakaba 1.79 !!!cp ('t215');
5089 wakaba 1.43 last INSCOPE;
5090     }
5091     } # INSCOPE
5092 wakaba 1.52 unless (defined $i) {
5093 wakaba 1.79 !!!cp ('t216');
5094 wakaba 1.82 ## TODO: This error type is wrong.
5095 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5096 wakaba 1.52 ## Ignore the token
5097 wakaba 1.125 !!!nack ('t216.1');
5098 wakaba 1.52 !!!next-token;
5099 wakaba 1.126 next B;
5100 wakaba 1.43 }
5101 wakaba 1.52
5102     ## Clear back to table body context
5103 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5104     & TABLE_ROWS_SCOPING_EL)) {
5105 wakaba 1.79 !!!cp ('t217');
5106 wakaba 1.83 ## ISSUE: Can this state be reached?
5107 wakaba 1.52 pop @{$self->{open_elements}};
5108 wakaba 1.43 }
5109    
5110 wakaba 1.52 ## As if <{current node}>
5111     ## have an element in table scope
5112     ## true by definition
5113 wakaba 1.43
5114 wakaba 1.52 ## Clear back to table body context
5115     ## nop by definition
5116 wakaba 1.43
5117 wakaba 1.52 pop @{$self->{open_elements}};
5118 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5119 wakaba 1.52 ## reprocess in "in table" insertion mode...
5120 wakaba 1.79 } else {
5121     !!!cp ('t218');
5122 wakaba 1.52 }
5123    
5124     if ($token->{tag_name} eq 'col') {
5125     ## Clear back to table context
5126 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5127     & TABLE_SCOPING_EL)) {
5128 wakaba 1.79 !!!cp ('t219');
5129 wakaba 1.83 ## ISSUE: Can this state be reached?
5130 wakaba 1.52 pop @{$self->{open_elements}};
5131     }
5132 wakaba 1.43
5133 wakaba 1.116 !!!insert-element ('colgroup',, $token);
5134 wakaba 1.54 $self->{insertion_mode} = IN_COLUMN_GROUP_IM;
5135 wakaba 1.52 ## reprocess
5136 wakaba 1.125 !!!ack-later;
5137 wakaba 1.126 next B;
5138 wakaba 1.52 } elsif ({
5139     caption => 1,
5140     colgroup => 1,
5141     tbody => 1, tfoot => 1, thead => 1,
5142     }->{$token->{tag_name}}) {
5143     ## Clear back to table context
5144 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5145     & TABLE_SCOPING_EL)) {
5146 wakaba 1.79 !!!cp ('t220');
5147 wakaba 1.83 ## ISSUE: Can this state be reached?
5148 wakaba 1.52 pop @{$self->{open_elements}};
5149 wakaba 1.1 }
5150 wakaba 1.52
5151     push @$active_formatting_elements, ['#marker', '']
5152     if $token->{tag_name} eq 'caption';
5153    
5154 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5155 wakaba 1.52 $self->{insertion_mode} = {
5156 wakaba 1.54 caption => IN_CAPTION_IM,
5157     colgroup => IN_COLUMN_GROUP_IM,
5158     tbody => IN_TABLE_BODY_IM,
5159     tfoot => IN_TABLE_BODY_IM,
5160     thead => IN_TABLE_BODY_IM,
5161 wakaba 1.52 }->{$token->{tag_name}};
5162 wakaba 1.1 !!!next-token;
5163 wakaba 1.125 !!!nack ('t220.1');
5164 wakaba 1.126 next B;
5165 wakaba 1.52 } else {
5166     die "$0: in table: <>: $token->{tag_name}";
5167 wakaba 1.1 }
5168 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5169 wakaba 1.122 !!!parse-error (type => 'not closed',
5170     value => $self->{open_elements}->[-1]->[0]
5171     ->manakai_local_name,
5172     token => $token);
5173 wakaba 1.1
5174 wakaba 1.52 ## As if </table>
5175 wakaba 1.1 ## have a table element in table scope
5176     my $i;
5177 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5178     my $node = $self->{open_elements}->[$_];
5179 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5180 wakaba 1.79 !!!cp ('t221');
5181 wakaba 1.1 $i = $_;
5182     last INSCOPE;
5183 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5184 wakaba 1.79 !!!cp ('t222');
5185 wakaba 1.1 last INSCOPE;
5186     }
5187     } # INSCOPE
5188     unless (defined $i) {
5189 wakaba 1.79 !!!cp ('t223');
5190 wakaba 1.83 ## TODO: The following is wrong, maybe.
5191 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:table', token => $token);
5192 wakaba 1.52 ## Ignore tokens </table><table>
5193 wakaba 1.125 !!!nack ('t223.1');
5194 wakaba 1.1 !!!next-token;
5195 wakaba 1.126 next B;
5196 wakaba 1.1 }
5197    
5198 wakaba 1.106 ## TODO: The following steps have been removed from the latest spec.
5199 wakaba 1.1 ## generate implied end tags
5200 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
5201 wakaba 1.79 !!!cp ('t224');
5202 wakaba 1.86 pop @{$self->{open_elements}};
5203 wakaba 1.1 }
5204    
5205 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & TABLE_EL) {
5206 wakaba 1.79 !!!cp ('t225');
5207 wakaba 1.122 ## NOTE: |<table><tr><table>|
5208     !!!parse-error (type => 'not closed',
5209     value => $self->{open_elements}->[-1]->[0]
5210     ->manakai_local_name,
5211     token => $token);
5212 wakaba 1.79 } else {
5213     !!!cp ('t226');
5214 wakaba 1.1 }
5215    
5216 wakaba 1.3 splice @{$self->{open_elements}}, $i;
5217 wakaba 1.95 pop @{$open_tables};
5218 wakaba 1.1
5219 wakaba 1.52 $self->_reset_insertion_mode;
5220 wakaba 1.1
5221 wakaba 1.125 ## reprocess
5222     !!!ack-later;
5223 wakaba 1.126 next B;
5224 wakaba 1.100 } elsif ($token->{tag_name} eq 'style') {
5225     if (not $open_tables->[-1]->[1]) { # tainted
5226     !!!cp ('t227.8');
5227     ## NOTE: This is a "as if in head" code clone.
5228     $parse_rcdata->(CDATA_CONTENT_MODEL);
5229 wakaba 1.126 next B;
5230 wakaba 1.100 } else {
5231     !!!cp ('t227.7');
5232     #
5233     }
5234     } elsif ($token->{tag_name} eq 'script') {
5235     if (not $open_tables->[-1]->[1]) { # tainted
5236     !!!cp ('t227.6');
5237     ## NOTE: This is a "as if in head" code clone.
5238     $script_start_tag->();
5239 wakaba 1.126 next B;
5240 wakaba 1.100 } else {
5241     !!!cp ('t227.5');
5242     #
5243     }
5244 wakaba 1.98 } elsif ($token->{tag_name} eq 'input') {
5245     if (not $open_tables->[-1]->[1]) { # tainted
5246     if ($token->{attributes}->{type}) { ## TODO: case
5247     my $type = lc $token->{attributes}->{type}->{value};
5248     if ($type eq 'hidden') {
5249     !!!cp ('t227.3');
5250 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5251 wakaba 1.98
5252 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5253 wakaba 1.98
5254     ## TODO: form element pointer
5255    
5256     pop @{$self->{open_elements}};
5257    
5258     !!!next-token;
5259 wakaba 1.125 !!!ack ('t227.2.1');
5260 wakaba 1.126 next B;
5261 wakaba 1.98 } else {
5262     !!!cp ('t227.2');
5263     #
5264     }
5265     } else {
5266     !!!cp ('t227.1');
5267     #
5268     }
5269     } else {
5270     !!!cp ('t227.4');
5271     #
5272     }
5273 wakaba 1.58 } else {
5274 wakaba 1.79 !!!cp ('t227');
5275 wakaba 1.58 #
5276     }
5277 wakaba 1.98
5278 wakaba 1.113 !!!parse-error (type => 'in table:'.$token->{tag_name}, token => $token);
5279 wakaba 1.98
5280     $insert = $insert_to_foster;
5281     #
5282 wakaba 1.58 } elsif ($token->{type} == END_TAG_TOKEN) {
5283 wakaba 1.52 if ($token->{tag_name} eq 'tr' and
5284 wakaba 1.54 $self->{insertion_mode} == IN_ROW_IM) {
5285 wakaba 1.52 ## have an element in table scope
5286     my $i;
5287     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5288     my $node = $self->{open_elements}->[$_];
5289 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5290 wakaba 1.79 !!!cp ('t228');
5291 wakaba 1.52 $i = $_;
5292     last INSCOPE;
5293 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5294 wakaba 1.79 !!!cp ('t229');
5295 wakaba 1.52 last INSCOPE;
5296     }
5297     } # INSCOPE
5298     unless (defined $i) {
5299 wakaba 1.79 !!!cp ('t230');
5300 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5301 wakaba 1.52 ## Ignore the token
5302 wakaba 1.125 !!!nack ('t230.1');
5303 wakaba 1.42 !!!next-token;
5304 wakaba 1.126 next B;
5305 wakaba 1.79 } else {
5306     !!!cp ('t232');
5307 wakaba 1.42 }
5308    
5309 wakaba 1.52 ## Clear back to table row context
5310 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5311     & TABLE_ROW_SCOPING_EL)) {
5312 wakaba 1.79 !!!cp ('t231');
5313 wakaba 1.83 ## ISSUE: Can this state be reached?
5314 wakaba 1.52 pop @{$self->{open_elements}};
5315     }
5316 wakaba 1.42
5317 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5318 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5319 wakaba 1.52 !!!next-token;
5320 wakaba 1.125 !!!nack ('t231.1');
5321 wakaba 1.126 next B;
5322 wakaba 1.52 } elsif ($token->{tag_name} eq 'table') {
5323 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5324 wakaba 1.52 ## As if </tr>
5325     ## have an element in table scope
5326     my $i;
5327     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5328     my $node = $self->{open_elements}->[$_];
5329 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5330 wakaba 1.79 !!!cp ('t233');
5331 wakaba 1.52 $i = $_;
5332     last INSCOPE;
5333 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5334 wakaba 1.79 !!!cp ('t234');
5335 wakaba 1.52 last INSCOPE;
5336 wakaba 1.42 }
5337 wakaba 1.52 } # INSCOPE
5338     unless (defined $i) {
5339 wakaba 1.79 !!!cp ('t235');
5340 wakaba 1.83 ## TODO: The following is wrong.
5341 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{type}, token => $token);
5342 wakaba 1.52 ## Ignore the token
5343 wakaba 1.125 !!!nack ('t236.1');
5344 wakaba 1.52 !!!next-token;
5345 wakaba 1.126 next B;
5346 wakaba 1.42 }
5347 wakaba 1.52
5348     ## Clear back to table row context
5349 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5350     & TABLE_ROW_SCOPING_EL)) {
5351 wakaba 1.79 !!!cp ('t236');
5352 wakaba 1.83 ## ISSUE: Can this state be reached?
5353 wakaba 1.46 pop @{$self->{open_elements}};
5354 wakaba 1.1 }
5355 wakaba 1.46
5356 wakaba 1.52 pop @{$self->{open_elements}}; # tr
5357 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5358 wakaba 1.46 ## reprocess in the "in table body" insertion mode...
5359 wakaba 1.1 }
5360    
5361 wakaba 1.54 if ($self->{insertion_mode} == IN_TABLE_BODY_IM) {
5362 wakaba 1.52 ## have an element in table scope
5363     my $i;
5364     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5365     my $node = $self->{open_elements}->[$_];
5366 wakaba 1.123 if ($node->[1] & TABLE_ROW_GROUP_EL) {
5367 wakaba 1.79 !!!cp ('t237');
5368 wakaba 1.52 $i = $_;
5369     last INSCOPE;
5370 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5371 wakaba 1.79 !!!cp ('t238');
5372 wakaba 1.52 last INSCOPE;
5373     }
5374     } # INSCOPE
5375     unless (defined $i) {
5376 wakaba 1.79 !!!cp ('t239');
5377 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5378 wakaba 1.52 ## Ignore the token
5379 wakaba 1.125 !!!nack ('t239.1');
5380 wakaba 1.52 !!!next-token;
5381 wakaba 1.126 next B;
5382 wakaba 1.47 }
5383    
5384     ## Clear back to table body context
5385 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5386     & TABLE_ROWS_SCOPING_EL)) {
5387 wakaba 1.79 !!!cp ('t240');
5388 wakaba 1.47 pop @{$self->{open_elements}};
5389     }
5390    
5391 wakaba 1.52 ## As if <{current node}>
5392     ## have an element in table scope
5393     ## true by definition
5394    
5395     ## Clear back to table body context
5396     ## nop by definition
5397    
5398     pop @{$self->{open_elements}};
5399 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5400 wakaba 1.52 ## reprocess in the "in table" insertion mode...
5401     }
5402    
5403 wakaba 1.94 ## NOTE: </table> in the "in table" insertion mode.
5404     ## When you edit the code fragment below, please ensure that
5405     ## the code for <table> in the "in table" insertion mode
5406     ## is synced with it.
5407    
5408 wakaba 1.52 ## have a table element in table scope
5409     my $i;
5410     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5411     my $node = $self->{open_elements}->[$_];
5412 wakaba 1.123 if ($node->[1] & TABLE_EL) {
5413 wakaba 1.79 !!!cp ('t241');
5414 wakaba 1.52 $i = $_;
5415     last INSCOPE;
5416 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5417 wakaba 1.79 !!!cp ('t242');
5418 wakaba 1.52 last INSCOPE;
5419 wakaba 1.47 }
5420 wakaba 1.52 } # INSCOPE
5421     unless (defined $i) {
5422 wakaba 1.79 !!!cp ('t243');
5423 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5424 wakaba 1.52 ## Ignore the token
5425 wakaba 1.125 !!!nack ('t243.1');
5426 wakaba 1.52 !!!next-token;
5427 wakaba 1.126 next B;
5428 wakaba 1.3 }
5429 wakaba 1.52
5430     splice @{$self->{open_elements}}, $i;
5431 wakaba 1.95 pop @{$open_tables};
5432 wakaba 1.1
5433 wakaba 1.52 $self->_reset_insertion_mode;
5434 wakaba 1.47
5435     !!!next-token;
5436 wakaba 1.126 next B;
5437 wakaba 1.47 } elsif ({
5438 wakaba 1.48 tbody => 1, tfoot => 1, thead => 1,
5439 wakaba 1.52 }->{$token->{tag_name}} and
5440 wakaba 1.56 $self->{insertion_mode} & ROW_IMS) {
5441 wakaba 1.54 if ($self->{insertion_mode} == IN_ROW_IM) {
5442 wakaba 1.52 ## have an element in table scope
5443     my $i;
5444     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5445     my $node = $self->{open_elements}->[$_];
5446 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5447 wakaba 1.79 !!!cp ('t247');
5448 wakaba 1.52 $i = $_;
5449     last INSCOPE;
5450 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5451 wakaba 1.79 !!!cp ('t248');
5452 wakaba 1.52 last INSCOPE;
5453     }
5454     } # INSCOPE
5455     unless (defined $i) {
5456 wakaba 1.79 !!!cp ('t249');
5457 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5458 wakaba 1.52 ## Ignore the token
5459 wakaba 1.125 !!!nack ('t249.1');
5460 wakaba 1.52 !!!next-token;
5461 wakaba 1.126 next B;
5462 wakaba 1.52 }
5463    
5464 wakaba 1.48 ## As if </tr>
5465     ## have an element in table scope
5466     my $i;
5467     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5468     my $node = $self->{open_elements}->[$_];
5469 wakaba 1.123 if ($node->[1] & TABLE_ROW_EL) {
5470 wakaba 1.79 !!!cp ('t250');
5471 wakaba 1.48 $i = $_;
5472     last INSCOPE;
5473 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5474 wakaba 1.79 !!!cp ('t251');
5475 wakaba 1.48 last INSCOPE;
5476     }
5477     } # INSCOPE
5478 wakaba 1.52 unless (defined $i) {
5479 wakaba 1.79 !!!cp ('t252');
5480 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:tr', token => $token);
5481 wakaba 1.52 ## Ignore the token
5482 wakaba 1.125 !!!nack ('t252.1');
5483 wakaba 1.52 !!!next-token;
5484 wakaba 1.126 next B;
5485 wakaba 1.52 }
5486 wakaba 1.48
5487     ## Clear back to table row context
5488 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5489     & TABLE_ROW_SCOPING_EL)) {
5490 wakaba 1.79 !!!cp ('t253');
5491 wakaba 1.83 ## ISSUE: Can this case be reached?
5492 wakaba 1.48 pop @{$self->{open_elements}};
5493     }
5494    
5495     pop @{$self->{open_elements}}; # tr
5496 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_BODY_IM;
5497 wakaba 1.52 ## reprocess in the "in table body" insertion mode...
5498     }
5499    
5500     ## have an element in table scope
5501     my $i;
5502     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5503     my $node = $self->{open_elements}->[$_];
5504 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5505 wakaba 1.79 !!!cp ('t254');
5506 wakaba 1.52 $i = $_;
5507     last INSCOPE;
5508 wakaba 1.123 } elsif ($node->[1] & TABLE_SCOPING_EL) {
5509 wakaba 1.79 !!!cp ('t255');
5510 wakaba 1.52 last INSCOPE;
5511     }
5512     } # INSCOPE
5513     unless (defined $i) {
5514 wakaba 1.79 !!!cp ('t256');
5515 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5516 wakaba 1.52 ## Ignore the token
5517 wakaba 1.125 !!!nack ('t256.1');
5518 wakaba 1.52 !!!next-token;
5519 wakaba 1.126 next B;
5520 wakaba 1.52 }
5521    
5522     ## Clear back to table body context
5523 wakaba 1.123 while (not ($self->{open_elements}->[-1]->[1]
5524     & TABLE_ROWS_SCOPING_EL)) {
5525 wakaba 1.79 !!!cp ('t257');
5526 wakaba 1.83 ## ISSUE: Can this case be reached?
5527 wakaba 1.52 pop @{$self->{open_elements}};
5528     }
5529    
5530     pop @{$self->{open_elements}};
5531 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5532 wakaba 1.125 !!!nack ('t257.1');
5533 wakaba 1.52 !!!next-token;
5534 wakaba 1.126 next B;
5535 wakaba 1.52 } elsif ({
5536     body => 1, caption => 1, col => 1, colgroup => 1,
5537     html => 1, td => 1, th => 1,
5538 wakaba 1.54 tr => 1, # $self->{insertion_mode} == IN_ROW_IM
5539     tbody => 1, tfoot => 1, thead => 1, # $self->{insertion_mode} == IN_TABLE_IM
5540 wakaba 1.52 }->{$token->{tag_name}}) {
5541 wakaba 1.125 !!!cp ('t258');
5542     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5543     ## Ignore the token
5544     !!!nack ('t258.1');
5545     !!!next-token;
5546 wakaba 1.126 next B;
5547 wakaba 1.58 } else {
5548 wakaba 1.79 !!!cp ('t259');
5549 wakaba 1.113 !!!parse-error (type => 'in table:/'.$token->{tag_name}, token => $token);
5550 wakaba 1.52
5551 wakaba 1.58 $insert = $insert_to_foster;
5552     #
5553     }
5554 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5555 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5556 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5557 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5558 wakaba 1.104 !!!cp ('t259.1');
5559 wakaba 1.105 #
5560 wakaba 1.104 } else {
5561     !!!cp ('t259.2');
5562 wakaba 1.105 #
5563 wakaba 1.104 }
5564    
5565     ## Stop parsing
5566     last B;
5567 wakaba 1.58 } else {
5568     die "$0: $token->{type}: Unknown token type";
5569     }
5570 wakaba 1.54 } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
5571 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5572 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5573     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5574     unless (length $token->{data}) {
5575 wakaba 1.79 !!!cp ('t260');
5576 wakaba 1.52 !!!next-token;
5577 wakaba 1.126 next B;
5578 wakaba 1.52 }
5579     }
5580    
5581 wakaba 1.79 !!!cp ('t261');
5582 wakaba 1.52 #
5583 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5584 wakaba 1.52 if ($token->{tag_name} eq 'col') {
5585 wakaba 1.79 !!!cp ('t262');
5586 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5587 wakaba 1.52 pop @{$self->{open_elements}};
5588 wakaba 1.125 !!!ack ('t262.1');
5589 wakaba 1.52 !!!next-token;
5590 wakaba 1.126 next B;
5591 wakaba 1.52 } else {
5592 wakaba 1.79 !!!cp ('t263');
5593 wakaba 1.52 #
5594     }
5595 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5596 wakaba 1.52 if ($token->{tag_name} eq 'colgroup') {
5597 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5598 wakaba 1.79 !!!cp ('t264');
5599 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5600 wakaba 1.52 ## Ignore the token
5601     !!!next-token;
5602 wakaba 1.126 next B;
5603 wakaba 1.52 } else {
5604 wakaba 1.79 !!!cp ('t265');
5605 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5606 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5607 wakaba 1.52 !!!next-token;
5608 wakaba 1.126 next B;
5609 wakaba 1.52 }
5610     } elsif ($token->{tag_name} eq 'col') {
5611 wakaba 1.79 !!!cp ('t266');
5612 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:col', token => $token);
5613 wakaba 1.52 ## Ignore the token
5614     !!!next-token;
5615 wakaba 1.126 next B;
5616 wakaba 1.52 } else {
5617 wakaba 1.79 !!!cp ('t267');
5618 wakaba 1.52 #
5619     }
5620 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5621 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
5622 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5623     !!!cp ('t270.2');
5624     ## Stop parsing.
5625     last B;
5626     } else {
5627     ## NOTE: As if </colgroup>.
5628     !!!cp ('t270.1');
5629     pop @{$self->{open_elements}}; # colgroup
5630     $self->{insertion_mode} = IN_TABLE_IM;
5631     ## Reprocess.
5632 wakaba 1.126 next B;
5633 wakaba 1.104 }
5634     } else {
5635     die "$0: $token->{type}: Unknown token type";
5636     }
5637 wakaba 1.52
5638     ## As if </colgroup>
5639 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL) {
5640 wakaba 1.79 !!!cp ('t269');
5641 wakaba 1.104 ## TODO: Wrong error type?
5642 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:colgroup', token => $token);
5643 wakaba 1.52 ## Ignore the token
5644 wakaba 1.125 !!!nack ('t269.1');
5645 wakaba 1.52 !!!next-token;
5646 wakaba 1.126 next B;
5647 wakaba 1.52 } else {
5648 wakaba 1.79 !!!cp ('t270');
5649 wakaba 1.52 pop @{$self->{open_elements}}; # colgroup
5650 wakaba 1.54 $self->{insertion_mode} = IN_TABLE_IM;
5651 wakaba 1.125 !!!ack-later;
5652 wakaba 1.52 ## reprocess
5653 wakaba 1.126 next B;
5654 wakaba 1.52 }
5655 wakaba 1.101 } elsif ($self->{insertion_mode} & SELECT_IMS) {
5656 wakaba 1.58 if ($token->{type} == CHARACTER_TOKEN) {
5657 wakaba 1.79 !!!cp ('t271');
5658 wakaba 1.58 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
5659     !!!next-token;
5660 wakaba 1.126 next B;
5661 wakaba 1.58 } elsif ($token->{type} == START_TAG_TOKEN) {
5662 wakaba 1.123 if ($token->{tag_name} eq 'option') {
5663     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5664     !!!cp ('t272');
5665     ## As if </option>
5666     pop @{$self->{open_elements}};
5667     } else {
5668     !!!cp ('t273');
5669     }
5670 wakaba 1.52
5671 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5672 wakaba 1.125 !!!nack ('t273.1');
5673 wakaba 1.123 !!!next-token;
5674 wakaba 1.126 next B;
5675 wakaba 1.123 } elsif ($token->{tag_name} eq 'optgroup') {
5676     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5677     !!!cp ('t274');
5678     ## As if </option>
5679     pop @{$self->{open_elements}};
5680     } else {
5681     !!!cp ('t275');
5682     }
5683 wakaba 1.52
5684 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5685     !!!cp ('t276');
5686     ## As if </optgroup>
5687     pop @{$self->{open_elements}};
5688     } else {
5689     !!!cp ('t277');
5690     }
5691 wakaba 1.52
5692 wakaba 1.123 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
5693 wakaba 1.125 !!!nack ('t277.1');
5694 wakaba 1.123 !!!next-token;
5695 wakaba 1.126 next B;
5696 wakaba 1.101 } elsif ($token->{tag_name} eq 'select' or
5697     $token->{tag_name} eq 'input' or
5698     ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5699     {
5700     caption => 1, table => 1,
5701     tbody => 1, tfoot => 1, thead => 1,
5702     tr => 1, td => 1, th => 1,
5703     }->{$token->{tag_name}})) {
5704     ## TODO: The type below is not good - <select> is replaced by </select>
5705 wakaba 1.113 !!!parse-error (type => 'not closed:select', token => $token);
5706 wakaba 1.101 ## NOTE: As if the token were </select> (<select> case) or
5707     ## as if there were </select> (otherwise).
5708 wakaba 1.123 ## have an element in table scope
5709     my $i;
5710     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5711     my $node = $self->{open_elements}->[$_];
5712     if ($node->[1] & SELECT_EL) {
5713     !!!cp ('t278');
5714     $i = $_;
5715     last INSCOPE;
5716     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5717     !!!cp ('t279');
5718     last INSCOPE;
5719     }
5720     } # INSCOPE
5721     unless (defined $i) {
5722     !!!cp ('t280');
5723     !!!parse-error (type => 'unmatched end tag:select', token => $token);
5724     ## Ignore the token
5725 wakaba 1.125 !!!nack ('t280.1');
5726 wakaba 1.123 !!!next-token;
5727 wakaba 1.126 next B;
5728 wakaba 1.123 }
5729 wakaba 1.52
5730 wakaba 1.123 !!!cp ('t281');
5731     splice @{$self->{open_elements}}, $i;
5732 wakaba 1.52
5733 wakaba 1.123 $self->_reset_insertion_mode;
5734 wakaba 1.47
5735 wakaba 1.101 if ($token->{tag_name} eq 'select') {
5736 wakaba 1.125 !!!nack ('t281.2');
5737 wakaba 1.101 !!!next-token;
5738 wakaba 1.126 next B;
5739 wakaba 1.101 } else {
5740     !!!cp ('t281.1');
5741 wakaba 1.125 !!!ack-later;
5742 wakaba 1.101 ## Reprocess the token.
5743 wakaba 1.126 next B;
5744 wakaba 1.101 }
5745 wakaba 1.58 } else {
5746 wakaba 1.79 !!!cp ('t282');
5747 wakaba 1.113 !!!parse-error (type => 'in select:'.$token->{tag_name}, token => $token);
5748 wakaba 1.58 ## Ignore the token
5749 wakaba 1.125 !!!nack ('t282.1');
5750 wakaba 1.58 !!!next-token;
5751 wakaba 1.126 next B;
5752 wakaba 1.58 }
5753     } elsif ($token->{type} == END_TAG_TOKEN) {
5754 wakaba 1.123 if ($token->{tag_name} eq 'optgroup') {
5755     if ($self->{open_elements}->[-1]->[1] & OPTION_EL and
5756     $self->{open_elements}->[-2]->[1] & OPTGROUP_EL) {
5757     !!!cp ('t283');
5758     ## As if </option>
5759     splice @{$self->{open_elements}}, -2;
5760     } elsif ($self->{open_elements}->[-1]->[1] & OPTGROUP_EL) {
5761     !!!cp ('t284');
5762     pop @{$self->{open_elements}};
5763     } else {
5764     !!!cp ('t285');
5765     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5766     ## Ignore the token
5767     }
5768 wakaba 1.125 !!!nack ('t285.1');
5769 wakaba 1.123 !!!next-token;
5770 wakaba 1.126 next B;
5771 wakaba 1.123 } elsif ($token->{tag_name} eq 'option') {
5772     if ($self->{open_elements}->[-1]->[1] & OPTION_EL) {
5773     !!!cp ('t286');
5774     pop @{$self->{open_elements}};
5775     } else {
5776     !!!cp ('t287');
5777     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5778     ## Ignore the token
5779     }
5780 wakaba 1.125 !!!nack ('t287.1');
5781 wakaba 1.123 !!!next-token;
5782 wakaba 1.126 next B;
5783 wakaba 1.123 } elsif ($token->{tag_name} eq 'select') {
5784     ## have an element in table scope
5785     my $i;
5786     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5787     my $node = $self->{open_elements}->[$_];
5788     if ($node->[1] & SELECT_EL) {
5789     !!!cp ('t288');
5790     $i = $_;
5791     last INSCOPE;
5792     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5793     !!!cp ('t289');
5794     last INSCOPE;
5795     }
5796     } # INSCOPE
5797     unless (defined $i) {
5798     !!!cp ('t290');
5799     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5800     ## Ignore the token
5801 wakaba 1.125 !!!nack ('t290.1');
5802 wakaba 1.123 !!!next-token;
5803 wakaba 1.126 next B;
5804 wakaba 1.123 }
5805 wakaba 1.52
5806 wakaba 1.123 !!!cp ('t291');
5807     splice @{$self->{open_elements}}, $i;
5808 wakaba 1.52
5809 wakaba 1.123 $self->_reset_insertion_mode;
5810 wakaba 1.52
5811 wakaba 1.125 !!!nack ('t291.1');
5812 wakaba 1.123 !!!next-token;
5813 wakaba 1.126 next B;
5814 wakaba 1.101 } elsif ($self->{insertion_mode} == IN_SELECT_IN_TABLE_IM and
5815     {
5816     caption => 1, table => 1, tbody => 1,
5817     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
5818     }->{$token->{tag_name}}) {
5819 wakaba 1.83 ## TODO: The following is wrong?
5820 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
5821 wakaba 1.52
5822 wakaba 1.123 ## have an element in table scope
5823     my $i;
5824     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5825     my $node = $self->{open_elements}->[$_];
5826     if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
5827     !!!cp ('t292');
5828     $i = $_;
5829     last INSCOPE;
5830     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5831     !!!cp ('t293');
5832     last INSCOPE;
5833     }
5834     } # INSCOPE
5835     unless (defined $i) {
5836     !!!cp ('t294');
5837     ## Ignore the token
5838 wakaba 1.125 !!!nack ('t294.1');
5839 wakaba 1.123 !!!next-token;
5840 wakaba 1.126 next B;
5841 wakaba 1.123 }
5842 wakaba 1.52
5843 wakaba 1.123 ## As if </select>
5844     ## have an element in table scope
5845     undef $i;
5846     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
5847     my $node = $self->{open_elements}->[$_];
5848     if ($node->[1] & SELECT_EL) {
5849     !!!cp ('t295');
5850     $i = $_;
5851     last INSCOPE;
5852     } elsif ($node->[1] & TABLE_SCOPING_EL) {
5853 wakaba 1.83 ## ISSUE: Can this state be reached?
5854 wakaba 1.123 !!!cp ('t296');
5855     last INSCOPE;
5856     }
5857     } # INSCOPE
5858     unless (defined $i) {
5859     !!!cp ('t297');
5860 wakaba 1.83 ## TODO: Is the following error type correct?
5861 wakaba 1.123 !!!parse-error (type => 'unmatched end tag:select', token => $token);
5862     ## Ignore the </select> token
5863 wakaba 1.125 !!!nack ('t297.1');
5864 wakaba 1.123 !!!next-token; ## TODO: ok?
5865 wakaba 1.126 next B;
5866 wakaba 1.123 }
5867 wakaba 1.52
5868 wakaba 1.123 !!!cp ('t298');
5869     splice @{$self->{open_elements}}, $i;
5870 wakaba 1.52
5871 wakaba 1.123 $self->_reset_insertion_mode;
5872 wakaba 1.52
5873 wakaba 1.125 !!!ack-later;
5874 wakaba 1.123 ## reprocess
5875 wakaba 1.126 next B;
5876 wakaba 1.58 } else {
5877 wakaba 1.79 !!!cp ('t299');
5878 wakaba 1.113 !!!parse-error (type => 'in select:/'.$token->{tag_name}, token => $token);
5879 wakaba 1.52 ## Ignore the token
5880 wakaba 1.125 !!!nack ('t299.3');
5881 wakaba 1.52 !!!next-token;
5882 wakaba 1.126 next B;
5883 wakaba 1.58 }
5884 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5885 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
5886 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
5887     !!!cp ('t299.1');
5888 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
5889 wakaba 1.104 } else {
5890     !!!cp ('t299.2');
5891     }
5892    
5893     ## Stop parsing.
5894     last B;
5895 wakaba 1.58 } else {
5896     die "$0: $token->{type}: Unknown token type";
5897     }
5898 wakaba 1.56 } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
5899 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5900 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5901     my $data = $1;
5902     ## As if in body
5903     $reconstruct_active_formatting_elements->($insert_to_current);
5904    
5905     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5906    
5907     unless (length $token->{data}) {
5908 wakaba 1.79 !!!cp ('t300');
5909 wakaba 1.52 !!!next-token;
5910 wakaba 1.126 next B;
5911 wakaba 1.52 }
5912     }
5913    
5914 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5915 wakaba 1.79 !!!cp ('t301');
5916 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
5917 wakaba 1.52
5918 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5919 wakaba 1.79 } else {
5920     !!!cp ('t302');
5921 wakaba 1.52 }
5922    
5923     ## "after body" insertion mode
5924 wakaba 1.113 !!!parse-error (type => 'after body:#character', token => $token);
5925 wakaba 1.52
5926 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5927 wakaba 1.52 ## reprocess
5928 wakaba 1.126 next B;
5929 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
5930 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5931 wakaba 1.79 !!!cp ('t303');
5932 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
5933 wakaba 1.52
5934 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5935 wakaba 1.79 } else {
5936     !!!cp ('t304');
5937 wakaba 1.52 }
5938    
5939     ## "after body" insertion mode
5940 wakaba 1.113 !!!parse-error (type => 'after body:'.$token->{tag_name}, token => $token);
5941 wakaba 1.52
5942 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5943 wakaba 1.125 !!!ack-later;
5944 wakaba 1.52 ## reprocess
5945 wakaba 1.126 next B;
5946 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
5947 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
5948 wakaba 1.79 !!!cp ('t305');
5949 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
5950 wakaba 1.52
5951 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
5952 wakaba 1.84 ## Reprocess in the "after body" insertion mode.
5953 wakaba 1.79 } else {
5954     !!!cp ('t306');
5955 wakaba 1.52 }
5956    
5957     ## "after body" insertion mode
5958     if ($token->{tag_name} eq 'html') {
5959     if (defined $self->{inner_html_node}) {
5960 wakaba 1.79 !!!cp ('t307');
5961 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:html', token => $token);
5962 wakaba 1.52 ## Ignore the token
5963     !!!next-token;
5964 wakaba 1.126 next B;
5965 wakaba 1.52 } else {
5966 wakaba 1.79 !!!cp ('t308');
5967 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_BODY_IM;
5968 wakaba 1.52 !!!next-token;
5969 wakaba 1.126 next B;
5970 wakaba 1.52 }
5971     } else {
5972 wakaba 1.79 !!!cp ('t309');
5973 wakaba 1.113 !!!parse-error (type => 'after body:/'.$token->{tag_name}, token => $token);
5974 wakaba 1.52
5975 wakaba 1.54 $self->{insertion_mode} = IN_BODY_IM;
5976 wakaba 1.52 ## reprocess
5977 wakaba 1.126 next B;
5978 wakaba 1.52 }
5979 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
5980     !!!cp ('t309.2');
5981     ## Stop parsing
5982     last B;
5983 wakaba 1.52 } else {
5984     die "$0: $token->{type}: Unknown token type";
5985     }
5986 wakaba 1.56 } elsif ($self->{insertion_mode} & FRAME_IMS) {
5987 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
5988 wakaba 1.52 if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5989     $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5990    
5991     unless (length $token->{data}) {
5992 wakaba 1.79 !!!cp ('t310');
5993 wakaba 1.52 !!!next-token;
5994 wakaba 1.126 next B;
5995 wakaba 1.52 }
5996     }
5997    
5998     if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {
5999 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6000 wakaba 1.79 !!!cp ('t311');
6001 wakaba 1.113 !!!parse-error (type => 'in frameset:#character', token => $token);
6002 wakaba 1.54 } elsif ($self->{insertion_mode} == AFTER_FRAMESET_IM) {
6003 wakaba 1.79 !!!cp ('t312');
6004 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
6005 wakaba 1.52 } else { # "after html frameset"
6006 wakaba 1.79 !!!cp ('t313');
6007 wakaba 1.113 !!!parse-error (type => 'after html:#character', token => $token);
6008 wakaba 1.52
6009 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6010 wakaba 1.84 ## Reprocess in the "after frameset" insertion mode.
6011 wakaba 1.113 !!!parse-error (type => 'after frameset:#character', token => $token);
6012 wakaba 1.52 }
6013    
6014     ## Ignore the token.
6015     if (length $token->{data}) {
6016 wakaba 1.79 !!!cp ('t314');
6017 wakaba 1.52 ## reprocess the rest of characters
6018     } else {
6019 wakaba 1.79 !!!cp ('t315');
6020 wakaba 1.52 !!!next-token;
6021     }
6022 wakaba 1.126 next B;
6023 wakaba 1.52 }
6024    
6025     die qq[$0: Character "$token->{data}"];
6026 wakaba 1.55 } elsif ($token->{type} == START_TAG_TOKEN) {
6027 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
6028 wakaba 1.79 !!!cp ('t316');
6029 wakaba 1.113 !!!parse-error (type => 'after html:'.$token->{tag_name}, token => $token);
6030 wakaba 1.1
6031 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6032 wakaba 1.84 ## Process in the "after frameset" insertion mode.
6033 wakaba 1.79 } else {
6034     !!!cp ('t317');
6035     }
6036 wakaba 1.1
6037 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6038 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6039 wakaba 1.79 !!!cp ('t318');
6040 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6041 wakaba 1.125 !!!nack ('t318.1');
6042 wakaba 1.52 !!!next-token;
6043 wakaba 1.126 next B;
6044 wakaba 1.52 } elsif ($token->{tag_name} eq 'frame' and
6045 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6046 wakaba 1.79 !!!cp ('t319');
6047 wakaba 1.116 !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
6048 wakaba 1.52 pop @{$self->{open_elements}};
6049 wakaba 1.125 !!!ack ('t319.1');
6050 wakaba 1.52 !!!next-token;
6051 wakaba 1.126 next B;
6052 wakaba 1.52 } elsif ($token->{tag_name} eq 'noframes') {
6053 wakaba 1.79 !!!cp ('t320');
6054 wakaba 1.52 ## NOTE: As if in body.
6055 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6056 wakaba 1.126 next B;
6057 wakaba 1.52 } else {
6058 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6059 wakaba 1.79 !!!cp ('t321');
6060 wakaba 1.113 !!!parse-error (type => 'in frameset:'.$token->{tag_name}, token => $token);
6061 wakaba 1.52 } else {
6062 wakaba 1.79 !!!cp ('t322');
6063 wakaba 1.113 !!!parse-error (type => 'after frameset:'.$token->{tag_name}, token => $token);
6064 wakaba 1.52 }
6065     ## Ignore the token
6066 wakaba 1.125 !!!nack ('t322.1');
6067 wakaba 1.52 !!!next-token;
6068 wakaba 1.126 next B;
6069 wakaba 1.52 }
6070 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6071 wakaba 1.54 if ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) {
6072 wakaba 1.79 !!!cp ('t323');
6073 wakaba 1.113 !!!parse-error (type => 'after html:/'.$token->{tag_name}, token => $token);
6074 wakaba 1.1
6075 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6076 wakaba 1.84 ## Process in the "after frameset" insertion mode.
6077 wakaba 1.79 } else {
6078     !!!cp ('t324');
6079 wakaba 1.52 }
6080 wakaba 1.1
6081 wakaba 1.52 if ($token->{tag_name} eq 'frameset' and
6082 wakaba 1.54 $self->{insertion_mode} == IN_FRAMESET_IM) {
6083 wakaba 1.123 if ($self->{open_elements}->[-1]->[1] & HTML_EL and
6084 wakaba 1.52 @{$self->{open_elements}} == 1) {
6085 wakaba 1.79 !!!cp ('t325');
6086 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6087 wakaba 1.52 ## Ignore the token
6088     !!!next-token;
6089     } else {
6090 wakaba 1.79 !!!cp ('t326');
6091 wakaba 1.52 pop @{$self->{open_elements}};
6092     !!!next-token;
6093     }
6094 wakaba 1.47
6095 wakaba 1.52 if (not defined $self->{inner_html_node} and
6096 wakaba 1.123 not ($self->{open_elements}->[-1]->[1] & FRAMESET_EL)) {
6097 wakaba 1.79 !!!cp ('t327');
6098 wakaba 1.54 $self->{insertion_mode} = AFTER_FRAMESET_IM;
6099 wakaba 1.79 } else {
6100     !!!cp ('t328');
6101 wakaba 1.52 }
6102 wakaba 1.126 next B;
6103 wakaba 1.52 } elsif ($token->{tag_name} eq 'html' and
6104 wakaba 1.54 $self->{insertion_mode} == AFTER_FRAMESET_IM) {
6105 wakaba 1.79 !!!cp ('t329');
6106 wakaba 1.54 $self->{insertion_mode} = AFTER_HTML_FRAMESET_IM;
6107 wakaba 1.52 !!!next-token;
6108 wakaba 1.126 next B;
6109 wakaba 1.52 } else {
6110 wakaba 1.54 if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6111 wakaba 1.79 !!!cp ('t330');
6112 wakaba 1.113 !!!parse-error (type => 'in frameset:/'.$token->{tag_name}, token => $token);
6113 wakaba 1.52 } else {
6114 wakaba 1.79 !!!cp ('t331');
6115 wakaba 1.113 !!!parse-error (type => 'after frameset:/'.$token->{tag_name}, token => $token);
6116 wakaba 1.52 }
6117     ## Ignore the token
6118     !!!next-token;
6119 wakaba 1.126 next B;
6120 wakaba 1.52 }
6121 wakaba 1.104 } elsif ($token->{type} == END_OF_FILE_TOKEN) {
6122 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & HTML_EL and
6123 wakaba 1.104 @{$self->{open_elements}} == 1) { # redundant, maybe
6124     !!!cp ('t331.1');
6125 wakaba 1.113 !!!parse-error (type => 'in body:#eof', token => $token);
6126 wakaba 1.104 } else {
6127     !!!cp ('t331.2');
6128     }
6129    
6130     ## Stop parsing
6131     last B;
6132 wakaba 1.52 } else {
6133     die "$0: $token->{type}: Unknown token type";
6134     }
6135 wakaba 1.47
6136 wakaba 1.52 ## ISSUE: An issue in spec here
6137     } else {
6138     die "$0: $self->{insertion_mode}: Unknown insertion mode";
6139     }
6140 wakaba 1.47
6141 wakaba 1.52 ## "in body" insertion mode
6142 wakaba 1.55 if ($token->{type} == START_TAG_TOKEN) {
6143 wakaba 1.52 if ($token->{tag_name} eq 'script') {
6144 wakaba 1.79 !!!cp ('t332');
6145 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6146 wakaba 1.100 $script_start_tag->();
6147 wakaba 1.126 next B;
6148 wakaba 1.52 } elsif ($token->{tag_name} eq 'style') {
6149 wakaba 1.79 !!!cp ('t333');
6150 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6151 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6152 wakaba 1.126 next B;
6153 wakaba 1.52 } elsif ({
6154     base => 1, link => 1,
6155     }->{$token->{tag_name}}) {
6156 wakaba 1.79 !!!cp ('t334');
6157 wakaba 1.52 ## NOTE: This is an "as if in head" code clone, only "-t" differs
6158 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6159 wakaba 1.52 pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6160 wakaba 1.125 !!!ack ('t334.1');
6161 wakaba 1.52 !!!next-token;
6162 wakaba 1.126 next B;
6163 wakaba 1.52 } elsif ($token->{tag_name} eq 'meta') {
6164     ## NOTE: This is an "as if in head" code clone, only "-t" differs
6165 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6166 wakaba 1.66 my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.
6167 wakaba 1.46
6168 wakaba 1.52 unless ($self->{confident}) {
6169     if ($token->{attributes}->{charset}) { ## TODO: And if supported
6170 wakaba 1.79 !!!cp ('t335');
6171 wakaba 1.63 $self->{change_encoding}
6172 wakaba 1.114 ->($self, $token->{attributes}->{charset}->{value}, $token);
6173 wakaba 1.66
6174     $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6175     ->set_user_data (manakai_has_reference =>
6176     $token->{attributes}->{charset}
6177     ->{has_reference});
6178 wakaba 1.63 } elsif ($token->{attributes}->{content}) {
6179 wakaba 1.52 ## ISSUE: The algorithm name in the spec was incorrect, so it was not linked to the definition.
6180 wakaba 1.63 if ($token->{attributes}->{content}->{value}
6181 wakaba 1.70 =~ /\A[^;]*;[\x09-\x0D\x20]*[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6182     [\x09-\x0D\x20]*=
6183 wakaba 1.52 [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6184     ([^"'\x09-\x0D\x20][^\x09-\x0D\x20]*))/x) {
6185 wakaba 1.79 !!!cp ('t336');
6186 wakaba 1.63 $self->{change_encoding}
6187 wakaba 1.114 ->($self, defined $1 ? $1 : defined $2 ? $2 : $3, $token);
6188 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6189     ->set_user_data (manakai_has_reference =>
6190     $token->{attributes}->{content}
6191     ->{has_reference});
6192 wakaba 1.63 }
6193 wakaba 1.52 }
6194 wakaba 1.66 } else {
6195     if ($token->{attributes}->{charset}) {
6196 wakaba 1.79 !!!cp ('t337');
6197 wakaba 1.66 $meta_el->[0]->get_attribute_node_ns (undef, 'charset')
6198     ->set_user_data (manakai_has_reference =>
6199     $token->{attributes}->{charset}
6200     ->{has_reference});
6201     }
6202 wakaba 1.68 if ($token->{attributes}->{content}) {
6203 wakaba 1.79 !!!cp ('t338');
6204 wakaba 1.68 $meta_el->[0]->get_attribute_node_ns (undef, 'content')
6205     ->set_user_data (manakai_has_reference =>
6206     $token->{attributes}->{content}
6207     ->{has_reference});
6208     }
6209 wakaba 1.52 }
6210 wakaba 1.1
6211 wakaba 1.125 !!!ack ('t338.1');
6212 wakaba 1.52 !!!next-token;
6213 wakaba 1.126 next B;
6214 wakaba 1.52 } elsif ($token->{tag_name} eq 'title') {
6215 wakaba 1.79 !!!cp ('t341');
6216 wakaba 1.52 ## NOTE: This is an "as if in head" code clone
6217 wakaba 1.96 $parse_rcdata->(RCDATA_CONTENT_MODEL);
6218 wakaba 1.126 next B;
6219 wakaba 1.52 } elsif ($token->{tag_name} eq 'body') {
6220 wakaba 1.113 !!!parse-error (type => 'in body:body', token => $token);
6221 wakaba 1.46
6222 wakaba 1.52 if (@{$self->{open_elements}} == 1 or
6223 wakaba 1.123 not ($self->{open_elements}->[1]->[1] & BODY_EL)) {
6224 wakaba 1.79 !!!cp ('t342');
6225 wakaba 1.52 ## Ignore the token
6226     } else {
6227     my $body_el = $self->{open_elements}->[1]->[0];
6228     for my $attr_name (keys %{$token->{attributes}}) {
6229     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
6230 wakaba 1.79 !!!cp ('t343');
6231 wakaba 1.52 $body_el->set_attribute_ns
6232     (undef, [undef, $attr_name],
6233     $token->{attributes}->{$attr_name}->{value});
6234     }
6235     }
6236     }
6237 wakaba 1.125 !!!nack ('t343.1');
6238 wakaba 1.52 !!!next-token;
6239 wakaba 1.126 next B;
6240 wakaba 1.52 } elsif ({
6241     address => 1, blockquote => 1, center => 1, dir => 1,
6242 wakaba 1.85 div => 1, dl => 1, fieldset => 1,
6243     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6244 wakaba 1.97 menu => 1, ol => 1, p => 1, ul => 1,
6245     pre => 1, listing => 1,
6246 wakaba 1.109 form => 1,
6247     table => 1,
6248     hr => 1,
6249 wakaba 1.52 }->{$token->{tag_name}}) {
6250 wakaba 1.109 if ($token->{tag_name} eq 'form' and defined $self->{form_element}) {
6251     !!!cp ('t350');
6252 wakaba 1.113 !!!parse-error (type => 'in form:form', token => $token);
6253 wakaba 1.109 ## Ignore the token
6254 wakaba 1.125 !!!nack ('t350.1');
6255 wakaba 1.109 !!!next-token;
6256 wakaba 1.126 next B;
6257 wakaba 1.109 }
6258    
6259 wakaba 1.52 ## has a p element in scope
6260     INSCOPE: for (reverse @{$self->{open_elements}}) {
6261 wakaba 1.123 if ($_->[1] & P_EL) {
6262 wakaba 1.79 !!!cp ('t344');
6263 wakaba 1.125 !!!back-token; # <form>
6264 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6265     line => $token->{line}, column => $token->{column}};
6266 wakaba 1.126 next B;
6267 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6268 wakaba 1.79 !!!cp ('t345');
6269 wakaba 1.52 last INSCOPE;
6270     }
6271     } # INSCOPE
6272    
6273 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6274 wakaba 1.97 if ($token->{tag_name} eq 'pre' or $token->{tag_name} eq 'listing') {
6275 wakaba 1.125 !!!nack ('t346.1');
6276 wakaba 1.52 !!!next-token;
6277 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6278 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6279     unless (length $token->{data}) {
6280 wakaba 1.79 !!!cp ('t346');
6281 wakaba 1.1 !!!next-token;
6282 wakaba 1.79 } else {
6283     !!!cp ('t349');
6284 wakaba 1.52 }
6285 wakaba 1.79 } else {
6286     !!!cp ('t348');
6287 wakaba 1.52 }
6288 wakaba 1.109 } elsif ($token->{tag_name} eq 'form') {
6289     !!!cp ('t347.1');
6290     $self->{form_element} = $self->{open_elements}->[-1]->[0];
6291    
6292 wakaba 1.125 !!!nack ('t347.2');
6293 wakaba 1.109 !!!next-token;
6294     } elsif ($token->{tag_name} eq 'table') {
6295     !!!cp ('t382');
6296     push @{$open_tables}, [$self->{open_elements}->[-1]->[0]];
6297    
6298     $self->{insertion_mode} = IN_TABLE_IM;
6299    
6300 wakaba 1.125 !!!nack ('t382.1');
6301 wakaba 1.109 !!!next-token;
6302     } elsif ($token->{tag_name} eq 'hr') {
6303     !!!cp ('t386');
6304     pop @{$self->{open_elements}};
6305    
6306 wakaba 1.125 !!!nack ('t386.1');
6307 wakaba 1.109 !!!next-token;
6308 wakaba 1.52 } else {
6309 wakaba 1.125 !!!nack ('t347.1');
6310 wakaba 1.52 !!!next-token;
6311     }
6312 wakaba 1.126 next B;
6313 wakaba 1.109 } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {
6314 wakaba 1.52 ## has a p element in scope
6315     INSCOPE: for (reverse @{$self->{open_elements}}) {
6316 wakaba 1.123 if ($_->[1] & P_EL) {
6317 wakaba 1.79 !!!cp ('t353');
6318 wakaba 1.125 !!!back-token; # <x>
6319 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6320     line => $token->{line}, column => $token->{column}};
6321 wakaba 1.126 next B;
6322 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6323 wakaba 1.79 !!!cp ('t354');
6324 wakaba 1.52 last INSCOPE;
6325     }
6326     } # INSCOPE
6327    
6328     ## Step 1
6329     my $i = -1;
6330     my $node = $self->{open_elements}->[$i];
6331 wakaba 1.109 my $li_or_dtdd = {li => {li => 1},
6332     dt => {dt => 1, dd => 1},
6333     dd => {dt => 1, dd => 1}}->{$token->{tag_name}};
6334 wakaba 1.52 LI: {
6335     ## Step 2
6336 wakaba 1.123 if ($li_or_dtdd->{$node->[0]->manakai_local_name}) {
6337 wakaba 1.52 if ($i != -1) {
6338 wakaba 1.79 !!!cp ('t355');
6339 wakaba 1.122 !!!parse-error (type => 'not closed',
6340     value => $self->{open_elements}->[-1]->[0]
6341     ->manakai_local_name,
6342     token => $token);
6343 wakaba 1.79 } else {
6344     !!!cp ('t356');
6345 wakaba 1.52 }
6346     splice @{$self->{open_elements}}, $i;
6347     last LI;
6348 wakaba 1.79 } else {
6349     !!!cp ('t357');
6350 wakaba 1.52 }
6351    
6352     ## Step 3
6353 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
6354 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
6355 wakaba 1.123 ($node->[1] & SPECIAL_EL or
6356     $node->[1] & SCOPING_EL) and
6357     not ($node->[1] & ADDRESS_EL) and
6358     not ($node->[1] & DIV_EL)) {
6359 wakaba 1.79 !!!cp ('t358');
6360 wakaba 1.52 last LI;
6361     }
6362    
6363 wakaba 1.79 !!!cp ('t359');
6364 wakaba 1.52 ## Step 4
6365     $i--;
6366     $node = $self->{open_elements}->[$i];
6367     redo LI;
6368     } # LI
6369    
6370 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6371 wakaba 1.125 !!!nack ('t359.1');
6372 wakaba 1.52 !!!next-token;
6373 wakaba 1.126 next B;
6374 wakaba 1.52 } elsif ($token->{tag_name} eq 'plaintext') {
6375     ## has a p element in scope
6376     INSCOPE: for (reverse @{$self->{open_elements}}) {
6377 wakaba 1.123 if ($_->[1] & P_EL) {
6378 wakaba 1.79 !!!cp ('t367');
6379 wakaba 1.125 !!!back-token; # <plaintext>
6380 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'p',
6381     line => $token->{line}, column => $token->{column}};
6382 wakaba 1.126 next B;
6383 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6384 wakaba 1.79 !!!cp ('t368');
6385 wakaba 1.52 last INSCOPE;
6386 wakaba 1.46 }
6387 wakaba 1.52 } # INSCOPE
6388    
6389 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6390 wakaba 1.52
6391     $self->{content_model} = PLAINTEXT_CONTENT_MODEL;
6392    
6393 wakaba 1.125 !!!nack ('t368.1');
6394 wakaba 1.52 !!!next-token;
6395 wakaba 1.126 next B;
6396 wakaba 1.52 } elsif ($token->{tag_name} eq 'a') {
6397     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
6398     my $node = $active_formatting_elements->[$i];
6399 wakaba 1.123 if ($node->[1] & A_EL) {
6400 wakaba 1.79 !!!cp ('t371');
6401 wakaba 1.113 !!!parse-error (type => 'in a:a', token => $token);
6402 wakaba 1.52
6403 wakaba 1.125 !!!back-token; # <a>
6404 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'a',
6405     line => $token->{line}, column => $token->{column}};
6406 wakaba 1.113 $formatting_end_tag->($token);
6407 wakaba 1.52
6408     AFE2: for (reverse 0..$#$active_formatting_elements) {
6409     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
6410 wakaba 1.79 !!!cp ('t372');
6411 wakaba 1.52 splice @$active_formatting_elements, $_, 1;
6412     last AFE2;
6413 wakaba 1.1 }
6414 wakaba 1.52 } # AFE2
6415     OE: for (reverse 0..$#{$self->{open_elements}}) {
6416     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
6417 wakaba 1.79 !!!cp ('t373');
6418 wakaba 1.52 splice @{$self->{open_elements}}, $_, 1;
6419     last OE;
6420 wakaba 1.1 }
6421 wakaba 1.52 } # OE
6422     last AFE;
6423     } elsif ($node->[0] eq '#marker') {
6424 wakaba 1.79 !!!cp ('t374');
6425 wakaba 1.52 last AFE;
6426     }
6427     } # AFE
6428    
6429     $reconstruct_active_formatting_elements->($insert_to_current);
6430 wakaba 1.1
6431 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6432 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6433 wakaba 1.1
6434 wakaba 1.125 !!!nack ('t374.1');
6435 wakaba 1.52 !!!next-token;
6436 wakaba 1.126 next B;
6437 wakaba 1.52 } elsif ($token->{tag_name} eq 'nobr') {
6438     $reconstruct_active_formatting_elements->($insert_to_current);
6439 wakaba 1.1
6440 wakaba 1.52 ## has a |nobr| element in scope
6441     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6442     my $node = $self->{open_elements}->[$_];
6443 wakaba 1.123 if ($node->[1] & NOBR_EL) {
6444 wakaba 1.79 !!!cp ('t376');
6445 wakaba 1.113 !!!parse-error (type => 'in nobr:nobr', token => $token);
6446 wakaba 1.125 !!!back-token; # <nobr>
6447 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'nobr',
6448     line => $token->{line}, column => $token->{column}};
6449 wakaba 1.126 next B;
6450 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6451 wakaba 1.79 !!!cp ('t377');
6452 wakaba 1.52 last INSCOPE;
6453     }
6454     } # INSCOPE
6455    
6456 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6457 wakaba 1.52 push @$active_formatting_elements, $self->{open_elements}->[-1];
6458    
6459 wakaba 1.125 !!!nack ('t377.1');
6460 wakaba 1.52 !!!next-token;
6461 wakaba 1.126 next B;
6462 wakaba 1.52 } elsif ($token->{tag_name} eq 'button') {
6463     ## has a button element in scope
6464     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6465     my $node = $self->{open_elements}->[$_];
6466 wakaba 1.123 if ($node->[1] & BUTTON_EL) {
6467 wakaba 1.79 !!!cp ('t378');
6468 wakaba 1.113 !!!parse-error (type => 'in button:button', token => $token);
6469 wakaba 1.125 !!!back-token; # <button>
6470 wakaba 1.114 $token = {type => END_TAG_TOKEN, tag_name => 'button',
6471     line => $token->{line}, column => $token->{column}};
6472 wakaba 1.126 next B;
6473 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6474 wakaba 1.79 !!!cp ('t379');
6475 wakaba 1.52 last INSCOPE;
6476     }
6477     } # INSCOPE
6478    
6479     $reconstruct_active_formatting_elements->($insert_to_current);
6480    
6481 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6482 wakaba 1.85
6483     ## TODO: associate with $self->{form_element} if defined
6484    
6485 wakaba 1.52 push @$active_formatting_elements, ['#marker', ''];
6486 wakaba 1.1
6487 wakaba 1.125 !!!nack ('t379.1');
6488 wakaba 1.52 !!!next-token;
6489 wakaba 1.126 next B;
6490 wakaba 1.103 } elsif ({
6491 wakaba 1.109 xmp => 1,
6492     iframe => 1,
6493     noembed => 1,
6494     noframes => 1,
6495     noscript => 0, ## TODO: 1 if scripting is enabled
6496 wakaba 1.103 }->{$token->{tag_name}}) {
6497 wakaba 1.109 if ($token->{tag_name} eq 'xmp') {
6498     !!!cp ('t381');
6499     $reconstruct_active_formatting_elements->($insert_to_current);
6500     } else {
6501     !!!cp ('t399');
6502     }
6503     ## NOTE: There is an "as if in body" code clone.
6504 wakaba 1.96 $parse_rcdata->(CDATA_CONTENT_MODEL);
6505 wakaba 1.126 next B;
6506 wakaba 1.52 } elsif ($token->{tag_name} eq 'isindex') {
6507 wakaba 1.113 !!!parse-error (type => 'isindex', token => $token);
6508 wakaba 1.52
6509     if (defined $self->{form_element}) {
6510 wakaba 1.79 !!!cp ('t389');
6511 wakaba 1.52 ## Ignore the token
6512 wakaba 1.125 !!!nack ('t389'); ## NOTE: Not acknowledged.
6513 wakaba 1.52 !!!next-token;
6514 wakaba 1.126 next B;
6515 wakaba 1.52 } else {
6516     my $at = $token->{attributes};
6517     my $form_attrs;
6518     $form_attrs->{action} = $at->{action} if $at->{action};
6519     my $prompt_attr = $at->{prompt};
6520     $at->{name} = {name => 'name', value => 'isindex'};
6521     delete $at->{action};
6522     delete $at->{prompt};
6523     my @tokens = (
6524 wakaba 1.55 {type => START_TAG_TOKEN, tag_name => 'form',
6525 wakaba 1.114 attributes => $form_attrs,
6526     line => $token->{line}, column => $token->{column}},
6527     {type => START_TAG_TOKEN, tag_name => 'hr',
6528     line => $token->{line}, column => $token->{column}},
6529     {type => START_TAG_TOKEN, tag_name => 'p',
6530     line => $token->{line}, column => $token->{column}},
6531     {type => START_TAG_TOKEN, tag_name => 'label',
6532     line => $token->{line}, column => $token->{column}},
6533 wakaba 1.52 );
6534     if ($prompt_attr) {
6535 wakaba 1.79 !!!cp ('t390');
6536 wakaba 1.114 push @tokens, {type => CHARACTER_TOKEN, data => $prompt_attr->{value},
6537 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6538     };
6539 wakaba 1.1 } else {
6540 wakaba 1.79 !!!cp ('t391');
6541 wakaba 1.55 push @tokens, {type => CHARACTER_TOKEN,
6542 wakaba 1.114 data => 'This is a searchable index. Insert your search keywords here: ',
6543 wakaba 1.118 #line => $token->{line}, column => $token->{column},
6544     }; # SHOULD
6545 wakaba 1.52 ## TODO: make this configurable
6546 wakaba 1.1 }
6547 wakaba 1.52 push @tokens,
6548 wakaba 1.114 {type => START_TAG_TOKEN, tag_name => 'input', attributes => $at,
6549     line => $token->{line}, column => $token->{column}},
6550 wakaba 1.55 #{type => CHARACTER_TOKEN, data => ''}, # SHOULD
6551 wakaba 1.114 {type => END_TAG_TOKEN, tag_name => 'label',
6552     line => $token->{line}, column => $token->{column}},
6553     {type => END_TAG_TOKEN, tag_name => 'p',
6554     line => $token->{line}, column => $token->{column}},
6555     {type => START_TAG_TOKEN, tag_name => 'hr',
6556     line => $token->{line}, column => $token->{column}},
6557     {type => END_TAG_TOKEN, tag_name => 'form',
6558     line => $token->{line}, column => $token->{column}};
6559 wakaba 1.125 !!!nack ('t391.1'); ## NOTE: Not acknowledged.
6560 wakaba 1.52 !!!back-token (@tokens);
6561 wakaba 1.125 !!!next-token;
6562 wakaba 1.126 next B;
6563 wakaba 1.52 }
6564     } elsif ($token->{tag_name} eq 'textarea') {
6565     my $tag_name = $token->{tag_name};
6566     my $el;
6567 wakaba 1.126 !!!create-element ($el, $HTML_NS, $token->{tag_name}, $token->{attributes}, $token);
6568 wakaba 1.52
6569     ## TODO: $self->{form_element} if defined
6570     $self->{content_model} = RCDATA_CONTENT_MODEL;
6571     delete $self->{escape}; # MUST
6572    
6573     $insert->($el);
6574    
6575     my $text = '';
6576 wakaba 1.125 !!!nack ('t392.1');
6577 wakaba 1.52 !!!next-token;
6578 wakaba 1.55 if ($token->{type} == CHARACTER_TOKEN) {
6579 wakaba 1.52 $token->{data} =~ s/^\x0A//;
6580 wakaba 1.51 unless (length $token->{data}) {
6581 wakaba 1.79 !!!cp ('t392');
6582 wakaba 1.51 !!!next-token;
6583 wakaba 1.79 } else {
6584     !!!cp ('t393');
6585 wakaba 1.51 }
6586 wakaba 1.79 } else {
6587     !!!cp ('t394');
6588 wakaba 1.51 }
6589 wakaba 1.55 while ($token->{type} == CHARACTER_TOKEN) {
6590 wakaba 1.79 !!!cp ('t395');
6591 wakaba 1.52 $text .= $token->{data};
6592     !!!next-token;
6593     }
6594     if (length $text) {
6595 wakaba 1.79 !!!cp ('t396');
6596 wakaba 1.52 $el->manakai_append_text ($text);
6597     }
6598    
6599     $self->{content_model} = PCDATA_CONTENT_MODEL;
6600 wakaba 1.51
6601 wakaba 1.55 if ($token->{type} == END_TAG_TOKEN and
6602 wakaba 1.52 $token->{tag_name} eq $tag_name) {
6603 wakaba 1.79 !!!cp ('t397');
6604 wakaba 1.52 ## Ignore the token
6605     } else {
6606 wakaba 1.79 !!!cp ('t398');
6607 wakaba 1.113 !!!parse-error (type => 'in RCDATA:#'.$token->{type}, token => $token);
6608 wakaba 1.51 }
6609 wakaba 1.52 !!!next-token;
6610 wakaba 1.126 next B;
6611     } elsif ($token->{tag_name} eq 'math' or
6612     $token->{tag_name} eq 'svg') {
6613     $reconstruct_active_formatting_elements->($insert_to_current);
6614 wakaba 1.131
6615     ## "adjust SVG attributes" ('svg' only) - done in insert-element-f
6616    
6617     ## "adjust foreign attributes" - done in insert-element-f
6618 wakaba 1.126
6619 wakaba 1.131 !!!insert-element-f ($token->{tag_name} eq 'math' ? $MML_NS : $SVG_NS, $token->{tag_name}, $token->{attributes}, $token);
6620 wakaba 1.126
6621     if ($self->{self_closing}) {
6622     pop @{$self->{open_elements}};
6623     !!!ack ('t398.1');
6624     } else {
6625     !!!cp ('t398.2');
6626     $self->{insertion_mode} |= IN_FOREIGN_CONTENT_IM;
6627     ## NOTE: |<body><math><mi><svg>| -> "in foreign content" insertion
6628     ## mode, "in body" (not "in foreign content") secondary insertion
6629     ## mode, maybe.
6630     }
6631    
6632     !!!next-token;
6633     next B;
6634 wakaba 1.52 } elsif ({
6635     caption => 1, col => 1, colgroup => 1, frame => 1,
6636     frameset => 1, head => 1, option => 1, optgroup => 1,
6637     tbody => 1, td => 1, tfoot => 1, th => 1,
6638     thead => 1, tr => 1,
6639     }->{$token->{tag_name}}) {
6640 wakaba 1.79 !!!cp ('t401');
6641 wakaba 1.113 !!!parse-error (type => 'in body:'.$token->{tag_name}, token => $token);
6642 wakaba 1.52 ## Ignore the token
6643 wakaba 1.125 !!!nack ('t401.1'); ## NOTE: |<col/>| or |<frame/>| here is an error.
6644 wakaba 1.52 !!!next-token;
6645 wakaba 1.126 next B;
6646 wakaba 1.52
6647     ## ISSUE: An issue on HTML5 new elements in the spec.
6648     } else {
6649 wakaba 1.110 if ($token->{tag_name} eq 'image') {
6650     !!!cp ('t384');
6651 wakaba 1.113 !!!parse-error (type => 'image', token => $token);
6652 wakaba 1.110 $token->{tag_name} = 'img';
6653     } else {
6654     !!!cp ('t385');
6655     }
6656    
6657     ## NOTE: There is an "as if <br>" code clone.
6658 wakaba 1.52 $reconstruct_active_formatting_elements->($insert_to_current);
6659    
6660 wakaba 1.116 !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6661 wakaba 1.109
6662 wakaba 1.110 if ({
6663     applet => 1, marquee => 1, object => 1,
6664     }->{$token->{tag_name}}) {
6665     !!!cp ('t380');
6666     push @$active_formatting_elements, ['#marker', ''];
6667 wakaba 1.125 !!!nack ('t380.1');
6668 wakaba 1.110 } elsif ({
6669     b => 1, big => 1, em => 1, font => 1, i => 1,
6670     s => 1, small => 1, strile => 1,
6671     strong => 1, tt => 1, u => 1,
6672     }->{$token->{tag_name}}) {
6673     !!!cp ('t375');
6674     push @$active_formatting_elements, $self->{open_elements}->[-1];
6675 wakaba 1.125 !!!nack ('t375.1');
6676 wakaba 1.110 } elsif ($token->{tag_name} eq 'input') {
6677     !!!cp ('t388');
6678     ## TODO: associate with $self->{form_element} if defined
6679     pop @{$self->{open_elements}};
6680 wakaba 1.125 !!!ack ('t388.2');
6681 wakaba 1.110 } elsif ({
6682     area => 1, basefont => 1, bgsound => 1, br => 1,
6683     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
6684     #image => 1,
6685     }->{$token->{tag_name}}) {
6686     !!!cp ('t388.1');
6687     pop @{$self->{open_elements}};
6688 wakaba 1.125 !!!ack ('t388.3');
6689 wakaba 1.110 } elsif ($token->{tag_name} eq 'select') {
6690 wakaba 1.109 ## TODO: associate with $self->{form_element} if defined
6691    
6692     if ($self->{insertion_mode} & TABLE_IMS or
6693     $self->{insertion_mode} & BODY_TABLE_IMS or
6694     $self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6695     !!!cp ('t400.1');
6696     $self->{insertion_mode} = IN_SELECT_IN_TABLE_IM;
6697     } else {
6698     !!!cp ('t400.2');
6699     $self->{insertion_mode} = IN_SELECT_IM;
6700     }
6701 wakaba 1.125 !!!nack ('t400.3');
6702 wakaba 1.110 } else {
6703 wakaba 1.125 !!!nack ('t402');
6704 wakaba 1.109 }
6705 wakaba 1.51
6706 wakaba 1.52 !!!next-token;
6707 wakaba 1.126 next B;
6708 wakaba 1.52 }
6709 wakaba 1.55 } elsif ($token->{type} == END_TAG_TOKEN) {
6710 wakaba 1.52 if ($token->{tag_name} eq 'body') {
6711 wakaba 1.107 ## has a |body| element in scope
6712     my $i;
6713 wakaba 1.111 INSCOPE: {
6714     for (reverse @{$self->{open_elements}}) {
6715 wakaba 1.123 if ($_->[1] & BODY_EL) {
6716 wakaba 1.111 !!!cp ('t405');
6717     $i = $_;
6718     last INSCOPE;
6719 wakaba 1.123 } elsif ($_->[1] & SCOPING_EL) {
6720 wakaba 1.111 !!!cp ('t405.1');
6721     last;
6722     }
6723 wakaba 1.52 }
6724 wakaba 1.111
6725     !!!parse-error (type => 'start tag not allowed',
6726 wakaba 1.113 value => $token->{tag_name}, token => $token);
6727 wakaba 1.107 ## NOTE: Ignore the token.
6728 wakaba 1.52 !!!next-token;
6729 wakaba 1.126 next B;
6730 wakaba 1.111 } # INSCOPE
6731 wakaba 1.107
6732     for (@{$self->{open_elements}}) {
6733 wakaba 1.123 unless ($_->[1] & ALL_END_TAG_OPTIONAL_EL) {
6734 wakaba 1.107 !!!cp ('t403');
6735 wakaba 1.122 !!!parse-error (type => 'not closed',
6736     value => $_->[0]->manakai_local_name,
6737     token => $token);
6738 wakaba 1.107 last;
6739     } else {
6740     !!!cp ('t404');
6741     }
6742     }
6743    
6744     $self->{insertion_mode} = AFTER_BODY_IM;
6745     !!!next-token;
6746 wakaba 1.126 next B;
6747 wakaba 1.52 } elsif ($token->{tag_name} eq 'html') {
6748 wakaba 1.122 ## TODO: Update this code. It seems that the code below is not
6749     ## up-to-date, though it has same effect as speced.
6750 wakaba 1.123 if (@{$self->{open_elements}} > 1 and
6751     $self->{open_elements}->[1]->[1] & BODY_EL) {
6752 wakaba 1.52 ## ISSUE: There is an issue in the spec.
6753 wakaba 1.123 unless ($self->{open_elements}->[-1]->[1] & BODY_EL) {
6754 wakaba 1.79 !!!cp ('t406');
6755 wakaba 1.122 !!!parse-error (type => 'not closed',
6756     value => $self->{open_elements}->[1]->[0]
6757     ->manakai_local_name,
6758     token => $token);
6759 wakaba 1.79 } else {
6760     !!!cp ('t407');
6761 wakaba 1.1 }
6762 wakaba 1.54 $self->{insertion_mode} = AFTER_BODY_IM;
6763 wakaba 1.52 ## reprocess
6764 wakaba 1.126 next B;
6765 wakaba 1.51 } else {
6766 wakaba 1.79 !!!cp ('t408');
6767 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6768 wakaba 1.52 ## Ignore the token
6769     !!!next-token;
6770 wakaba 1.126 next B;
6771 wakaba 1.51 }
6772 wakaba 1.52 } elsif ({
6773     address => 1, blockquote => 1, center => 1, dir => 1,
6774     div => 1, dl => 1, fieldset => 1, listing => 1,
6775     menu => 1, ol => 1, pre => 1, ul => 1,
6776     dd => 1, dt => 1, li => 1,
6777 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6778 wakaba 1.52 }->{$token->{tag_name}}) {
6779     ## has an element in scope
6780     my $i;
6781     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6782     my $node = $self->{open_elements}->[$_];
6783 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
6784 wakaba 1.79 !!!cp ('t410');
6785 wakaba 1.52 $i = $_;
6786 wakaba 1.87 last INSCOPE;
6787 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6788 wakaba 1.79 !!!cp ('t411');
6789 wakaba 1.52 last INSCOPE;
6790 wakaba 1.51 }
6791 wakaba 1.52 } # INSCOPE
6792 wakaba 1.89
6793     unless (defined $i) { # has an element in scope
6794     !!!cp ('t413');
6795 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6796 wakaba 1.89 } else {
6797     ## Step 1. generate implied end tags
6798     while ({
6799     dd => ($token->{tag_name} ne 'dd'),
6800     dt => ($token->{tag_name} ne 'dt'),
6801     li => ($token->{tag_name} ne 'li'),
6802     p => 1,
6803 wakaba 1.123 }->{$self->{open_elements}->[-1]->[0]->manakai_local_name}) {
6804 wakaba 1.89 !!!cp ('t409');
6805     pop @{$self->{open_elements}};
6806     }
6807    
6808     ## Step 2.
6809 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6810     ne $token->{tag_name}) {
6811 wakaba 1.79 !!!cp ('t412');
6812 wakaba 1.122 !!!parse-error (type => 'not closed',
6813     value => $self->{open_elements}->[-1]->[0]
6814     ->manakai_local_name,
6815     token => $token);
6816 wakaba 1.51 } else {
6817 wakaba 1.89 !!!cp ('t414');
6818 wakaba 1.51 }
6819 wakaba 1.89
6820     ## Step 3.
6821 wakaba 1.52 splice @{$self->{open_elements}}, $i;
6822 wakaba 1.89
6823     ## Step 4.
6824     $clear_up_to_marker->()
6825     if {
6826 wakaba 1.103 applet => 1, button => 1, marquee => 1, object => 1,
6827 wakaba 1.89 }->{$token->{tag_name}};
6828 wakaba 1.51 }
6829 wakaba 1.52 !!!next-token;
6830 wakaba 1.126 next B;
6831 wakaba 1.52 } elsif ($token->{tag_name} eq 'form') {
6832 wakaba 1.92 undef $self->{form_element};
6833    
6834 wakaba 1.52 ## has an element in scope
6835 wakaba 1.92 my $i;
6836 wakaba 1.52 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6837     my $node = $self->{open_elements}->[$_];
6838 wakaba 1.123 if ($node->[1] & FORM_EL) {
6839 wakaba 1.79 !!!cp ('t418');
6840 wakaba 1.92 $i = $_;
6841 wakaba 1.52 last INSCOPE;
6842 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6843 wakaba 1.79 !!!cp ('t419');
6844 wakaba 1.52 last INSCOPE;
6845     }
6846     } # INSCOPE
6847 wakaba 1.92
6848     unless (defined $i) { # has an element in scope
6849 wakaba 1.79 !!!cp ('t421');
6850 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6851 wakaba 1.92 } else {
6852     ## Step 1. generate implied end tags
6853 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6854 wakaba 1.92 !!!cp ('t417');
6855     pop @{$self->{open_elements}};
6856     }
6857    
6858     ## Step 2.
6859 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6860     ne $token->{tag_name}) {
6861 wakaba 1.92 !!!cp ('t417.1');
6862 wakaba 1.122 !!!parse-error (type => 'not closed',
6863     value => $self->{open_elements}->[-1]->[0]
6864     ->manakai_local_name,
6865     token => $token);
6866 wakaba 1.92 } else {
6867     !!!cp ('t420');
6868     }
6869    
6870     ## Step 3.
6871     splice @{$self->{open_elements}}, $i;
6872 wakaba 1.52 }
6873    
6874     !!!next-token;
6875 wakaba 1.126 next B;
6876 wakaba 1.52 } elsif ({
6877     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
6878     }->{$token->{tag_name}}) {
6879     ## has an element in scope
6880     my $i;
6881     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6882     my $node = $self->{open_elements}->[$_];
6883 wakaba 1.123 if ($node->[1] & HEADING_EL) {
6884 wakaba 1.79 !!!cp ('t423');
6885 wakaba 1.52 $i = $_;
6886     last INSCOPE;
6887 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6888 wakaba 1.79 !!!cp ('t424');
6889 wakaba 1.52 last INSCOPE;
6890 wakaba 1.51 }
6891 wakaba 1.52 } # INSCOPE
6892 wakaba 1.93
6893     unless (defined $i) { # has an element in scope
6894     !!!cp ('t425.1');
6895 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6896 wakaba 1.79 } else {
6897 wakaba 1.93 ## Step 1. generate implied end tags
6898 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
6899 wakaba 1.93 !!!cp ('t422');
6900     pop @{$self->{open_elements}};
6901     }
6902    
6903     ## Step 2.
6904 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6905     ne $token->{tag_name}) {
6906 wakaba 1.93 !!!cp ('t425');
6907 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6908 wakaba 1.93 } else {
6909     !!!cp ('t426');
6910     }
6911    
6912     ## Step 3.
6913     splice @{$self->{open_elements}}, $i;
6914 wakaba 1.36 }
6915 wakaba 1.52
6916     !!!next-token;
6917 wakaba 1.126 next B;
6918 wakaba 1.87 } elsif ($token->{tag_name} eq 'p') {
6919     ## has an element in scope
6920     my $i;
6921     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
6922     my $node = $self->{open_elements}->[$_];
6923 wakaba 1.123 if ($node->[1] & P_EL) {
6924 wakaba 1.87 !!!cp ('t410.1');
6925     $i = $_;
6926 wakaba 1.88 last INSCOPE;
6927 wakaba 1.123 } elsif ($node->[1] & SCOPING_EL) {
6928 wakaba 1.87 !!!cp ('t411.1');
6929     last INSCOPE;
6930     }
6931     } # INSCOPE
6932 wakaba 1.91
6933     if (defined $i) {
6934 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
6935     ne $token->{tag_name}) {
6936 wakaba 1.87 !!!cp ('t412.1');
6937 wakaba 1.122 !!!parse-error (type => 'not closed',
6938     value => $self->{open_elements}->[-1]->[0]
6939     ->manakai_local_name,
6940     token => $token);
6941 wakaba 1.87 } else {
6942 wakaba 1.91 !!!cp ('t414.1');
6943 wakaba 1.87 }
6944 wakaba 1.91
6945 wakaba 1.87 splice @{$self->{open_elements}}, $i;
6946     } else {
6947 wakaba 1.91 !!!cp ('t413.1');
6948 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6949 wakaba 1.91
6950 wakaba 1.87 !!!cp ('t415.1');
6951     ## As if <p>, then reprocess the current token
6952     my $el;
6953 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'p',, $token);
6954 wakaba 1.87 $insert->($el);
6955 wakaba 1.91 ## NOTE: Not inserted into |$self->{open_elements}|.
6956 wakaba 1.87 }
6957 wakaba 1.91
6958 wakaba 1.87 !!!next-token;
6959 wakaba 1.126 next B;
6960 wakaba 1.52 } elsif ({
6961     a => 1,
6962     b => 1, big => 1, em => 1, font => 1, i => 1,
6963     nobr => 1, s => 1, small => 1, strile => 1,
6964     strong => 1, tt => 1, u => 1,
6965     }->{$token->{tag_name}}) {
6966 wakaba 1.79 !!!cp ('t427');
6967 wakaba 1.113 $formatting_end_tag->($token);
6968 wakaba 1.126 next B;
6969 wakaba 1.52 } elsif ($token->{tag_name} eq 'br') {
6970 wakaba 1.79 !!!cp ('t428');
6971 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:br', token => $token);
6972 wakaba 1.52
6973     ## As if <br>
6974     $reconstruct_active_formatting_elements->($insert_to_current);
6975    
6976     my $el;
6977 wakaba 1.126 !!!create-element ($el, $HTML_NS, 'br',, $token);
6978 wakaba 1.52 $insert->($el);
6979    
6980     ## Ignore the token.
6981     !!!next-token;
6982 wakaba 1.126 next B;
6983 wakaba 1.52 } elsif ({
6984     caption => 1, col => 1, colgroup => 1, frame => 1,
6985     frameset => 1, head => 1, option => 1, optgroup => 1,
6986     tbody => 1, td => 1, tfoot => 1, th => 1,
6987     thead => 1, tr => 1,
6988     area => 1, basefont => 1, bgsound => 1,
6989     embed => 1, hr => 1, iframe => 1, image => 1,
6990     img => 1, input => 1, isindex => 1, noembed => 1,
6991     noframes => 1, param => 1, select => 1, spacer => 1,
6992     table => 1, textarea => 1, wbr => 1,
6993     noscript => 0, ## TODO: if scripting is enabled
6994     }->{$token->{tag_name}}) {
6995 wakaba 1.79 !!!cp ('t429');
6996 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
6997 wakaba 1.52 ## Ignore the token
6998     !!!next-token;
6999 wakaba 1.126 next B;
7000 wakaba 1.52
7001     ## ISSUE: Issue on HTML5 new elements in spec
7002    
7003     } else {
7004     ## Step 1
7005     my $node_i = -1;
7006     my $node = $self->{open_elements}->[$node_i];
7007 wakaba 1.51
7008 wakaba 1.52 ## Step 2
7009     S2: {
7010 wakaba 1.123 if ($node->[0]->manakai_local_name eq $token->{tag_name}) {
7011 wakaba 1.52 ## Step 1
7012     ## generate implied end tags
7013 wakaba 1.123 while ($self->{open_elements}->[-1]->[1] & END_TAG_OPTIONAL_EL) {
7014 wakaba 1.79 !!!cp ('t430');
7015 wakaba 1.83 ## ISSUE: Can this case be reached?
7016 wakaba 1.86 pop @{$self->{open_elements}};
7017 wakaba 1.52 }
7018    
7019     ## Step 2
7020 wakaba 1.123 if ($self->{open_elements}->[-1]->[0]->manakai_local_name
7021     ne $token->{tag_name}) {
7022 wakaba 1.79 !!!cp ('t431');
7023 wakaba 1.58 ## NOTE: <x><y></x>
7024 wakaba 1.122 !!!parse-error (type => 'not closed',
7025     value => $self->{open_elements}->[-1]->[0]
7026     ->manakai_local_name,
7027     token => $token);
7028 wakaba 1.79 } else {
7029     !!!cp ('t432');
7030 wakaba 1.52 }
7031    
7032     ## Step 3
7033     splice @{$self->{open_elements}}, $node_i;
7034 wakaba 1.51
7035 wakaba 1.1 !!!next-token;
7036 wakaba 1.52 last S2;
7037 wakaba 1.1 } else {
7038 wakaba 1.52 ## Step 3
7039 wakaba 1.123 if (not ($node->[1] & FORMATTING_EL) and
7040 wakaba 1.52 #not $phrasing_category->{$node->[1]} and
7041 wakaba 1.123 ($node->[1] & SPECIAL_EL or
7042     $node->[1] & SCOPING_EL)) {
7043 wakaba 1.79 !!!cp ('t433');
7044 wakaba 1.113 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name}, token => $token);
7045 wakaba 1.52 ## Ignore the token
7046     !!!next-token;
7047     last S2;
7048     }
7049 wakaba 1.79
7050     !!!cp ('t434');
7051 wakaba 1.1 }
7052 wakaba 1.52
7053     ## Step 4
7054     $node_i--;
7055     $node = $self->{open_elements}->[$node_i];
7056    
7057     ## Step 5;
7058     redo S2;
7059     } # S2
7060 wakaba 1.126 next B;
7061 wakaba 1.1 }
7062     }
7063 wakaba 1.126 next B;
7064     } continue { # B
7065     if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) {
7066     ## NOTE: The code below is executed in cases where it does not have
7067     ## to be, but it is harmless even in those cases.
7068     ## has an element in scope
7069     INSCOPE: {
7070     for (reverse 0..$#{$self->{open_elements}}) {
7071     my $node = $self->{open_elements}->[$_];
7072     if ($node->[1] & FOREIGN_EL) {
7073     last INSCOPE;
7074     } elsif ($node->[1] & SCOPING_EL) {
7075     last;
7076     }
7077     }
7078    
7079     ## NOTE: No foreign element in scope.
7080     $self->{insertion_mode} &= ~ IN_FOREIGN_CONTENT_IM;
7081     } # INSCOPE
7082     }
7083 wakaba 1.1 } # B
7084    
7085     ## Stop parsing # MUST
7086    
7087     ## TODO: script stuffs
7088 wakaba 1.3 } # _tree_construct_main
7089    
## set_inner_html ($class, $node, $s, $onerror)
##
## Replaces the children of |$node| with the result of parsing the
## string |$s| as HTML (the |innerHTML| setter).
##
##   $class   - Parser class; |$class->new| / |$class->parse_string| are
##              invoked on it.
##   $node    - Target node.  Node type 9 (document) and node type 1
##              (element) are supported.
##   $s       - Markup string.  It is accessed through a reference to the
##              caller's argument, i.e. it is not copied.
##   $onerror - Optional error handler (code reference) invoked with
##              name/value pairs (|line|, |column|, |type|, |token|, ...).
##              In the element case a handler that |warn|s is used when
##              this argument is false.
##
## Dies if |$node| is neither a document nor an element.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  ## ISSUE: Should {confident} be true?

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## NOTE: Iterate over a copy of the child list, since the list is
    ## modified by |remove_child|.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## NOTE: The fragment is parsed into a scratch document; the
    ## resulting nodes are adopted into |$this_doc| at Step 11.
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    $doc->manakai_is_html (1);
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 8 # MUST
    my $i = 0; # Index of the next character of |$$s| to be read.
    $p->{line_prev} = $p->{line} = 1;
    $p->{column_prev} = $p->{column} = 0;
    ## NOTE: This closure refers to |$p| and is stored in |$p|, so it
    ## forms a reference loop, which is broken at the end of this block.
    $p->{set_next_char} = sub {
      my $self = shift;

      ## Shift the history of the last three characters.
      pop @{$self->{prev_char}};
      unshift @{$self->{prev_char}}, $self->{next_char};

      ## -1 represents the end of the input.
      $self->{next_char} = -1 and return if $i >= length $$s;
      $self->{next_char} = ord substr $$s, $i++, 1;

      ($p->{line_prev}, $p->{column_prev}) = ($p->{line}, $p->{column});
      $p->{column}++;

      if ($self->{next_char} == 0x000A) { # LF
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i1');
      } elsif ($self->{next_char} == 0x000D) { # CR
        ## CR and CRLF are both reported as a single LF.
        $i++ if substr ($$s, $i, 1) eq "\x0A";
        $self->{next_char} = 0x000A; # LF # MUST
        $p->{line}++;
        $p->{column} = 0;
        !!!cp ('i2');
      } elsif ($self->{next_char} > 0x10FFFF) {
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
        !!!cp ('i3');
      } elsif ($self->{next_char} == 0x0000) { # NULL
        !!!cp ('i4');
        !!!parse-error (type => 'NULL');
        $self->{next_char} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_char} <= 0x0008 or
               (0x000E <= $self->{next_char} and
                $self->{next_char} <= 0x001F) or
               (0x007F <= $self->{next_char} and
                $self->{next_char} <= 0x009F) or
               (0xD800 <= $self->{next_char} and
                $self->{next_char} <= 0xDFFF) or
               (0xFDD0 <= $self->{next_char} and
                $self->{next_char} <= 0xFDDF) or
               {
                0xFFFE => 1, 0xFFFF => 1, 0x1FFFE => 1, 0x1FFFF => 1,
                0x2FFFE => 1, 0x2FFFF => 1, 0x3FFFE => 1, 0x3FFFF => 1,
                0x4FFFE => 1, 0x4FFFF => 1, 0x5FFFE => 1, 0x5FFFF => 1,
                0x6FFFE => 1, 0x6FFFF => 1, 0x7FFFE => 1, 0x7FFFF => 1,
                0x8FFFE => 1, 0x8FFFF => 1, 0x9FFFE => 1, 0x9FFFF => 1,
                0xAFFFE => 1, 0xAFFFF => 1, 0xBFFFE => 1, 0xBFFFF => 1,
                0xCFFFE => 1, 0xCFFFF => 1, 0xDFFFE => 1, 0xDFFFF => 1,
                0xEFFFE => 1, 0xEFFFF => 1, 0xFFFFE => 1, 0xFFFFF => 1,
                0x10FFFE => 1, 0x10FFFF => 1,
               }->{$self->{next_char}}) {
        ## NOTE: The character is reported but left unchanged.
        !!!cp ('i4.1');
        !!!parse-error (type => 'control char', level => $self->{must_level});
        ## TODO: error type documentation
      }
    };
    $p->{prev_char} = [-1, -1, -1];
    $p->{next_char} = -1;

    ## The default error handler prefers the position of the token, if
    ## any, to the current position of the input stream.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      my $line = $opt{line};
      my $column = $opt{column};
      if (defined $opt{token} and defined $opt{token}->{line}) {
        $line = $opt{token}->{line};
        $column = $opt{token}->{column};
      }
      warn "Parse error ($opt{type}) at line $line column $column\n";
    };
    ## NOTE: This closure also refers to |$p| (reference loop).
    $p->{parse_error} = sub {
      $ponerror->(line => $p->{line}, column => $p->{column}, @_);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model depends on the context element.
    my $node_ln = $node->manakai_local_name;
    $p->{content_model} = {
      title => RCDATA_CONTENT_MODEL,
      textarea => RCDATA_CONTENT_MODEL,
      style => CDATA_CONTENT_MODEL,
      script => CDATA_CONTENT_MODEL,
      xmp => CDATA_CONTENT_MODEL,
      iframe => CDATA_CONTENT_MODEL,
      noembed => CDATA_CONTENT_MODEL,
      noframes => CDATA_CONTENT_MODEL,
      noscript => CDATA_CONTENT_MODEL,
      plaintext => PLAINTEXT_CONTENT_MODEL,
    }->{$node_ln};
    $p->{content_model} = PCDATA_CONTENT_MODEL
        unless defined $p->{content_model};
        ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $el_category->{$node_ln}];
      ## TODO: Foreign element OK?

    ## Step 3
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 4 # MUST
    $doc->append_child ($root);

    ## Step 5 # MUST
    push @{$p->{open_elements}}, [$root, $el_category->{html}];

    undef $p->{head_element};

    ## Step 6 # MUST
    $p->_reset_insertion_mode;

    ## Step 7 # MUST
    ## Find the nearest |form| element in the HTML namespace, starting
    ## from the context node itself and walking up its ancestors.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->manakai_local_name eq 'form') {
            !!!cp ('i5');
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 9 # MUST
    {
      ## NOTE: |$self| is declared here for the |!!!next-token| line.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 10 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 11 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;

    ## Break the reference loops |$p| -> closure -> |$p|; otherwise the
    ## parser object would never be freed.
    delete $p->{parse_error}; # delete loop
    delete $p->{set_next_char}; # delete loop
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
7285    
7286     } # tree construction stage
7287 wakaba 1.1
## Exception class derived from |Error|.
## NOTE(review): Presumably thrown to make the parser restart; confirm
## against the code that throws and catches it.
package Whatpm::HTML::RestartParser;
our @ISA;
push @ISA, 'Error';
7290    
7291 wakaba 1.1 1;
7292 wakaba 1.133 # $Date: 2008/04/13 10:36:40 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24