/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download) (as text)
Sat Jun 23 02:41:51 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +13 -4 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	23 Jun 2007 02:41:47 -0000
2007-06-23  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src: HTML5 revision 867 (a LF at the beginning of
	a |textarea| is removed).

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.9 our $VERSION=do{my @r=(q$Revision: 1.8 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Start tags on which a "/" immediately before ">" (e.g. |<br/>|) is
## tolerated: the tokenizer reports a |nestc| parse error for a slash
## in any other tag.
my $permitted_slash_tag_name = +{
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Named character references: entity name => replacement character
## (a one-character string).  The table below lists "name codepoint"
## pairs with the code point written in hexadecimal; names are case
## sensitive (e.g. both |amp| and |AMP| are present).
my $entity_char = do {
  my @name_and_hex = qw(
    AElig 00C6   Aacute 00C1   Acirc 00C2   Agrave 00C0
    Alpha 0391   Aring 00C5   Atilde 00C3   Auml 00C4
    Beta 0392   Ccedil 00C7   Chi 03A7   Dagger 2021
    Delta 0394   ETH 00D0   Eacute 00C9   Ecirc 00CA
    Egrave 00C8   Epsilon 0395   Eta 0397   Euml 00CB
    Gamma 0393   Iacute 00CD   Icirc 00CE   Igrave 00CC
    Iota 0399   Iuml 00CF   Kappa 039A   Lambda 039B
    Mu 039C   Ntilde 00D1   Nu 039D   OElig 0152
    Oacute 00D3   Ocirc 00D4   Ograve 00D2   Omega 03A9
    Omicron 039F   Oslash 00D8   Otilde 00D5   Ouml 00D6
    Phi 03A6   Pi 03A0   Prime 2033   Psi 03A8
    Rho 03A1   Scaron 0160   Sigma 03A3   THORN 00DE
    Tau 03A4   Theta 0398   Uacute 00DA   Ucirc 00DB
    Ugrave 00D9   Upsilon 03A5   Uuml 00DC   Xi 039E
    Yacute 00DD   Yuml 0178   Zeta 0396   aacute 00E1
    acirc 00E2   acute 00B4   aelig 00E6   agrave 00E0
    alefsym 2135   alpha 03B1   amp 0026   AMP 0026
    and 2227   ang 2220   apos 0027   aring 00E5
    asymp 2248   atilde 00E3   auml 00E4   bdquo 201E
    beta 03B2   brvbar 00A6   bull 2022   cap 2229
    ccedil 00E7   cedil 00B8   cent 00A2   chi 03C7
    circ 02C6   clubs 2663   cong 2245   copy 00A9
    COPY 00A9   crarr 21B5   cup 222A   curren 00A4
    dArr 21D3   dagger 2020   darr 2193   deg 00B0
    delta 03B4   diams 2666   divide 00F7   eacute 00E9
    ecirc 00EA   egrave 00E8   empty 2205   emsp 2003
    ensp 2002   epsilon 03B5   equiv 2261   eta 03B7
    eth 00F0   euml 00EB   euro 20AC   exist 2203
    fnof 0192   forall 2200   frac12 00BD   frac14 00BC
    frac34 00BE   frasl 2044   gamma 03B3   ge 2265
    gt 003E   GT 003E   hArr 21D4   harr 2194
    hearts 2665   hellip 2026   iacute 00ED   icirc 00EE
    iexcl 00A1   igrave 00EC   image 2111   infin 221E
    int 222B   iota 03B9   iquest 00BF   isin 2208
    iuml 00EF   kappa 03BA   lArr 21D0   lambda 03BB
    lang 2329   laquo 00AB   larr 2190   lceil 2308
    ldquo 201C   le 2264   lfloor 230A   lowast 2217
    loz 25CA   lrm 200E   lsaquo 2039   lsquo 2018
    lt 003C   LT 003C   macr 00AF   mdash 2014
    micro 00B5   middot 00B7   minus 2212   mu 03BC
    nabla 2207   nbsp 00A0   ndash 2013   ne 2260
    ni 220B   not 00AC   notin 2209   nsub 2284
    ntilde 00F1   nu 03BD   oacute 00F3   ocirc 00F4
    oelig 0153   ograve 00F2   oline 203E   omega 03C9
    omicron 03BF   oplus 2295   or 2228   ordf 00AA
    ordm 00BA   oslash 00F8   otilde 00F5   otimes 2297
    ouml 00F6   para 00B6   part 2202   permil 2030
    perp 22A5   phi 03C6   pi 03C0   piv 03D6
    plusmn 00B1   pound 00A3   prime 2032   prod 220F
    prop 221D   psi 03C8   quot 0022   QUOT 0022
    rArr 21D2   radic 221A   rang 232A   raquo 00BB
    rarr 2192   rceil 2309   rdquo 201D   real 211C
    reg 00AE   REG 00AE   rfloor 230B   rho 03C1
    rlm 200F   rsaquo 203A   rsquo 2019   sbquo 201A
    scaron 0161   sdot 22C5   sect 00A7   shy 00AD
    sigma 03C3   sigmaf 03C2   sim 223C   spades 2660
    sub 2282   sube 2286   sum 2211   sup 2283
    sup1 00B9   sup2 00B2   sup3 00B3   supe 2287
    szlig 00DF   tau 03C4   there4 2234   theta 03B8
    thetasym 03D1   thinsp 2009   thorn 00FE   tilde 02DC
    times 00D7   trade 2122   uArr 21D1   uacute 00FA
    uarr 2191   ucirc 00FB   ugrave 00F9   uml 00A8
    upsih 03D2   upsilon 03C5   uuml 00FC   weierp 2118
    xi 03BE   yacute 00FD   yen 00A5   yuml 00FF
    zeta 03B6   zwj 200D   zwnj 200C
  );

  ## Turn the flat (name, hex, name, hex, ...) list into the lookup hash.
  my %char_for;
  while (my ($name, $hex) = splice @name_and_hex, 0, 2) {
    $char_for{$name} = chr hex $hex;
  }
  \%char_for;
}; # $entity_char
282    
283 wakaba 1.8 ## TODO: Ensure that this table matches <http://html5.org/tools/web-apps-tracker?from=868&to=869>.
284 wakaba 1.4 ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
## Replacement code points for the numbers 128..159 (the C1 range):
## number => Unicode code point.  The assigned values follow the
## Windows-1252 layout; the unassigned positions (129, 141, 143, 144,
## 157) map to 65533, U+FFFD REPLACEMENT CHARACTER.
## NOTE(review): presumably consulted when resolving numeric character
## references -- the consumer is not part of this excerpt.
my $c1_entity_char = do {
  my @replacement = (
    8364, 65533,  8218,   402,  8222,  8230,  8224,  8225, # 128..135
     710,  8240,   352,  8249,   338, 65533,   381, 65533, # 136..143
   65533,  8216,  8217,  8220,  8221,  8226,  8211,  8212, # 144..151
     732,  8482,   353,  8250,   339, 65533,   382,   376, # 152..159
  );
  my %code_point_for;
  @code_point_for{128 .. 159} = @replacement;
  \%code_point_for;
}; # $c1_entity_char
319 wakaba 1.1
## Element names belonging to the "special" category (tag name => 1).
## NOTE(review): presumably consulted by the tree construction stage,
## which is outside this excerpt.
my $special_category = +{
  map { $_ => 1 } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Element names belonging to the "scoping" category (tag name => 1).
my $scoping_category = +{
  map { $_ => 1 } qw(button caption html marquee object table td th)
};
## Element names belonging to the "formatting" category (tag name => 1).
## The key for the |strike| element used to be misspelled "strile", so
## that element was never recognized as a formatting element.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
340     # $phrasing_category: all other elements
341    
## Parses an HTML document held in a character string.
##
##   $document = Whatpm::HTML->parse_string ($string, $document, $onerror);
##
## Arguments:
##   $string   - The source text.  It is accessed through a reference,
##               so it is not copied.
##   $document - The object stored as |$self->{document}| and returned.
##   $onerror  - Optional code reference invoked for each parse error
##               with the error's key/value pairs followed by
##               |line => $line, column => $column|.  When omitted,
##               errors are reported with |warn|.
##
## A new parser object is always created via |new|; the invocant is
## only used to find the class.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $i = 0;      # index of the next unread character in |$$s|
  my $line = 1;
  my $column = 0;

  ## Input stream reader: stores the next character's code point in
  ## |$self->{next_input_character}|, or -1 at the end of the input.
  $self->{set_next_input_character} = sub {
    my $self = shift;
    $self->{next_input_character} = -1 and return if $i >= length $$s;
    $self->{next_input_character} = ord substr $$s, $i++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## CR LF and a lone CR both become a single LF.  Only *peek* at
      ## the following character: consuming it and queueing it on
      ## |$self->{char}| would let it bypass the normalization done by
      ## this reader (a second CR, a NULL, or an out-of-range code
      ## point) as well as the line/column bookkeeping.
      $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      ## TODO: test
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  ## Error reporting: use the caller's handler, or warn by default.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
399    
## Creates a parser object with default handlers:
##   - |set_next_input_character| always reports the end of the input
##     (sets |next_input_character| to -1);
##   - |parse_error| silently ignores errors.
## |parse_string| replaces both handlers with real implementations.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The default reader is stored inside the object it refers to.  Hold
  ## the object through a weak reference so that this does not form a
  ## reference cycle, which would keep every parser object alive until
  ## the interpreter exits.
  require Scalar::Util;
  Scalar::Util::weaken (my $weak_self = $self);
  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1 if $weak_self;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
411    
412     ## Implementations MUST act as if they used the state machine described in the spec.
413    
## Puts the tokenizer into its initial condition and reads the first
## input character.
sub _initialize_tokenizer ($) {
  my $self = shift;

  ## Initial tokenizer state and content model.
  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA';

  ## No token (start tag, end tag, comment, or DOCTYPE) or attribute is
  ## under construction, and no start tag has been emitted yet.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  ## Queue of characters handed back for re-reading; it has to exist
  ## before the first character is fetched.
  $self->{char} = [];
  !!!next-input-character; # sets $self->{next_input_character}

  ## Tokens waiting to be returned by |_get_next_token|.
  $self->{token} = [];
} # _initialize_tokenizer
427    
428     ## A token has:
429     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
430     ## 'character', or 'end-of-file'
431     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
432     ## ISSUE: the spec need s/tagname/tag name/
433     ## ->{error} == 1 or 0 (DOCTYPE)
434     ## ->{attributes} isa HASH (start tag, end tag)
435     ## ->{data} (comment, character)
436    
437     ## Macros
438     ## Macros MUST be preceded by three EXCLAMATION MARKs.
439     ## emit ($token)
440     ## Emits the specified token.
441    
442     ## Emitted token MUST immediately be handled by the tree construction state.
443    
444     ## Before each step, UA MAY check to see if either one of the scripts in
445     ## "list of scripts that will execute as soon as possible" or the first
446     ## script in the "list of scripts that will execute asynchronously",
447     ## has completed loading. If one has, then it MUST be executed
448     ## and removed from the list.
449    
450 wakaba 1.8 ## ISSUE: <http://html5.org/tools/web-apps-tracker?from=874&to=876>
451    
452 wakaba 1.1 sub _get_next_token ($) {
453     my $self = shift;
454     if (@{$self->{token}}) {
455     return shift @{$self->{token}};
456     }
457    
458     A: {
459     if ($self->{state} eq 'data') {
460     if ($self->{next_input_character} == 0x0026) { # &
461     if ($self->{content_model_flag} eq 'PCDATA' or
462     $self->{content_model_flag} eq 'RCDATA') {
463     $self->{state} = 'entity data';
464     !!!next-input-character;
465     redo A;
466     } else {
467     #
468     }
469     } elsif ($self->{next_input_character} == 0x003C) { # <
470     if ($self->{content_model_flag} ne 'PLAINTEXT') {
471     $self->{state} = 'tag open';
472     !!!next-input-character;
473     redo A;
474     } else {
475     #
476     }
477     } elsif ($self->{next_input_character} == -1) {
478     !!!emit ({type => 'end-of-file'});
479     last A; ## TODO: ok?
480     }
481     # Anything else
482     my $token = {type => 'character',
483     data => chr $self->{next_input_character}};
484     ## Stay in the data state
485     !!!next-input-character;
486    
487     !!!emit ($token);
488    
489     redo A;
490     } elsif ($self->{state} eq 'entity data') {
491     ## (cannot happen in CDATA state)
492    
493     my $token = $self->_tokenize_attempt_to_consume_an_entity;
494    
495     $self->{state} = 'data';
496     # next-input-character is already done
497    
498     unless (defined $token) {
499     !!!emit ({type => 'character', data => '&'});
500     } else {
501     !!!emit ($token);
502     }
503    
504     redo A;
505     } elsif ($self->{state} eq 'tag open') {
506     if ($self->{content_model_flag} eq 'RCDATA' or
507     $self->{content_model_flag} eq 'CDATA') {
508     if ($self->{next_input_character} == 0x002F) { # /
509     !!!next-input-character;
510     $self->{state} = 'close tag open';
511     redo A;
512     } else {
513     ## reconsume
514     $self->{state} = 'data';
515    
516     !!!emit ({type => 'character', data => '<'});
517    
518     redo A;
519     }
520     } elsif ($self->{content_model_flag} eq 'PCDATA') {
521     if ($self->{next_input_character} == 0x0021) { # !
522     $self->{state} = 'markup declaration open';
523     !!!next-input-character;
524     redo A;
525     } elsif ($self->{next_input_character} == 0x002F) { # /
526     $self->{state} = 'close tag open';
527     !!!next-input-character;
528     redo A;
529     } elsif (0x0041 <= $self->{next_input_character} and
530     $self->{next_input_character} <= 0x005A) { # A..Z
531     $self->{current_token}
532     = {type => 'start tag',
533     tag_name => chr ($self->{next_input_character} + 0x0020)};
534     $self->{state} = 'tag name';
535     !!!next-input-character;
536     redo A;
537     } elsif (0x0061 <= $self->{next_input_character} and
538     $self->{next_input_character} <= 0x007A) { # a..z
539     $self->{current_token} = {type => 'start tag',
540     tag_name => chr ($self->{next_input_character})};
541     $self->{state} = 'tag name';
542     !!!next-input-character;
543     redo A;
544     } elsif ($self->{next_input_character} == 0x003E) { # >
545 wakaba 1.3 !!!parse-error (type => 'empty start tag');
546 wakaba 1.1 $self->{state} = 'data';
547     !!!next-input-character;
548    
549     !!!emit ({type => 'character', data => '<>'});
550    
551     redo A;
552     } elsif ($self->{next_input_character} == 0x003F) { # ?
553 wakaba 1.3 !!!parse-error (type => 'pio');
554 wakaba 1.1 $self->{state} = 'bogus comment';
555     ## $self->{next_input_character} is intentionally left as is
556     redo A;
557     } else {
558 wakaba 1.3 !!!parse-error (type => 'bare stago');
559 wakaba 1.1 $self->{state} = 'data';
560     ## reconsume
561    
562     !!!emit ({type => 'character', data => '<'});
563    
564     redo A;
565     }
566     } else {
567     die "$0: $self->{content_model_flag}: Unknown content model flag";
568     }
569     } elsif ($self->{state} eq 'close tag open') {
570     if ($self->{content_model_flag} eq 'RCDATA' or
571     $self->{content_model_flag} eq 'CDATA') {
572     my @next_char;
573     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
574     push @next_char, $self->{next_input_character};
575     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
576     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
577     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
578     !!!next-input-character;
579     next TAGNAME;
580     } else {
581 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
582 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
583     !!!back-next-input-character (@next_char);
584     $self->{state} = 'data';
585    
586     !!!emit ({type => 'character', data => '</'});
587    
588     redo A;
589     }
590     }
591     push @next_char, $self->{next_input_character};
592    
593     unless ($self->{next_input_character} == 0x0009 or # HT
594     $self->{next_input_character} == 0x000A or # LF
595     $self->{next_input_character} == 0x000B or # VT
596     $self->{next_input_character} == 0x000C or # FF
597     $self->{next_input_character} == 0x0020 or # SP
598     $self->{next_input_character} == 0x003E or # >
599     $self->{next_input_character} == 0x002F or # /
600     $self->{next_input_character} == 0x003C or # <
601     $self->{next_input_character} == -1) {
602 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
603 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
604     !!!back-next-input-character (@next_char);
605     $self->{state} = 'data';
606    
607     !!!emit ({type => 'character', data => '</'});
608    
609     redo A;
610     } else {
611     $self->{next_input_character} = shift @next_char;
612     !!!back-next-input-character (@next_char);
613     # and consume...
614     }
615     }
616    
617     if (0x0041 <= $self->{next_input_character} and
618     $self->{next_input_character} <= 0x005A) { # A..Z
619     $self->{current_token} = {type => 'end tag',
620     tag_name => chr ($self->{next_input_character} + 0x0020)};
621     $self->{state} = 'tag name';
622     !!!next-input-character;
623     redo A;
624     } elsif (0x0061 <= $self->{next_input_character} and
625     $self->{next_input_character} <= 0x007A) { # a..z
626     $self->{current_token} = {type => 'end tag',
627     tag_name => chr ($self->{next_input_character})};
628     $self->{state} = 'tag name';
629     !!!next-input-character;
630     redo A;
631     } elsif ($self->{next_input_character} == 0x003E) { # >
632 wakaba 1.3 !!!parse-error (type => 'empty end tag');
633 wakaba 1.1 $self->{state} = 'data';
634     !!!next-input-character;
635     redo A;
636     } elsif ($self->{next_input_character} == -1) {
637 wakaba 1.3 !!!parse-error (type => 'bare etago');
638 wakaba 1.1 $self->{state} = 'data';
639     # reconsume
640    
641     !!!emit ({type => 'character', data => '</'});
642    
643     redo A;
644     } else {
645 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
646 wakaba 1.1 $self->{state} = 'bogus comment';
647     ## $self->{next_input_character} is intentionally left as is
648     redo A;
649     }
650     } elsif ($self->{state} eq 'tag name') {
651     if ($self->{next_input_character} == 0x0009 or # HT
652     $self->{next_input_character} == 0x000A or # LF
653     $self->{next_input_character} == 0x000B or # VT
654     $self->{next_input_character} == 0x000C or # FF
655     $self->{next_input_character} == 0x0020) { # SP
656     $self->{state} = 'before attribute name';
657     !!!next-input-character;
658     redo A;
659     } elsif ($self->{next_input_character} == 0x003E) { # >
660     if ($self->{current_token}->{type} eq 'start tag') {
661     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
662     } elsif ($self->{current_token}->{type} eq 'end tag') {
663     $self->{content_model_flag} = 'PCDATA'; # MUST
664     if ($self->{current_token}->{attributes}) {
665 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
666 wakaba 1.1 }
667     } else {
668     die "$0: $self->{current_token}->{type}: Unknown token type";
669     }
670     $self->{state} = 'data';
671     !!!next-input-character;
672    
673     !!!emit ($self->{current_token}); # start tag or end tag
674     undef $self->{current_token};
675    
676     redo A;
677     } elsif (0x0041 <= $self->{next_input_character} and
678     $self->{next_input_character} <= 0x005A) { # A..Z
679     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
680     # start tag or end tag
681     ## Stay in this state
682     !!!next-input-character;
683     redo A;
684     } elsif ($self->{next_input_character} == 0x003C or # <
685     $self->{next_input_character} == -1) {
686 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
687 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
688     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
689     } elsif ($self->{current_token}->{type} eq 'end tag') {
690     $self->{content_model_flag} = 'PCDATA'; # MUST
691     if ($self->{current_token}->{attributes}) {
692 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
693 wakaba 1.1 }
694     } else {
695     die "$0: $self->{current_token}->{type}: Unknown token type";
696     }
697     $self->{state} = 'data';
698     # reconsume
699    
700     !!!emit ($self->{current_token}); # start tag or end tag
701     undef $self->{current_token};
702    
703     redo A;
704     } elsif ($self->{next_input_character} == 0x002F) { # /
705     !!!next-input-character;
706     if ($self->{next_input_character} == 0x003E and # >
707     $self->{current_token}->{type} eq 'start tag' and
708     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
709     # permitted slash
710     #
711     } else {
712 wakaba 1.3 !!!parse-error (type => 'nestc');
713 wakaba 1.1 }
714     $self->{state} = 'before attribute name';
715     # next-input-character is already done
716     redo A;
717     } else {
718     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
719     # start tag or end tag
720     ## Stay in the state
721     !!!next-input-character;
722     redo A;
723     }
724     } elsif ($self->{state} eq 'before attribute name') {
725     if ($self->{next_input_character} == 0x0009 or # HT
726     $self->{next_input_character} == 0x000A or # LF
727     $self->{next_input_character} == 0x000B or # VT
728     $self->{next_input_character} == 0x000C or # FF
729     $self->{next_input_character} == 0x0020) { # SP
730     ## Stay in the state
731     !!!next-input-character;
732     redo A;
733     } elsif ($self->{next_input_character} == 0x003E) { # >
734     if ($self->{current_token}->{type} eq 'start tag') {
735     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
736     } elsif ($self->{current_token}->{type} eq 'end tag') {
737     $self->{content_model_flag} = 'PCDATA'; # MUST
738     if ($self->{current_token}->{attributes}) {
739 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
740 wakaba 1.1 }
741     } else {
742     die "$0: $self->{current_token}->{type}: Unknown token type";
743     }
744     $self->{state} = 'data';
745     !!!next-input-character;
746    
747     !!!emit ($self->{current_token}); # start tag or end tag
748     undef $self->{current_token};
749    
750     redo A;
751     } elsif (0x0041 <= $self->{next_input_character} and
752     $self->{next_input_character} <= 0x005A) { # A..Z
753     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
754     value => ''};
755     $self->{state} = 'attribute name';
756     !!!next-input-character;
757     redo A;
758     } elsif ($self->{next_input_character} == 0x002F) { # /
759     !!!next-input-character;
760     if ($self->{next_input_character} == 0x003E and # >
761     $self->{current_token}->{type} eq 'start tag' and
762     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
763     # permitted slash
764     #
765     } else {
766 wakaba 1.3 !!!parse-error (type => 'nestc');
767 wakaba 1.1 }
768     ## Stay in the state
769     # next-input-character is already done
770     redo A;
771     } elsif ($self->{next_input_character} == 0x003C or # <
772     $self->{next_input_character} == -1) {
773 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
774 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
775     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
776     } elsif ($self->{current_token}->{type} eq 'end tag') {
777     $self->{content_model_flag} = 'PCDATA'; # MUST
778     if ($self->{current_token}->{attributes}) {
779 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
780 wakaba 1.1 }
781     } else {
782     die "$0: $self->{current_token}->{type}: Unknown token type";
783     }
784     $self->{state} = 'data';
785     # reconsume
786    
787     !!!emit ($self->{current_token}); # start tag or end tag
788     undef $self->{current_token};
789    
790     redo A;
791     } else {
792     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
793     value => ''};
794     $self->{state} = 'attribute name';
795     !!!next-input-character;
796     redo A;
797     }
798     } elsif ($self->{state} eq 'attribute name') {
799     my $before_leave = sub {
800     if (exists $self->{current_token}->{attributes} # start tag or end tag
801     ->{$self->{current_attribute}->{name}}) { # MUST
802 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
803 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
804     } else {
805     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
806     = $self->{current_attribute};
807     }
808     }; # $before_leave
809    
810     if ($self->{next_input_character} == 0x0009 or # HT
811     $self->{next_input_character} == 0x000A or # LF
812     $self->{next_input_character} == 0x000B or # VT
813     $self->{next_input_character} == 0x000C or # FF
814     $self->{next_input_character} == 0x0020) { # SP
815     $before_leave->();
816     $self->{state} = 'after attribute name';
817     !!!next-input-character;
818     redo A;
819     } elsif ($self->{next_input_character} == 0x003D) { # =
820     $before_leave->();
821     $self->{state} = 'before attribute value';
822     !!!next-input-character;
823     redo A;
824     } elsif ($self->{next_input_character} == 0x003E) { # >
825     $before_leave->();
826     if ($self->{current_token}->{type} eq 'start tag') {
827     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
828     } elsif ($self->{current_token}->{type} eq 'end tag') {
829     $self->{content_model_flag} = 'PCDATA'; # MUST
830     if ($self->{current_token}->{attributes}) {
831 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
832 wakaba 1.1 }
833     } else {
834     die "$0: $self->{current_token}->{type}: Unknown token type";
835     }
836     $self->{state} = 'data';
837     !!!next-input-character;
838    
839     !!!emit ($self->{current_token}); # start tag or end tag
840     undef $self->{current_token};
841    
842     redo A;
843     } elsif (0x0041 <= $self->{next_input_character} and
844     $self->{next_input_character} <= 0x005A) { # A..Z
845     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
846     ## Stay in the state
847     !!!next-input-character;
848     redo A;
849     } elsif ($self->{next_input_character} == 0x002F) { # /
850     $before_leave->();
851     !!!next-input-character;
852     if ($self->{next_input_character} == 0x003E and # >
853     $self->{current_token}->{type} eq 'start tag' and
854     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
855     # permitted slash
856     #
857     } else {
858 wakaba 1.3 !!!parse-error (type => 'nestc');
859 wakaba 1.1 }
860     $self->{state} = 'before attribute name';
861     # next-input-character is already done
862     redo A;
863     } elsif ($self->{next_input_character} == 0x003C or # <
864     $self->{next_input_character} == -1) {
865 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
866 wakaba 1.1 $before_leave->();
867     if ($self->{current_token}->{type} eq 'start tag') {
868     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
869     } elsif ($self->{current_token}->{type} eq 'end tag') {
870     $self->{content_model_flag} = 'PCDATA'; # MUST
871     if ($self->{current_token}->{attributes}) {
872 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
873 wakaba 1.1 }
874     } else {
875     die "$0: $self->{current_token}->{type}: Unknown token type";
876     }
877     $self->{state} = 'data';
878     # reconsume
879    
880     !!!emit ($self->{current_token}); # start tag or end tag
881     undef $self->{current_token};
882    
883     redo A;
884     } else {
885     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
886     ## Stay in the state
887     !!!next-input-character;
888     redo A;
889     }
890     } elsif ($self->{state} eq 'after attribute name') {
891     if ($self->{next_input_character} == 0x0009 or # HT
892     $self->{next_input_character} == 0x000A or # LF
893     $self->{next_input_character} == 0x000B or # VT
894     $self->{next_input_character} == 0x000C or # FF
895     $self->{next_input_character} == 0x0020) { # SP
896     ## Stay in the state
897     !!!next-input-character;
898     redo A;
899     } elsif ($self->{next_input_character} == 0x003D) { # =
900     $self->{state} = 'before attribute value';
901     !!!next-input-character;
902     redo A;
903     } elsif ($self->{next_input_character} == 0x003E) { # >
904     if ($self->{current_token}->{type} eq 'start tag') {
905     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
906     } elsif ($self->{current_token}->{type} eq 'end tag') {
907     $self->{content_model_flag} = 'PCDATA'; # MUST
908     if ($self->{current_token}->{attributes}) {
909 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
910 wakaba 1.1 }
911     } else {
912     die "$0: $self->{current_token}->{type}: Unknown token type";
913     }
914     $self->{state} = 'data';
915     !!!next-input-character;
916    
917     !!!emit ($self->{current_token}); # start tag or end tag
918     undef $self->{current_token};
919    
920     redo A;
921     } elsif (0x0041 <= $self->{next_input_character} and
922     $self->{next_input_character} <= 0x005A) { # A..Z
923     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
924     value => ''};
925     $self->{state} = 'attribute name';
926     !!!next-input-character;
927     redo A;
928     } elsif ($self->{next_input_character} == 0x002F) { # /
929     !!!next-input-character;
930     if ($self->{next_input_character} == 0x003E and # >
931     $self->{current_token}->{type} eq 'start tag' and
932     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
933     # permitted slash
934     #
935     } else {
936 wakaba 1.3 !!!parse-error (type => 'nestc');
937 wakaba 1.1 }
938     $self->{state} = 'before attribute name';
939     # next-input-character is already done
940     redo A;
941     } elsif ($self->{next_input_character} == 0x003C or # <
942     $self->{next_input_character} == -1) {
943 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
944 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
945     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
946     } elsif ($self->{current_token}->{type} eq 'end tag') {
947     $self->{content_model_flag} = 'PCDATA'; # MUST
948     if ($self->{current_token}->{attributes}) {
949 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
950 wakaba 1.1 }
951     } else {
952     die "$0: $self->{current_token}->{type}: Unknown token type";
953     }
954     $self->{state} = 'data';
955     # reconsume
956    
957     !!!emit ($self->{current_token}); # start tag or end tag
958     undef $self->{current_token};
959    
960     redo A;
961     } else {
962     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
963     value => ''};
964     $self->{state} = 'attribute name';
965     !!!next-input-character;
966     redo A;
967     }
968     } elsif ($self->{state} eq 'before attribute value') {
969     if ($self->{next_input_character} == 0x0009 or # HT
970     $self->{next_input_character} == 0x000A or # LF
971     $self->{next_input_character} == 0x000B or # VT
972     $self->{next_input_character} == 0x000C or # FF
973     $self->{next_input_character} == 0x0020) { # SP
974     ## Stay in the state
975     !!!next-input-character;
976     redo A;
977     } elsif ($self->{next_input_character} == 0x0022) { # "
978     $self->{state} = 'attribute value (double-quoted)';
979     !!!next-input-character;
980     redo A;
981     } elsif ($self->{next_input_character} == 0x0026) { # &
982     $self->{state} = 'attribute value (unquoted)';
983     ## reconsume
984     redo A;
985     } elsif ($self->{next_input_character} == 0x0027) { # '
986     $self->{state} = 'attribute value (single-quoted)';
987     !!!next-input-character;
988     redo A;
989     } elsif ($self->{next_input_character} == 0x003E) { # >
990     if ($self->{current_token}->{type} eq 'start tag') {
991     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
992     } elsif ($self->{current_token}->{type} eq 'end tag') {
993     $self->{content_model_flag} = 'PCDATA'; # MUST
994     if ($self->{current_token}->{attributes}) {
995 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
996 wakaba 1.1 }
997     } else {
998     die "$0: $self->{current_token}->{type}: Unknown token type";
999     }
1000     $self->{state} = 'data';
1001     !!!next-input-character;
1002    
1003     !!!emit ($self->{current_token}); # start tag or end tag
1004     undef $self->{current_token};
1005    
1006     redo A;
1007     } elsif ($self->{next_input_character} == 0x003C or # <
1008     $self->{next_input_character} == -1) {
1009 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1010 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1011     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1012     } elsif ($self->{current_token}->{type} eq 'end tag') {
1013     $self->{content_model_flag} = 'PCDATA'; # MUST
1014     if ($self->{current_token}->{attributes}) {
1015 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1016 wakaba 1.1 }
1017     } else {
1018     die "$0: $self->{current_token}->{type}: Unknown token type";
1019     }
1020     $self->{state} = 'data';
1021     ## reconsume
1022    
1023     !!!emit ($self->{current_token}); # start tag or end tag
1024     undef $self->{current_token};
1025    
1026     redo A;
1027     } else {
1028     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1029     $self->{state} = 'attribute value (unquoted)';
1030     !!!next-input-character;
1031     redo A;
1032     }
1033     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1034     if ($self->{next_input_character} == 0x0022) { # "
1035     $self->{state} = 'before attribute name';
1036     !!!next-input-character;
1037     redo A;
1038     } elsif ($self->{next_input_character} == 0x0026) { # &
1039     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1040     $self->{state} = 'entity in attribute value';
1041     !!!next-input-character;
1042     redo A;
1043     } elsif ($self->{next_input_character} == -1) {
1044 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1045 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1046     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1047     } elsif ($self->{current_token}->{type} eq 'end tag') {
1048     $self->{content_model_flag} = 'PCDATA'; # MUST
1049     if ($self->{current_token}->{attributes}) {
1050 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1051 wakaba 1.1 }
1052     } else {
1053     die "$0: $self->{current_token}->{type}: Unknown token type";
1054     }
1055     $self->{state} = 'data';
1056     ## reconsume
1057    
1058     !!!emit ($self->{current_token}); # start tag or end tag
1059     undef $self->{current_token};
1060    
1061     redo A;
1062     } else {
1063     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1064     ## Stay in the state
1065     !!!next-input-character;
1066     redo A;
1067     }
1068     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1069     if ($self->{next_input_character} == 0x0027) { # '
1070     $self->{state} = 'before attribute name';
1071     !!!next-input-character;
1072     redo A;
1073     } elsif ($self->{next_input_character} == 0x0026) { # &
1074     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1075     $self->{state} = 'entity in attribute value';
1076     !!!next-input-character;
1077     redo A;
1078     } elsif ($self->{next_input_character} == -1) {
1079 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1080 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1081     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1082     } elsif ($self->{current_token}->{type} eq 'end tag') {
1083     $self->{content_model_flag} = 'PCDATA'; # MUST
1084     if ($self->{current_token}->{attributes}) {
1085 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1086 wakaba 1.1 }
1087     } else {
1088     die "$0: $self->{current_token}->{type}: Unknown token type";
1089     }
1090     $self->{state} = 'data';
1091     ## reconsume
1092    
1093     !!!emit ($self->{current_token}); # start tag or end tag
1094     undef $self->{current_token};
1095    
1096     redo A;
1097     } else {
1098     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1099     ## Stay in the state
1100     !!!next-input-character;
1101     redo A;
1102     }
1103     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1104     if ($self->{next_input_character} == 0x0009 or # HT
1105     $self->{next_input_character} == 0x000A or # LF
1106     $self->{next_input_character} == 0x000B or # HT
1107     $self->{next_input_character} == 0x000C or # FF
1108     $self->{next_input_character} == 0x0020) { # SP
1109     $self->{state} = 'before attribute name';
1110     !!!next-input-character;
1111     redo A;
1112     } elsif ($self->{next_input_character} == 0x0026) { # &
1113     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1114     $self->{state} = 'entity in attribute value';
1115     !!!next-input-character;
1116     redo A;
1117     } elsif ($self->{next_input_character} == 0x003E) { # >
1118     if ($self->{current_token}->{type} eq 'start tag') {
1119     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1120     } elsif ($self->{current_token}->{type} eq 'end tag') {
1121     $self->{content_model_flag} = 'PCDATA'; # MUST
1122     if ($self->{current_token}->{attributes}) {
1123 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1124 wakaba 1.1 }
1125     } else {
1126     die "$0: $self->{current_token}->{type}: Unknown token type";
1127     }
1128     $self->{state} = 'data';
1129     !!!next-input-character;
1130    
1131     !!!emit ($self->{current_token}); # start tag or end tag
1132     undef $self->{current_token};
1133    
1134     redo A;
1135     } elsif ($self->{next_input_character} == 0x003C or # <
1136     $self->{next_input_character} == -1) {
1137 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1138 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1139     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1140     } elsif ($self->{current_token}->{type} eq 'end tag') {
1141     $self->{content_model_flag} = 'PCDATA'; # MUST
1142     if ($self->{current_token}->{attributes}) {
1143 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1144 wakaba 1.1 }
1145     } else {
1146     die "$0: $self->{current_token}->{type}: Unknown token type";
1147     }
1148     $self->{state} = 'data';
1149     ## reconsume
1150    
1151     !!!emit ($self->{current_token}); # start tag or end tag
1152     undef $self->{current_token};
1153    
1154     redo A;
1155     } else {
1156     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1157     ## Stay in the state
1158     !!!next-input-character;
1159     redo A;
1160     }
1161     } elsif ($self->{state} eq 'entity in attribute value') {
1162     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1163    
1164     unless (defined $token) {
1165     $self->{current_attribute}->{value} .= '&';
1166     } else {
1167     $self->{current_attribute}->{value} .= $token->{data};
1168     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1169     }
1170    
1171     $self->{state} = $self->{last_attribute_value_state};
1172     # next-input-character is already done
1173     redo A;
1174     } elsif ($self->{state} eq 'bogus comment') {
1175     ## (only happen if PCDATA state)
1176    
1177     my $token = {type => 'comment', data => ''};
1178    
1179     BC: {
1180     if ($self->{next_input_character} == 0x003E) { # >
1181     $self->{state} = 'data';
1182     !!!next-input-character;
1183    
1184     !!!emit ($token);
1185    
1186     redo A;
1187     } elsif ($self->{next_input_character} == -1) {
1188     $self->{state} = 'data';
1189     ## reconsume
1190    
1191     !!!emit ($token);
1192    
1193     redo A;
1194     } else {
1195     $token->{data} .= chr ($self->{next_input_character});
1196     !!!next-input-character;
1197     redo BC;
1198     }
1199     } # BC
1200     } elsif ($self->{state} eq 'markup declaration open') {
1201     ## (only happen if PCDATA state)
1202    
1203     my @next_char;
1204     push @next_char, $self->{next_input_character};
1205    
1206     if ($self->{next_input_character} == 0x002D) { # -
1207     !!!next-input-character;
1208     push @next_char, $self->{next_input_character};
1209     if ($self->{next_input_character} == 0x002D) { # -
1210     $self->{current_token} = {type => 'comment', data => ''};
1211     $self->{state} = 'comment';
1212     !!!next-input-character;
1213     redo A;
1214     }
1215     } elsif ($self->{next_input_character} == 0x0044 or # D
1216     $self->{next_input_character} == 0x0064) { # d
1217     !!!next-input-character;
1218     push @next_char, $self->{next_input_character};
1219     if ($self->{next_input_character} == 0x004F or # O
1220     $self->{next_input_character} == 0x006F) { # o
1221     !!!next-input-character;
1222     push @next_char, $self->{next_input_character};
1223     if ($self->{next_input_character} == 0x0043 or # C
1224     $self->{next_input_character} == 0x0063) { # c
1225     !!!next-input-character;
1226     push @next_char, $self->{next_input_character};
1227     if ($self->{next_input_character} == 0x0054 or # T
1228     $self->{next_input_character} == 0x0074) { # t
1229     !!!next-input-character;
1230     push @next_char, $self->{next_input_character};
1231     if ($self->{next_input_character} == 0x0059 or # Y
1232     $self->{next_input_character} == 0x0079) { # y
1233     !!!next-input-character;
1234     push @next_char, $self->{next_input_character};
1235     if ($self->{next_input_character} == 0x0050 or # P
1236     $self->{next_input_character} == 0x0070) { # p
1237     !!!next-input-character;
1238     push @next_char, $self->{next_input_character};
1239     if ($self->{next_input_character} == 0x0045 or # E
1240     $self->{next_input_character} == 0x0065) { # e
1241     ## ISSUE: What a stupid code this is!
1242     $self->{state} = 'DOCTYPE';
1243     !!!next-input-character;
1244     redo A;
1245     }
1246     }
1247     }
1248     }
1249     }
1250     }
1251     }
1252    
1253 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1254 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1255     !!!back-next-input-character (@next_char);
1256     $self->{state} = 'bogus comment';
1257     redo A;
1258    
1259     ## ISSUE: typos in spec: chacacters, is is a parse error
1260     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1261     } elsif ($self->{state} eq 'comment') {
1262     if ($self->{next_input_character} == 0x002D) { # -
1263     $self->{state} = 'comment dash';
1264     !!!next-input-character;
1265     redo A;
1266     } elsif ($self->{next_input_character} == -1) {
1267 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1268 wakaba 1.1 $self->{state} = 'data';
1269     ## reconsume
1270    
1271     !!!emit ($self->{current_token}); # comment
1272     undef $self->{current_token};
1273    
1274     redo A;
1275     } else {
1276     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1277     ## Stay in the state
1278     !!!next-input-character;
1279     redo A;
1280     }
1281     } elsif ($self->{state} eq 'comment dash') {
1282     if ($self->{next_input_character} == 0x002D) { # -
1283     $self->{state} = 'comment end';
1284     !!!next-input-character;
1285     redo A;
1286     } elsif ($self->{next_input_character} == -1) {
1287 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1288 wakaba 1.1 $self->{state} = 'data';
1289     ## reconsume
1290    
1291     !!!emit ($self->{current_token}); # comment
1292     undef $self->{current_token};
1293    
1294     redo A;
1295     } else {
1296     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1297     $self->{state} = 'comment';
1298     !!!next-input-character;
1299     redo A;
1300     }
1301     } elsif ($self->{state} eq 'comment end') {
1302     if ($self->{next_input_character} == 0x003E) { # >
1303     $self->{state} = 'data';
1304     !!!next-input-character;
1305    
1306     !!!emit ($self->{current_token}); # comment
1307     undef $self->{current_token};
1308    
1309     redo A;
1310     } elsif ($self->{next_input_character} == 0x002D) { # -
1311 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1312 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1313     ## Stay in the state
1314     !!!next-input-character;
1315     redo A;
1316     } elsif ($self->{next_input_character} == -1) {
1317 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1318 wakaba 1.1 $self->{state} = 'data';
1319     ## reconsume
1320    
1321     !!!emit ($self->{current_token}); # comment
1322     undef $self->{current_token};
1323    
1324     redo A;
1325     } else {
1326 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1327 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1328     $self->{state} = 'comment';
1329     !!!next-input-character;
1330     redo A;
1331     }
1332     } elsif ($self->{state} eq 'DOCTYPE') {
1333     if ($self->{next_input_character} == 0x0009 or # HT
1334     $self->{next_input_character} == 0x000A or # LF
1335     $self->{next_input_character} == 0x000B or # VT
1336     $self->{next_input_character} == 0x000C or # FF
1337     $self->{next_input_character} == 0x0020) { # SP
1338     $self->{state} = 'before DOCTYPE name';
1339     !!!next-input-character;
1340     redo A;
1341     } else {
1342 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1343 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1344     ## reconsume
1345     redo A;
1346     }
1347     } elsif ($self->{state} eq 'before DOCTYPE name') {
1348     if ($self->{next_input_character} == 0x0009 or # HT
1349     $self->{next_input_character} == 0x000A or # LF
1350     $self->{next_input_character} == 0x000B or # VT
1351     $self->{next_input_character} == 0x000C or # FF
1352     $self->{next_input_character} == 0x0020) { # SP
1353     ## Stay in the state
1354     !!!next-input-character;
1355     redo A;
1356     } elsif (0x0061 <= $self->{next_input_character} and
1357     $self->{next_input_character} <= 0x007A) { # a..z
1358 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1359 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1360     name => chr ($self->{next_input_character} - 0x0020),
1361     error => 1};
1362     $self->{state} = 'DOCTYPE name';
1363     !!!next-input-character;
1364     redo A;
1365     } elsif ($self->{next_input_character} == 0x003E) { # >
1366 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1367 wakaba 1.1 $self->{state} = 'data';
1368     !!!next-input-character;
1369    
1370     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1371    
1372     redo A;
1373     } elsif ($self->{next_input_character} == -1) {
1374 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1375 wakaba 1.1 $self->{state} = 'data';
1376     ## reconsume
1377    
1378     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1379    
1380     redo A;
1381     } else {
1382     $self->{current_token} = {type => 'DOCTYPE',
1383     name => chr ($self->{next_input_character}),
1384     error => 1};
1385 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1386 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1387     !!!next-input-character;
1388     redo A;
1389     }
1390     } elsif ($self->{state} eq 'DOCTYPE name') {
1391     if ($self->{next_input_character} == 0x0009 or # HT
1392     $self->{next_input_character} == 0x000A or # LF
1393     $self->{next_input_character} == 0x000B or # VT
1394     $self->{next_input_character} == 0x000C or # FF
1395     $self->{next_input_character} == 0x0020) { # SP
1396     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1397     $self->{state} = 'after DOCTYPE name';
1398     !!!next-input-character;
1399     redo A;
1400     } elsif ($self->{next_input_character} == 0x003E) { # >
1401     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1402     $self->{state} = 'data';
1403     !!!next-input-character;
1404    
1405     !!!emit ($self->{current_token}); # DOCTYPE
1406     undef $self->{current_token};
1407    
1408     redo A;
1409     } elsif (0x0061 <= $self->{next_input_character} and
1410     $self->{next_input_character} <= 0x007A) { # a..z
1411     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1412     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1413     ## Stay in the state
1414     !!!next-input-character;
1415     redo A;
1416     } elsif ($self->{next_input_character} == -1) {
1417 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1418 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1419     $self->{state} = 'data';
1420     ## reconsume
1421    
1422     !!!emit ($self->{current_token});
1423     undef $self->{current_token};
1424    
1425     redo A;
1426     } else {
1427     $self->{current_token}->{name}
1428     .= chr ($self->{next_input_character}); # DOCTYPE
1429     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1430     ## Stay in the state
1431     !!!next-input-character;
1432     redo A;
1433     }
1434     } elsif ($self->{state} eq 'after DOCTYPE name') {
1435     if ($self->{next_input_character} == 0x0009 or # HT
1436     $self->{next_input_character} == 0x000A or # LF
1437     $self->{next_input_character} == 0x000B or # VT
1438     $self->{next_input_character} == 0x000C or # FF
1439     $self->{next_input_character} == 0x0020) { # SP
1440     ## Stay in the state
1441     !!!next-input-character;
1442     redo A;
1443     } elsif ($self->{next_input_character} == 0x003E) { # >
1444     $self->{state} = 'data';
1445     !!!next-input-character;
1446    
1447     !!!emit ($self->{current_token}); # DOCTYPE
1448     undef $self->{current_token};
1449    
1450     redo A;
1451     } elsif ($self->{next_input_character} == -1) {
1452 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1453 wakaba 1.1 $self->{state} = 'data';
1454     ## reconsume
1455    
1456     !!!emit ($self->{current_token}); # DOCTYPE
1457     undef $self->{current_token};
1458    
1459     redo A;
1460     } else {
1461 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1462 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1463     $self->{state} = 'bogus DOCTYPE';
1464     !!!next-input-character;
1465     redo A;
1466     }
1467     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1468     if ($self->{next_input_character} == 0x003E) { # >
1469     $self->{state} = 'data';
1470     !!!next-input-character;
1471    
1472     !!!emit ($self->{current_token}); # DOCTYPE
1473     undef $self->{current_token};
1474    
1475     redo A;
1476     } elsif ($self->{next_input_character} == -1) {
1477 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1478 wakaba 1.1 $self->{state} = 'data';
1479     ## reconsume
1480    
1481     !!!emit ($self->{current_token}); # DOCTYPE
1482     undef $self->{current_token};
1483    
1484     redo A;
1485     } else {
1486     ## Stay in the state
1487     !!!next-input-character;
1488     redo A;
1489     }
1490     } else {
1491     die "$0: $self->{state}: Unknown state";
1492     }
1493     } # A
1494    
1495     die "$0: _get_next_token: unexpected case";
1496     } # _get_next_token
1497    
## Attempt to consume an entity (character reference) after a "&".
##
## On entry, $self->{next_input_character} is the character that
## follows the "&".  Three forms are handled:
##
##   - hexadecimal numeric reference ("#x..." / "#X..."),
##   - decimal numeric reference ("#..."),
##   - named reference, looked up in $entity_char.
##
## Returns a {type => 'character', data => ...} token when a reference
## is recognized, or undef when it is not (the attribute value state,
## for instance, then appends a literal "&").  Problems are reported
## through !!!parse-error.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      my $num;
      X: {
        ## On the first pass $x_char is the "x"/"X"; on later passes
        ## (after |redo X|) it is the digit that has just been handled.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error (type => 'bare hcro');
          ## Un-read both the "x"/"X" and the character that followed
          ## it, then make "#" the current character again.  The
          ## character after "x" must be pushed back as well (as the
          ## decimal branch below does); otherwise it would be lost
          ## when |next_input_character| is overwritten.
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error (type => 'no refc');
        }

        ## TODO: check the definition for |a valid Unicode character|.
        ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        } elsif (0x80 <= $num and $num <= 0x9F) {
          ## C1 range: report it and remap through $c1_entity_char.
          !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
          $num = $c1_entity_char->{$num};
        }

        return {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        ## C1 range: report it and remap through $c1_entity_char.
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## $value holds the text to return: the replacement of the longest
    ## entity name matched so far, followed by any characters read
    ## after that match (or just the raw characters if nothing has
    ## matched yet).
    my $value = $entity_name;
    my $match;
    ## Number of characters read after the longest matched name.
    my $unmatched_tail = 0;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x005A) or
            (0x0061 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x007A) or
            (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039))) {
      $entity_name .= chr $self->{next_input_character};
      if (defined $entity_char->{$entity_name}) {
        $value = $entity_char->{$entity_name};
        $match = 1;
        $unmatched_tail = 0;
      } else {
        $value .= chr $self->{next_input_character};
        $unmatched_tail++;
      }
      !!!next-input-character;
    }

    if ($match) {
      ## A ";" terminates the reference only when it immediately
      ## follows the matched name.  In "&notit;" the name is "not",
      ## so the ";" after "it" is ordinary text and must stay in the
      ## input; the reference itself is unterminated.
      if ($unmatched_tail == 0 and
          $self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        ## NOTE(review): the numeric branches report 'no refc' for the
        ## same condition; confirm which type name is intended.
        !!!parse-error (type => 'refc');
      }

      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1635    
## Prepare the document for tree construction by switching off its
## strict error checking.
##
## NOTE: $self->{document} MUST be specified before this method is called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST
  $document->strict_error_checking (0);
} # _initialize_tree_constructor
1644    
## Finish tree construction by switching the document's strict error
## checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  ## TODO: Turn mutation events on
  $document->strict_error_checking (1);
} # _terminate_tree_constructor
1650    
1651     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1652    
1653 wakaba 1.3 { # tree construction stage
1654     my $token;
1655    
## Entry point of the tree construction stage: read the first token,
## reset the tree-construction state kept on $self, then run the
## initial, root element and main phases in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: When an interactive UA makes $self->{document} available to
  ## the user, or starts accepting user input, is not defined.

  ## NOTE: Appending a character means collecting it together with all
  ## consecutive characters that follow and inserting a single Text
  ## node whose data is the concatenation of them. # MUST

  !!!next-token;

  ## Start from a clean state.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  return $self->_tree_construction_main;
} # _construct_tree
1679    
## Initial phase of tree construction.
##
## Handles the current token ($token, shared by the enclosing scope)
## until the phase is over:
##
##   - a DOCTYPE token becomes a document type definition node on the
##     document, and the next token is fetched;
##   - leading white space characters are appended to the document as
##     text; if nothing else is left in the token, the next token is
##     fetched and the phase continues;
##   - anything else is reported as a missing DOCTYPE and is left as
##     the current token so that the next phase reprocesses it.
##
## Dies on a token of an unknown type.
sub _tree_construction_initial ($) {
  my ($self) = @_;

  ## Token types (other than character) that show the DOCTYPE is absent.
  my %without_doctype = map { $_ => 1 }
      ('comment', 'start tag', 'end tag', 'end-of-file');

  INITIAL: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name});
      $self->{document}->append_child ($doctype);
      ## Next phase: root element
      !!!next-token;
      return;
    }

    if ($without_doctype{$type}) {
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## Next phase: root element; the token is reprocessed there
      return;
    }

    die "$0: $token->{type}: Unknown token" unless $type eq 'character';

    if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
      $self->{document}->manakai_append_text ($1);
      ## ISSUE: DOM3 Core does not allow Document > Text
      if (not length $token->{data}) {
        ## Only white space so far: stay in the phase
        !!!next-token;
        next INITIAL;
      }
    }

    ## ISSUE: Spec currently left this case undefined.
    !!!parse-error (type => 'missing DOCTYPE');
    ## Next phase: root element; the token is reprocessed there
    return;
  } # INITIAL
} # _tree_construction_initial
1728    
1729     sub _tree_construction_root_element ($) {
         ## Root element phase: consumes tokens until the |html| root
         ## element can be created.
         ##
         ##   - DOCTYPE token: parse error, token ignored, phase continues.
         ##   - comment token: a comment node is appended to the document,
         ##     phase continues.
         ##   - character token: leading whitespace is moved into the
         ##     document as text; a whitespace-only token keeps the phase
         ##     going, anything left over falls through to root creation.
         ##   - start tag, end tag, end-of-file: fall through to root
         ##     creation.
         ##
         ## Root creation appends a new |html| element to the document and
         ## pushes it as the first entry of $self->{open_elements}.  The
         ## token that triggered it is not consumed, so the caller's next
         ## phase sees it again.  Any other token type is a fatal error.
1730     my $self = shift;
1731    
1732     B: {
1733     if ($token->{type} eq 'DOCTYPE') {
1734     !!!parse-error (type => 'in html:#DOCTYPE');
1735     ## Ignore the token
1736     ## Stay in the phase
1737     !!!next-token;
1738     redo B;
1739     } elsif ($token->{type} eq 'comment') {
1740     my $comment = $self->{document}->create_comment ($token->{data});
1741     $self->{document}->append_child ($comment);
1742     ## Stay in the phase
1743     !!!next-token;
1744     redo B;
1745     } elsif ($token->{type} eq 'character') {
1746     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
1747     $self->{document}->manakai_append_text ($1);
1748     ## ISSUE: DOM3 Core does not allow Document > Text
1749     unless (length $token->{data}) {
1750     ## Stay in the phase
1751     !!!next-token;
1752     redo B;
1753     }
1754     }
1755     #
1756     } elsif ({
1757     'start tag' => 1,
1758     'end tag' => 1,
1759     'end-of-file' => 1,
1760     }->{$token->{type}}) {
1761     ## ISSUE: There is an issue in the spec
1762     #
1763     } else {
1764     die "$0: $token->{type}: Unknown token";
1765     }
         ## Entries of the stack of open elements are [node, tag name] pairs.
1766     my $root_element; !!!create-element ($root_element, 'html');
1767     $self->{document}->append_child ($root_element);
1768     push @{$self->{open_elements}}, [$root_element, 'html'];
1769     #$phase = 'main';
1770     ## reprocess
1771     #redo B;
1772     return;
1773     } # B
1774     } # _tree_construction_root_element
1775    
1776     sub _reset_insertion_mode ($) {
         ## Chooses $self->{insertion_mode} from the tag names found on the
         ## stack of open elements, examined from the last entry towards
         ## the first, and returns as soon as a mode has been assigned.
         ## The "Step N" comments follow the numbering of the algorithm
         ## being implemented.
1777     my $self = shift;
1778    
1779     ## Step 1
1780     my $last;
1781    
1782     ## Step 2
1783     my $i = -1;
1784     my $node = $self->{open_elements}->[$i];
1785    
1786     ## Step 3
1787     S3: {
1788     $last = 1 if $self->{open_elements}->[0]->[0] eq $node->[0];
         ## NOTE(review): the substitution below runs on every pass whenever
         ## inner_html_node is defined (and is not td/th), not only when
         ## $last has just been set -- confirm that this matches the spec's
         ## wording for this step.
1789     if (defined $self->{inner_html_node}) {
1790     if ($self->{inner_html_node}->[1] eq 'td' or
1791     $self->{inner_html_node}->[1] eq 'th') {
1792     #
1793     } else {
1794     $node = $self->{inner_html_node};
1795     }
1796     }
1797    
1798     ## Step 4..13
1799     my $new_mode = {
1800     select => 'in select',
1801     td => 'in cell',
1802     th => 'in cell',
1803     tr => 'in row',
1804     tbody => 'in table body',
1805     thead => 'in table head',
1806     tfoot => 'in table foot',
1807     caption => 'in caption',
1808     colgroup => 'in column group',
1809     table => 'in table',
1810     head => 'in body', # not in head!
1811     body => 'in body',
1812     frameset => 'in frameset',
1813     }->{$node->[1]};
         ## "assign and return" only returns because every mode name in the
         ## table above is a true (non-empty) string.
1814     $self->{insertion_mode} = $new_mode and return if defined $new_mode;
1815    
1816     ## Step 14
1817     if ($node->[1] eq 'html') {
1818     unless (defined $self->{head_element}) {
1819     $self->{insertion_mode} = 'before head';
1820     } else {
1821     $self->{insertion_mode} = 'after head';
1822     }
1823     return;
1824     }
1825    
1826     ## Step 15
1827     $self->{insertion_mode} = 'in body' and return if $last;
1828    
1829     ## Step 16
1830     $i--;
1831     $node = $self->{open_elements}->[$i];
1832    
1833     ## Step 17
1834     redo S3;
1835     } # S3
1836     } # _reset_insertion_mode
1837    
1838     sub _tree_construction_main ($) {
1839     my $self = shift;
1840    
1841     my $phase = 'main';
1842 wakaba 1.1
1843     my $active_formatting_elements = [];
1844    
1845     my $reconstruct_active_formatting_elements = sub { # MUST
         ## Re-opens formatting elements that are listed in
         ## $active_formatting_elements but are no longer on the stack of
         ## open elements.
         ##
         ## Argument: $insert -- code reference called with each freshly
         ## cloned node; it decides where the clone is inserted in the tree.
         ##
         ## Does nothing when the list is empty, or when its last entry is
         ## a marker or is still on the stack of open elements.  $i is a
         ## negative index counted from the end of the list.
1846     my $insert = shift;
1847    
1848     ## Step 1
1849     return unless @$active_formatting_elements;
1850    
1851     ## Step 3
1852     my $i = -1;
1853     my $entry = $active_formatting_elements->[$i];
1854    
1855     ## Step 2
1856     return if $entry->[0] eq '#marker';
1857 wakaba 1.3 for (@{$self->{open_elements}}) {
1858 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1859     return;
1860     }
1861     }
1862    
         ## S4 rewinds towards the start of the list until it meets a marker
         ## or an entry that is still open, then steps forward once (Step 7).
         ## Reaching the first entry leaves the block without that step.
1863     S4: {
1864     ## Step 4
1865     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
1866    
1867     ## Step 5
1868     $i--;
1869     $entry = $active_formatting_elements->[$i];
1870    
1871     ## Step 6
1872     if ($entry->[0] eq '#marker') {
1873     #
1874     } else {
1875     my $in_open_elements;
1876 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
1877 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1878     $in_open_elements = 1;
1879     last OE;
1880     }
1881     }
1882     if ($in_open_elements) {
1883     #
1884     } else {
1885     redo S4;
1886     }
1887     }
1888    
1889     ## Step 7
1890     $i++;
1891     $entry = $active_formatting_elements->[$i];
1892     } # S4
1893    
         ## S7 walks forward from $i to the end of the list: each entry is
         ## shallow-cloned, inserted through $insert, pushed onto the stack
         ## of open elements, and the list entry is replaced by the clone.
1894     S7: {
1895     ## Step 8
1896     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
1897    
1898     ## Step 9
1899     $insert->($clone->[0]);
1900 wakaba 1.3 push @{$self->{open_elements}}, $clone;
1901 wakaba 1.1
1902     ## Step 10
1903 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
1904 wakaba 1.1
1905     ## Step 11
         ## The clone equals the last list entry only once $i has reached
         ## the end of the list, which ends the loop.
1906     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
1907     ## Step 7'
1908     $i++;
1909     $entry = $active_formatting_elements->[$i];
1910    
1911     redo S7;
1912     }
1913     } # S7
1914     }; # $reconstruct_active_formatting_elements
1915    
1916     my $clear_up_to_marker = sub {
         ## Truncates $active_formatting_elements at its last '#marker'
         ## entry: the marker and every entry after it are removed.  The
         ## list is left untouched when it contains no marker.
1917     for (reverse 0..$#$active_formatting_elements) {
1918     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
         ## splice without a length removes everything from index $_ onward.
1919     splice @$active_formatting_elements, $_;
1920     return;
1921     }
1922     }
1923     }; # $clear_up_to_marker
1924    
1925     my $style_start_tag = sub {
         ## Handles a |style| start tag held in $token.  Creates the
         ## element with the token's attributes and appends it to the head
         ## element (when the insertion mode is 'in head' and a head
         ## element is known) or otherwise to the current node.  It then
         ## switches the content model flag to CDATA, gathers all
         ## consecutive character tokens into one text node, and restores
         ## the flag to PCDATA.  A following token other than a |style| end
         ## tag is reported as a parse error; in either case that token is
         ## skipped by requesting the next one.
1926 wakaba 1.6 my $style_el; !!!create-element ($style_el, 'style', $token->{attributes});
1927 wakaba 1.3 ## $self->{insertion_mode} eq 'in head' and ... (always true)
1928     (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
1929     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
1930 wakaba 1.1 ->append_child ($style_el);
1931     $self->{content_model_flag} = 'CDATA';
1932    
1933     my $text = '';
1934     !!!next-token;
1935     while ($token->{type} eq 'character') {
1936     $text .= $token->{data};
1937     !!!next-token;
1938     } # stop if non-character token or tokenizer stops tokenising
1939     if (length $text) {
1940     $style_el->manakai_append_text ($text);
1941     }
1942    
1943     $self->{content_model_flag} = 'PCDATA';
1944    
1945     if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
1946     ## Ignore the token
1947     } else {
1948 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
1949 wakaba 1.1 ## ISSUE: And ignore?
1950     }
1951     !!!next-token;
1952     }; # $style_start_tag
1953    
1954     my $script_start_tag = sub {
         ## Handles a |script| start tag held in $token.  Creates the
         ## element with the token's attributes, switches the content model
         ## flag to CDATA, gathers all consecutive character tokens into
         ## one text node, and restores the flag to PCDATA.  A following
         ## token other than a |script| end tag is reported as a parse
         ## error.  Unlike $style_start_tag, the element is attached to the
         ## tree only after its text has been collected, and only when
         ## $self->{inner_html_node} is not defined.
1955     my $script_el;
1956     !!!create-element ($script_el, 'script', $token->{attributes});
1957     ## TODO: mark as "parser-inserted"
1958    
1959     $self->{content_model_flag} = 'CDATA';
1960    
1961     my $text = '';
1962     !!!next-token;
1963     while ($token->{type} eq 'character') {
1964     $text .= $token->{data};
1965     !!!next-token;
1966     } # stop if non-character token or tokenizer stops tokenising
1967     if (length $text) {
1968     $script_el->manakai_append_text ($text);
1969     }
1970    
1971     $self->{content_model_flag} = 'PCDATA';
1972    
1973     if ($token->{type} eq 'end tag' and
1974     $token->{tag_name} eq 'script') {
1975     ## Ignore the token
1976     } else {
1977 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
1978 wakaba 1.1 ## ISSUE: And ignore?
1979     ## TODO: mark as "already executed"
1980     }
1981    
         ## NOTE(review): in the inner_html_node case nothing in this sub
         ## appends $script_el to any parent -- confirm this is intended.
1982 wakaba 1.3 if (defined $self->{inner_html_node}) {
1983     ## TODO: mark as "already executed"
1984     } else {
1985 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
1986     ## TODO: insertion point = just before the next input character
1987    
         ## Same parent selection as in $style_start_tag: the head element
         ## in 'in head' mode when one is known, else the current node.
1988 wakaba 1.3 (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
1989     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])->append_child ($script_el);
1990 wakaba 1.1
1991     ## TODO: insertion point = $old_insertion_point (might be "undefined")
1992    
1993     ## TODO: if there is a script that will execute as soon as the parser resume, then...
1994     }
1995    
1996     !!!next-token;
1997     }; # $script_start_tag
1998    
1999     my $formatting_end_tag = sub {
         ## Handles an end tag for a formatting element.
         ##
         ## Argument: $tag_name -- tag name of the end tag being processed.
         ##
         ## Looks up the newest matching entry in
         ## $active_formatting_elements, checks it against the stack of
         ## open elements and, when a "furthest block" exists below it,
         ## re-parents nodes as laid out in the numbered steps.  The FET
         ## block is re-run (Step 14) until one of the `return' paths is
         ## taken; each of those paths requests the next token first.
         ## $formatting_category, $special_category and $scoping_category
         ## are lookup tables defined outside this sub.
2000     my $tag_name = shift;
2001    
2002     FET: {
2003     ## Step 1
2004     my $formatting_element;
2005     my $formatting_element_i_in_active;
         ## Search from the end of the list, giving up at the last marker.
2006     AFE: for (reverse 0..$#$active_formatting_elements) {
2007     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
2008     $formatting_element = $active_formatting_elements->[$_];
2009     $formatting_element_i_in_active = $_;
2010     last AFE;
2011     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
2012     last AFE;
2013     }
2014     } # AFE
2015     unless (defined $formatting_element) {
2016 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$tag_name);
2017 wakaba 1.1 ## Ignore the token
2018     !!!next-token;
2019     return;
2020     }
2021     ## has an element in scope
         ## $in_scope drops to 0 once a scope-limiting element has been
         ## passed while walking down from the top of the stack.
2022     my $in_scope = 1;
2023     my $formatting_element_i_in_open;
2024 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2025     my $node = $self->{open_elements}->[$_];
2026 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
2027     if ($in_scope) {
2028     $formatting_element_i_in_open = $_;
2029     last INSCOPE;
2030     } else { # in open elements but not in scope
2031 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2032 wakaba 1.1 ## Ignore the token
2033     !!!next-token;
2034     return;
2035     }
2036     } elsif ({
2037     table => 1, caption => 1, td => 1, th => 1,
2038     button => 1, marquee => 1, object => 1, html => 1,
2039     }->{$node->[1]}) {
2040     $in_scope = 0;
2041     }
2042     } # INSCOPE
2043     unless (defined $formatting_element_i_in_open) {
2044 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
         ## NOTE(review): pop removes the LAST list entry, which is the
         ## formatting element only when it was found at the end of the
         ## list; its actual index is $formatting_element_i_in_active --
         ## confirm which one is meant to be removed.
2045 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
2046     !!!next-token; ## TODO: ok?
2047     return;
2048     }
2049 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
2050 wakaba 1.4 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2051 wakaba 1.1 }
2052    
2053     ## Step 2
2054     my $furthest_block;
2055     my $furthest_block_i_in_open;
         ## Walks from the top of the stack down to the formatting element.
         ## Every qualifying node overwrites the previous one, so the node
         ## kept is the qualifying node nearest to the formatting element.
2056 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2057     my $node = $self->{open_elements}->[$_];
2058 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
2059     #not $phrasing_category->{$node->[1]} and
2060     ($special_category->{$node->[1]} or
2061     $scoping_category->{$node->[1]})) {
2062     $furthest_block = $node;
2063     $furthest_block_i_in_open = $_;
2064     } elsif ($node->[0] eq $formatting_element->[0]) {
2065     last OE;
2066     }
2067     } # OE
2068    
2069     ## Step 3
         ## No furthest block: pop the stack down to and including the
         ## formatting element and drop its entry from the active list.
2070     unless (defined $furthest_block) { # MUST
2071 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
2072 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
2073     !!!next-token;
2074     return;
2075     }
2076    
2077     ## Step 4
2078 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
2079 wakaba 1.1
2080     ## Step 5
2081     my $furthest_block_parent = $furthest_block->[0]->parent_node;
2082     if (defined $furthest_block_parent) {
2083     $furthest_block_parent->remove_child ($furthest_block->[0]);
2084     }
2085    
2086     ## Step 6
         ## The bookmark is kept as the node of the entry preceding the
         ## formatting element in the active list.
         ## NOTE(review): when the formatting element is at index 0 this
         ## index is -1, i.e. the last entry -- confirm this is acceptable.
2087     my $bookmark_prev_el
2088     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
2089     ->[0];
2090    
2091     ## Step 7
2092     my $node = $furthest_block;
2093     my $node_i_in_open = $furthest_block_i_in_open;
2094     my $last_node = $furthest_block;
2095     S7: {
2096     ## Step 1
2097     $node_i_in_open--;
2098 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
2099 wakaba 1.1
2100     ## Step 2
2101     my $node_i_in_active;
         ## A node with no entry in the active list is removed from the
         ## stack of open elements and S7 restarts with the node below it.
2102     S7S2: {
2103     for (reverse 0..$#$active_formatting_elements) {
2104     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2105     $node_i_in_active = $_;
2106     last S7S2;
2107     }
2108     }
2109 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
2110 wakaba 1.1 redo S7;
2111     } # S7S2
2112    
2113     ## Step 3
2114     last S7 if $node->[0] eq $formatting_element->[0];
2115    
2116     ## Step 4
2117     if ($last_node->[0] eq $furthest_block->[0]) {
2118     $bookmark_prev_el = $node->[0];
2119     }
2120    
2121     ## Step 5
         ## A node that already has children is replaced, in both lists, by
         ## a shallow clone so that the original subtree is left untouched.
2122     if ($node->[0]->has_child_nodes ()) {
2123     my $clone = [$node->[0]->clone_node (0), $node->[1]];
2124     $active_formatting_elements->[$node_i_in_active] = $clone;
2125 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
2126 wakaba 1.1 $node = $clone;
2127     }
2128    
2129     ## Step 6
2130     $node->[0]->append_child ($last_node->[0]);
2131    
2132     ## Step 7
2133     $last_node = $node;
2134    
2135     ## Step 8
2136     redo S7;
2137     } # S7
2138    
2139     ## Step 8
2140     $common_ancestor_node->[0]->append_child ($last_node->[0]);
2141    
2142     ## Step 9
2143     my $clone = [$formatting_element->[0]->clone_node (0),
2144     $formatting_element->[1]];
2145    
2146     ## Step 10
         ## The child list is copied into @cn first, so the loop iterates
         ## over a fixed list while the children are being moved.
2147     my @cn = @{$furthest_block->[0]->child_nodes};
2148     $clone->[0]->append_child ($_) for @cn;
2149    
2150     ## Step 11
2151     $furthest_block->[0]->append_child ($clone->[0]);
2152    
2153     ## Step 12
2154     my $i;
         ## NOTE(review): `$i-- and last AFE' leaves the loop only when the
         ## value of $i before the decrement is true, i.e. non-zero.
2155     AFE: for (reverse 0..$#$active_formatting_elements) {
2156     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
2157     splice @$active_formatting_elements, $_, 1;
2158     $i-- and last AFE if defined $i;
2159     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
2160     $i = $_;
2161     }
2162     } # AFE
2163     splice @$active_formatting_elements, $i + 1, 0, $clone;
2164    
2165     ## Step 13
2166     undef $i;
2167 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2168     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
2169     splice @{$self->{open_elements}}, $_, 1;
2170 wakaba 1.1 $i-- and last OE if defined $i;
2171 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
2172 wakaba 1.1 $i = $_;
2173     }
2174     } # OE
         ## NOTE(review): the length argument is 1 here, so an existing
         ## entry at $i + 1 is REPLACED by the clone, whereas Step 12 uses
         ## length 0 (pure insertion) -- confirm which one is intended.
2175 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
2176 wakaba 1.1
2177     ## Step 14
2178     redo FET;
2179     } # FET
2180     }; # $formatting_end_tag
2181    
2182     my $insert_to_current = sub {
         ## Appends the node given as the only argument to the current
         ## node, i.e. the node of the last entry on the stack of open
         ## elements.
2183 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child (shift);
2184 wakaba 1.1 }; # $insert_to_current
2185    
2186     my $insert_to_foster = sub {
         ## Inserts the node given as the only argument, foster-parenting
         ## it when the current node is a table, tbody, tfoot, thead or tr
         ## element.  For any other current node the child is appended to
         ## the current node.
         ##
         ## Foster parent selection uses the last |table| entry on the
         ## stack of open elements:
         ##   - if that table has a parent whose node_type is 1, the child
         ##     is inserted into that parent just before the table;
         ##   - otherwise the foster parent is the node of the stack entry
         ##     preceding the table;
         ##   - if the stack holds no |table| at all, the foster parent is
         ##     the node of the first stack entry.
         ## In the last two cases $next_sibling stays undefined.
2187     my $child = shift;
2188     if ({
2189     table => 1, tbody => 1, tfoot => 1,
2190     thead => 1, tr => 1,
2191 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2192 wakaba 1.1 # MUST
2193     my $foster_parent_element;
2194     my $next_sibling;
2195 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2196     if ($self->{open_elements}->[$_]->[1] eq 'table') {
2197     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
2198 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
2199     $foster_parent_element = $parent;
2200 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
2201 wakaba 1.1 } else {
2202     $foster_parent_element
2203 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
2204 wakaba 1.1 }
2205     last OE;
2206     }
2207     } # OE
2208 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
2209 wakaba 1.1 unless defined $foster_parent_element;
         ## NOTE(review): presumably insert_before with an undefined
         ## reference child appends at the end -- confirm against the DOM
         ## implementation in use.
2210     $foster_parent_element->insert_before
2211     ($child, $next_sibling);
2212     } else {
2213 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($child);
2214 wakaba 1.1 }
2215     }; # $insert_to_foster
2216    
2217     my $in_body = sub {
2218     my $insert = shift;
2219     if ($token->{type} eq 'start tag') {
2220     if ($token->{tag_name} eq 'script') {
2221     $script_start_tag->();
2222     return;
2223     } elsif ($token->{tag_name} eq 'style') {
2224     $style_start_tag->();
2225     return;
2226     } elsif ({
2227     base => 1, link => 1, meta => 1,
2228     }->{$token->{tag_name}}) {
2229 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2230 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2231     my $el;
2232     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2233 wakaba 1.3 if (defined $self->{head_element}) {
2234     $self->{head_element}->append_child ($el);
2235 wakaba 1.1 } else {
2236     $insert->($el);
2237     }
2238    
2239     !!!next-token;
2240     return;
2241     } elsif ($token->{tag_name} eq 'title') {
2242 wakaba 1.3 !!!parse-error (type => 'in body:title');
2243 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2244     my $title_el;
2245     !!!create-element ($title_el, 'title', $token->{attributes});
2246 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2247 wakaba 1.1 ->append_child ($title_el);
2248     $self->{content_model_flag} = 'RCDATA';
2249    
2250     my $text = '';
2251     !!!next-token;
2252     while ($token->{type} eq 'character') {
2253     $text .= $token->{data};
2254     !!!next-token;
2255     }
2256     if (length $text) {
2257     $title_el->manakai_append_text ($text);
2258     }
2259    
2260     $self->{content_model_flag} = 'PCDATA';
2261    
2262     if ($token->{type} eq 'end tag' and
2263     $token->{tag_name} eq 'title') {
2264     ## Ignore the token
2265     } else {
2266 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2267 wakaba 1.1 ## ISSUE: And ignore?
2268     }
2269     !!!next-token;
2270     return;
2271     } elsif ($token->{tag_name} eq 'body') {
2272 wakaba 1.3 !!!parse-error (type => 'in body:body');
2273 wakaba 1.1
2274 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2275     $self->{open_elements}->[1]->[1] ne 'body') {
2276 wakaba 1.1 ## Ignore the token
2277     } else {
2278 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2279 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2280     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2281     $body_el->set_attribute_ns
2282     (undef, [undef, $attr_name],
2283     $token->{attributes}->{$attr_name}->{value});
2284     }
2285     }
2286     }
2287     !!!next-token;
2288     return;
2289     } elsif ({
2290     address => 1, blockquote => 1, center => 1, dir => 1,
2291     div => 1, dl => 1, fieldset => 1, listing => 1,
2292     menu => 1, ol => 1, p => 1, ul => 1,
2293     pre => 1,
2294     }->{$token->{tag_name}}) {
2295     ## has a p element in scope
2296 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2297 wakaba 1.1 if ($_->[1] eq 'p') {
2298     !!!back-token;
2299     $token = {type => 'end tag', tag_name => 'p'};
2300     return;
2301     } elsif ({
2302     table => 1, caption => 1, td => 1, th => 1,
2303     button => 1, marquee => 1, object => 1, html => 1,
2304     }->{$_->[1]}) {
2305     last INSCOPE;
2306     }
2307     } # INSCOPE
2308    
2309     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2310     if ($token->{tag_name} eq 'pre') {
2311     !!!next-token;
2312     if ($token->{type} eq 'character') {
2313     $token->{data} =~ s/^\x0A//;
2314     unless (length $token->{data}) {
2315     !!!next-token;
2316     }
2317     }
2318     } else {
2319     !!!next-token;
2320     }
2321     return;
2322     } elsif ($token->{tag_name} eq 'form') {
2323 wakaba 1.3 if (defined $self->{form_element}) {
2324     !!!parse-error (type => 'in form:form');
2325 wakaba 1.1 ## Ignore the token
2326 wakaba 1.7 !!!next-token;
2327     return;
2328 wakaba 1.1 } else {
2329     ## has a p element in scope
2330 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2331 wakaba 1.1 if ($_->[1] eq 'p') {
2332     !!!back-token;
2333     $token = {type => 'end tag', tag_name => 'p'};
2334     return;
2335     } elsif ({
2336     table => 1, caption => 1, td => 1, th => 1,
2337     button => 1, marquee => 1, object => 1, html => 1,
2338     }->{$_->[1]}) {
2339     last INSCOPE;
2340     }
2341     } # INSCOPE
2342    
2343     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2344 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2345 wakaba 1.1 !!!next-token;
2346     return;
2347     }
2348     } elsif ($token->{tag_name} eq 'li') {
2349     ## has a p element in scope
2350 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2351 wakaba 1.1 if ($_->[1] eq 'p') {
2352     !!!back-token;
2353     $token = {type => 'end tag', tag_name => 'p'};
2354     return;
2355     } elsif ({
2356     table => 1, caption => 1, td => 1, th => 1,
2357     button => 1, marquee => 1, object => 1, html => 1,
2358     }->{$_->[1]}) {
2359     last INSCOPE;
2360     }
2361     } # INSCOPE
2362    
2363     ## Step 1
2364     my $i = -1;
2365 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2366 wakaba 1.1 LI: {
2367     ## Step 2
2368     if ($node->[1] eq 'li') {
2369 wakaba 1.8 if ($i != -1) {
2370     !!!parse-error (type => 'end tag missing:'.
2371     $self->{open_elements}->[-1]->[1]);
2372     ## TODO: test
2373     }
2374 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2375 wakaba 1.1 last LI;
2376     }
2377    
2378     ## Step 3
2379     if (not $formatting_category->{$node->[1]} and
2380     #not $phrasing_category->{$node->[1]} and
2381     ($special_category->{$node->[1]} or
2382     $scoping_category->{$node->[1]}) and
2383     $node->[1] ne 'address' and $node->[1] ne 'div') {
2384     last LI;
2385     }
2386    
2387     ## Step 4
2388     $i--;
2389 wakaba 1.3 $node = $self->{open_elements}->[$i];
2390 wakaba 1.1 redo LI;
2391     } # LI
2392    
2393     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2394     !!!next-token;
2395     return;
2396     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2397     ## has a p element in scope
2398 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2399 wakaba 1.1 if ($_->[1] eq 'p') {
2400     !!!back-token;
2401     $token = {type => 'end tag', tag_name => 'p'};
2402     return;
2403     } elsif ({
2404     table => 1, caption => 1, td => 1, th => 1,
2405     button => 1, marquee => 1, object => 1, html => 1,
2406     }->{$_->[1]}) {
2407     last INSCOPE;
2408     }
2409     } # INSCOPE
2410    
2411     ## Step 1
2412     my $i = -1;
2413 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2414 wakaba 1.1 LI: {
2415     ## Step 2
2416     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2417 wakaba 1.8 if ($i != -1) {
2418     !!!parse-error (type => 'end tag missing:'.
2419     $self->{open_elements}->[-1]->[1]);
2420     ## TODO: test
2421     }
2422 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2423 wakaba 1.1 last LI;
2424     }
2425    
2426     ## Step 3
2427     if (not $formatting_category->{$node->[1]} and
2428     #not $phrasing_category->{$node->[1]} and
2429     ($special_category->{$node->[1]} or
2430     $scoping_category->{$node->[1]}) and
2431     $node->[1] ne 'address' and $node->[1] ne 'div') {
2432     last LI;
2433     }
2434    
2435     ## Step 4
2436     $i--;
2437 wakaba 1.3 $node = $self->{open_elements}->[$i];
2438 wakaba 1.1 redo LI;
2439     } # LI
2440    
2441     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2442     !!!next-token;
2443     return;
2444     } elsif ($token->{tag_name} eq 'plaintext') {
2445     ## has a p element in scope
2446 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2447 wakaba 1.1 if ($_->[1] eq 'p') {
2448     !!!back-token;
2449     $token = {type => 'end tag', tag_name => 'p'};
2450     return;
2451     } elsif ({
2452     table => 1, caption => 1, td => 1, th => 1,
2453     button => 1, marquee => 1, object => 1, html => 1,
2454     }->{$_->[1]}) {
2455     last INSCOPE;
2456     }
2457     } # INSCOPE
2458    
2459     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2460    
2461     $self->{content_model_flag} = 'PLAINTEXT';
2462    
2463     !!!next-token;
2464     return;
2465     } elsif ({
2466     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2467     }->{$token->{tag_name}}) {
2468     ## has a p element in scope
2469 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2470     my $node = $self->{open_elements}->[$_];
2471 wakaba 1.1 if ($node->[1] eq 'p') {
2472     !!!back-token;
2473     $token = {type => 'end tag', tag_name => 'p'};
2474     return;
2475     } elsif ({
2476     table => 1, caption => 1, td => 1, th => 1,
2477     button => 1, marquee => 1, object => 1, html => 1,
2478     }->{$node->[1]}) {
2479     last INSCOPE;
2480     }
2481     } # INSCOPE
2482    
2483     ## has an element in scope
2484     my $i;
2485 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2486     my $node = $self->{open_elements}->[$_];
2487 wakaba 1.1 if ({
2488     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2489     }->{$node->[1]}) {
2490     $i = $_;
2491     last INSCOPE;
2492     } elsif ({
2493     table => 1, caption => 1, td => 1, th => 1,
2494     button => 1, marquee => 1, object => 1, html => 1,
2495     }->{$node->[1]}) {
2496     last INSCOPE;
2497     }
2498     } # INSCOPE
2499    
2500     if (defined $i) {
2501 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2502     splice @{$self->{open_elements}}, $i;
2503 wakaba 1.1 }
2504    
2505     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2506    
2507     !!!next-token;
2508     return;
2509     } elsif ($token->{tag_name} eq 'a') {
2510     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2511     my $node = $active_formatting_elements->[$i];
2512     if ($node->[1] eq 'a') {
2513 wakaba 1.3 !!!parse-error (type => 'in a:a');
2514 wakaba 1.1
2515     !!!back-token;
2516     $token = {type => 'end tag', tag_name => 'a'};
2517     $formatting_end_tag->($token->{tag_name});
2518    
2519     AFE2: for (reverse 0..$#$active_formatting_elements) {
2520     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2521     splice @$active_formatting_elements, $_, 1;
2522     last AFE2;
2523     }
2524     } # AFE2
2525 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2526     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2527     splice @{$self->{open_elements}}, $_, 1;
2528 wakaba 1.1 last OE;
2529     }
2530     } # OE
2531     last AFE;
2532     } elsif ($node->[0] eq '#marker') {
2533     last AFE;
2534     }
2535     } # AFE
2536    
2537     $reconstruct_active_formatting_elements->($insert_to_current);
2538    
2539     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2540 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2541 wakaba 1.1
2542     !!!next-token;
2543     return;
2544     } elsif ({
2545     b => 1, big => 1, em => 1, font => 1, i => 1,
2546     nobr => 1, s => 1, small => 1, strile => 1,
2547     strong => 1, tt => 1, u => 1,
2548     }->{$token->{tag_name}}) {
2549     $reconstruct_active_formatting_elements->($insert_to_current);
2550    
2551     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2552 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2553 wakaba 1.1
2554     !!!next-token;
2555     return;
2556     } elsif ($token->{tag_name} eq 'button') {
2557     ## has a button element in scope
2558 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2559     my $node = $self->{open_elements}->[$_];
2560 wakaba 1.1 if ($node->[1] eq 'button') {
2561 wakaba 1.3 !!!parse-error (type => 'in button:button');
2562 wakaba 1.1 !!!back-token;
2563     $token = {type => 'end tag', tag_name => 'button'};
2564     return;
2565     } elsif ({
2566     table => 1, caption => 1, td => 1, th => 1,
2567     button => 1, marquee => 1, object => 1, html => 1,
2568     }->{$node->[1]}) {
2569     last INSCOPE;
2570     }
2571     } # INSCOPE
2572    
2573     $reconstruct_active_formatting_elements->($insert_to_current);
2574    
2575     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2576     push @$active_formatting_elements, ['#marker', ''];
2577    
2578     !!!next-token;
2579     return;
2580     } elsif ($token->{tag_name} eq 'marquee' or
2581     $token->{tag_name} eq 'object') {
2582     $reconstruct_active_formatting_elements->($insert_to_current);
2583    
2584     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2585     push @$active_formatting_elements, ['#marker', ''];
2586    
2587     !!!next-token;
2588     return;
2589     } elsif ($token->{tag_name} eq 'xmp') {
2590     $reconstruct_active_formatting_elements->($insert_to_current);
2591    
2592     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2593    
2594     $self->{content_model_flag} = 'CDATA';
2595    
2596     !!!next-token;
2597     return;
2598     } elsif ($token->{tag_name} eq 'table') {
2599     ## has a p element in scope
2600 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2601 wakaba 1.1 if ($_->[1] eq 'p') {
2602     !!!back-token;
2603     $token = {type => 'end tag', tag_name => 'p'};
2604     return;
2605     } elsif ({
2606     table => 1, caption => 1, td => 1, th => 1,
2607     button => 1, marquee => 1, object => 1, html => 1,
2608     }->{$_->[1]}) {
2609     last INSCOPE;
2610     }
2611     } # INSCOPE
2612    
2613     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2614    
2615 wakaba 1.3 $self->{insertion_mode} = 'in table';
2616 wakaba 1.1
2617     !!!next-token;
2618     return;
2619     } elsif ({
2620     area => 1, basefont => 1, bgsound => 1, br => 1,
2621     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2622     image => 1,
2623     }->{$token->{tag_name}}) {
2624     if ($token->{tag_name} eq 'image') {
2625 wakaba 1.3 !!!parse-error (type => 'image');
2626 wakaba 1.1 $token->{tag_name} = 'img';
2627     }
2628    
2629     $reconstruct_active_formatting_elements->($insert_to_current);
2630    
2631     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2632 wakaba 1.3 pop @{$self->{open_elements}};
2633 wakaba 1.1
2634     !!!next-token;
2635     return;
2636     } elsif ($token->{tag_name} eq 'hr') {
2637     ## has a p element in scope
2638 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2639 wakaba 1.1 if ($_->[1] eq 'p') {
2640     !!!back-token;
2641     $token = {type => 'end tag', tag_name => 'p'};
2642     return;
2643     } elsif ({
2644     table => 1, caption => 1, td => 1, th => 1,
2645     button => 1, marquee => 1, object => 1, html => 1,
2646     }->{$_->[1]}) {
2647     last INSCOPE;
2648     }
2649     } # INSCOPE
2650    
2651     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2652 wakaba 1.3 pop @{$self->{open_elements}};
2653 wakaba 1.1
2654     !!!next-token;
2655     return;
2656     } elsif ($token->{tag_name} eq 'input') {
2657     $reconstruct_active_formatting_elements->($insert_to_current);
2658    
2659     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2660 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2661     pop @{$self->{open_elements}};
2662 wakaba 1.1
2663     !!!next-token;
2664     return;
2665     } elsif ($token->{tag_name} eq 'isindex') {
2666 wakaba 1.3 !!!parse-error (type => 'isindex');
2667 wakaba 1.1
2668 wakaba 1.3 if (defined $self->{form_element}) {
2669 wakaba 1.1 ## Ignore the token
2670     !!!next-token;
2671     return;
2672     } else {
2673     my $at = $token->{attributes};
2674     $at->{name} = {name => 'name', value => 'isindex'};
2675     my @tokens = (
2676     {type => 'start tag', tag_name => 'form'},
2677     {type => 'start tag', tag_name => 'hr'},
2678     {type => 'start tag', tag_name => 'p'},
2679     {type => 'start tag', tag_name => 'label'},
2680     {type => 'character',
2681     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2682     ## TODO: make this configurable
2683     {type => 'start tag', tag_name => 'input', attributes => $at},
2684     #{type => 'character', data => ''}, # SHOULD
2685     {type => 'end tag', tag_name => 'label'},
2686     {type => 'end tag', tag_name => 'p'},
2687     {type => 'start tag', tag_name => 'hr'},
2688     {type => 'end tag', tag_name => 'form'},
2689     );
2690     $token = shift @tokens;
2691     !!!back-token (@tokens);
2692     return;
2693     }
2694     } elsif ({
2695     textarea => 1,
2696 wakaba 1.5 iframe => 1,
2697 wakaba 1.1 noembed => 1,
2698     noframes => 1,
2699     noscript => 0, ## TODO: 1 if scripting is enabled
2700     }->{$token->{tag_name}}) {
2701     my $tag_name = $token->{tag_name};
2702     my $el;
2703     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2704    
2705     if ($token->{tag_name} eq 'textarea') {
2706 wakaba 1.3 ## TODO: $self->{form_element} if defined
2707 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2708     } else {
2709     $self->{content_model_flag} = 'CDATA';
2710     }
2711    
2712     $insert->($el);
2713    
2714     my $text = '';
2715 wakaba 1.9 if ($token->{tag_name} eq 'textarea') {
2716     !!!next-token;
2717     if ($token->{type} eq 'character') {
2718     $token->{data} =~ s/^\x0A//;
2719     unless (length $token->{data}) {
2720     !!!next-token;
2721     }
2722     }
2723     } else {
2724     !!!next-token;
2725     }
2726 wakaba 1.1 while ($token->{type} eq 'character') {
2727     $text .= $token->{data};
2728     !!!next-token;
2729     }
2730     if (length $text) {
2731     $el->manakai_append_text ($text);
2732     }
2733    
2734     $self->{content_model_flag} = 'PCDATA';
2735    
2736     if ($token->{type} eq 'end tag' and
2737     $token->{tag_name} eq $tag_name) {
2738     ## The text run was closed by the element's own end tag: ignore the token
2739     } else {
2740 wakaba 1.8 if ($tag_name eq 'textarea') { ## |textarea| was read with the RCDATA flag, the other elements with CDATA; test the saved start tag name, since $token is now the token that ended the text run and may have no tag_name
2741     ## TODO: <http://html5.org/tools/web-apps-tracker?from=866&to=867>
2742 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2743     } else {
2744     !!!parse-error (type => 'in CDATA:#'.$token->{type});
2745     }
2746 wakaba 1.1 ## ISSUE: And ignore?
2747     }
2748     !!!next-token;
2749     return;
2750     } elsif ($token->{tag_name} eq 'select') {
2751     $reconstruct_active_formatting_elements->($insert_to_current);
2752    
2753     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2754    
2755 wakaba 1.3 $self->{insertion_mode} = 'in select';
2756 wakaba 1.1 !!!next-token;
2757     return;
2758     } elsif ({
2759     caption => 1, col => 1, colgroup => 1, frame => 1,
2760     frameset => 1, head => 1, option => 1, optgroup => 1,
2761     tbody => 1, td => 1, tfoot => 1, th => 1,
2762     thead => 1, tr => 1,
2763     }->{$token->{tag_name}}) {
2764 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2765 wakaba 1.1 ## Ignore the token
2766     !!!next-token;
2767     return;
2768    
2769     ## ISSUE: An issue on HTML5 new elements in the spec.
2770     } else {
2771     $reconstruct_active_formatting_elements->($insert_to_current);
2772    
2773     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2774    
2775     !!!next-token;
2776     return;
2777     }
2778     } elsif ($token->{type} eq 'end tag') {
2779     if ($token->{tag_name} eq 'body') {
2780 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2781 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2782 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2783     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2784 wakaba 1.1 }
2785 wakaba 1.3 $self->{insertion_mode} = 'after body';
2786 wakaba 1.1 !!!next-token;
2787     return;
2788     } else {
2789 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2790 wakaba 1.1 ## Ignore the token
2791     !!!next-token;
2792     return;
2793     }
2794     } elsif ($token->{tag_name} eq 'html') {
2795 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') { ## </html> is only acted on when the second open element is |body|
2796 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2797 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') { ## current node is not |body|: report the current node as unclosed, as the </body> branch does
2798     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2799 wakaba 1.1 }
2800 wakaba 1.3 $self->{insertion_mode} = 'after body';
2801 wakaba 1.1 ## reprocess: no next-token here, so the same token is still current after the mode switch
2802     return;
2803     } else {
2804 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2805 wakaba 1.1 ## Ignore the token
2806     !!!next-token;
2807     return;
2808     }
2809     } elsif ({
2810     address => 1, blockquote => 1, center => 1, dir => 1,
2811     div => 1, dl => 1, fieldset => 1, listing => 1,
2812     menu => 1, ol => 1, pre => 1, ul => 1,
2813     form => 1,
2814     p => 1,
2815     dd => 1, dt => 1, li => 1,
2816     button => 1, marquee => 1, object => 1,
2817     }->{$token->{tag_name}}) {
2818     ## has an element in scope
2819     my $i;
2820 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2821     my $node = $self->{open_elements}->[$_];
2822 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2823     ## generate implied end tags
2824     if ({
2825     dd => ($token->{tag_name} ne 'dd'),
2826     dt => ($token->{tag_name} ne 'dt'),
2827     li => ($token->{tag_name} ne 'li'),
2828     p => ($token->{tag_name} ne 'p'),
2829     td => 1, th => 1, tr => 1,
2830 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2831 wakaba 1.1 !!!back-token;
2832     $token = {type => 'end tag',
2833 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2834 wakaba 1.1 return;
2835     }
2836     $i = $_;
2837     last INSCOPE unless $token->{tag_name} eq 'p';
2838     } elsif ({
2839     table => 1, caption => 1, td => 1, th => 1,
2840     button => 1, marquee => 1, object => 1, html => 1,
2841     }->{$node->[1]}) {
2842     last INSCOPE;
2843     }
2844     } # INSCOPE
2845    
2846 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2847     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2848 wakaba 1.1 }
2849    
2850 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2851     undef $self->{form_element} if $token->{tag_name} eq 'form';
2852 wakaba 1.1 $clear_up_to_marker->()
2853     if {
2854     button => 1, marquee => 1, object => 1,
2855     }->{$token->{tag_name}};
2856     !!!next-token;
2857     return;
2858     } elsif ({
2859     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2860     }->{$token->{tag_name}}) {
2861     ## has an element in scope
2862     my $i;
2863 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2864     my $node = $self->{open_elements}->[$_];
2865 wakaba 1.1 if ({
2866     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2867     }->{$node->[1]}) {
2868     ## generate implied end tags
2869     if ({
2870     dd => 1, dt => 1, li => 1, p => 1,
2871     td => 1, th => 1, tr => 1,
2872 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2873 wakaba 1.1 !!!back-token;
2874     $token = {type => 'end tag',
2875 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2876 wakaba 1.1 return;
2877     }
2878     $i = $_;
2879     last INSCOPE;
2880     } elsif ({
2881     table => 1, caption => 1, td => 1, th => 1,
2882     button => 1, marquee => 1, object => 1, html => 1,
2883     }->{$node->[1]}) {
2884     last INSCOPE;
2885     }
2886     } # INSCOPE
2887    
2888 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2889     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2890 wakaba 1.1 }
2891    
2892 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2893 wakaba 1.1 !!!next-token;
2894     return;
2895     } elsif ({
2896     a => 1,
2897     b => 1, big => 1, em => 1, font => 1, i => 1,
2898     nobr => 1, s => 1, small => 1, strile => 1,
2899     strong => 1, tt => 1, u => 1,
2900     }->{$token->{tag_name}}) {
2901     $formatting_end_tag->($token->{tag_name});
2902 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
2903 wakaba 1.1 return;
2904     } elsif ({
2905     caption => 1, col => 1, colgroup => 1, frame => 1,
2906     frameset => 1, head => 1, option => 1, optgroup => 1,
2907     tbody => 1, td => 1, tfoot => 1, th => 1,
2908     thead => 1, tr => 1,
2909     area => 1, basefont => 1, bgsound => 1, br => 1,
2910     embed => 1, hr => 1, iframe => 1, image => 1,
2911 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2912 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2913     table => 1, textarea => 1, wbr => 1,
2914     noscript => 0, ## TODO: if scripting is enabled
2915     }->{$token->{tag_name}}) {
2916 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2917 wakaba 1.1 ## Ignore the token
2918     !!!next-token;
2919     return;
2920    
2921     ## ISSUE: Issue on HTML5 new elements in spec
2922    
2923     } else {
2924     ## Step 1
2925     my $node_i = -1;
2926 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2927 wakaba 1.1
2928     ## Step 2
2929     S2: {
2930     if ($node->[1] eq $token->{tag_name}) {
2931     ## Step 1
2932     ## generate implied end tags
2933     if ({
2934     dd => 1, dt => 1, li => 1, p => 1,
2935     td => 1, th => 1, tr => 1,
2936 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2937 wakaba 1.1 !!!back-token;
2938     $token = {type => 'end tag',
2939 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2940 wakaba 1.1 return;
2941     }
2942    
2943     ## Step 2
2944 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2945     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2946 wakaba 1.1 }
2947    
2948     ## Step 3
2949 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2950    
2951     !!!next-token;
2952 wakaba 1.1 last S2;
2953     } else {
2954     ## Step 3
2955     if (not $formatting_category->{$node->[1]} and
2956     #not $phrasing_category->{$node->[1]} and
2957     ($special_category->{$node->[1]} or
2958     $scoping_category->{$node->[1]})) {
2959 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2960 wakaba 1.1 ## Ignore the token
2961     !!!next-token;
2962     last S2;
2963     }
2964     }
2965    
2966     ## Step 4
2967     $node_i--;
2968 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2969 wakaba 1.1
2970     ## Step 5;
2971     redo S2;
2972     } # S2
2973 wakaba 1.3 return;
2974 wakaba 1.1 }
2975     }
2976     }; # $in_body
2977    
2978     B: {
2979 wakaba 1.3 if ($phase eq 'main') {
2980 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2981 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2982 wakaba 1.1 ## Ignore the token
2983     ## Stay in the phase
2984     !!!next-token;
2985     redo B;
2986     } elsif ($token->{type} eq 'start tag' and
2987     $token->{tag_name} eq 'html') {
2988     ## TODO: unless it is the first start tag token, parse-error
2989 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2990 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2991     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2992     $top_el->set_attribute_ns
2993     (undef, [undef, $attr_name],
2994     $token->{attributes}->{$attr_name}->{value});
2995     }
2996     }
2997     !!!next-token;
2998     redo B;
2999     } elsif ($token->{type} eq 'end-of-file') {
3000     ## Generate implied end tags
3001     if ({
3002     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3003 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3004 wakaba 1.1 !!!back-token;
3005 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3006 wakaba 1.1 redo B;
3007     }
3008    
3009 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
3010     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3011     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3012     } elsif (defined $self->{inner_html_node} and
3013     @{$self->{open_elements}} > 1 and
3014     $self->{open_elements}->[1]->[1] ne 'body') {
3015     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3016 wakaba 1.1 }
3017    
3018     ## Stop parsing
3019     last B;
3020    
3021     ## ISSUE: There is an issue in the spec.
3022     } else {
3023 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
3024 wakaba 1.1 if ($token->{type} eq 'character') {
3025     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3026 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3027 wakaba 1.1 unless (length $token->{data}) {
3028     !!!next-token;
3029     redo B;
3030     }
3031     }
3032     ## As if <head>
3033 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3034     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3035     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3036     $self->{insertion_mode} = 'in head';
3037 wakaba 1.1 ## reprocess
3038     redo B;
3039     } elsif ($token->{type} eq 'comment') {
3040     my $comment = $self->{document}->create_comment ($token->{data});
3041 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3042 wakaba 1.1 !!!next-token;
3043     redo B;
3044     } elsif ($token->{type} eq 'start tag') {
3045     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3046 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3047     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3048     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3049     $self->{insertion_mode} = 'in head';
3050 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3051     !!!next-token;
3052     #} elsif ({
3053     # base => 1, link => 1, meta => 1,
3054     # script => 1, style => 1, title => 1,
3055     # }->{$token->{tag_name}}) {
3056     # ## reprocess
3057     } else {
3058     ## reprocess
3059     }
3060     redo B;
3061     } elsif ($token->{type} eq 'end tag') {
3062     if ($token->{tag_name} eq 'html') {
3063     ## As if <head>
3064 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3065     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3066     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3067     $self->{insertion_mode} = 'in head';
3068 wakaba 1.1 ## reprocess
3069     redo B;
3070     } else {
3071 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3072 wakaba 1.1 ## Ignore the token
3073     !!!next-token;
3074     redo B;
3075     }
3076     } else {
3077     die "$0: $token->{type}: Unknown type";
3078     }
3079 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3080 wakaba 1.1 if ($token->{type} eq 'character') {
3081     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3082 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3083 wakaba 1.1 unless (length $token->{data}) {
3084     !!!next-token;
3085     redo B;
3086     }
3087     }
3088    
3089     #
3090     } elsif ($token->{type} eq 'comment') {
3091     my $comment = $self->{document}->create_comment ($token->{data});
3092 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3093 wakaba 1.1 !!!next-token;
3094     redo B;
3095     } elsif ($token->{type} eq 'start tag') {
3096     if ($token->{tag_name} eq 'title') {
3097     ## NOTE: There is an "as if in head" code clone
3098     my $title_el;
3099     !!!create-element ($title_el, 'title', $token->{attributes});
3100 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3101 wakaba 1.1 ->append_child ($title_el);
3102     $self->{content_model_flag} = 'RCDATA';
3103    
3104     my $text = '';
3105     !!!next-token;
3106     while ($token->{type} eq 'character') {
3107     $text .= $token->{data};
3108     !!!next-token;
3109     }
3110     if (length $text) {
3111     $title_el->manakai_append_text ($text);
3112     }
3113    
3114     $self->{content_model_flag} = 'PCDATA';
3115    
3116     if ($token->{type} eq 'end tag' and
3117     $token->{tag_name} eq 'title') {
3118     ## Ignore the token
3119     } else {
3120 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3121 wakaba 1.1 ## ISSUE: And ignore?
3122     }
3123     !!!next-token;
3124     redo B;
3125     } elsif ($token->{tag_name} eq 'style') {
3126     $style_start_tag->();
3127     redo B;
3128     } elsif ($token->{tag_name} eq 'script') {
3129     $script_start_tag->();
3130     redo B;
3131     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3132     ## NOTE: There are "as if in head" code clones
3133     my $el;
3134     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3135 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3136 wakaba 1.1 ->append_child ($el);
3137    
3138     !!!next-token;
3139     redo B;
3140     } elsif ($token->{tag_name} eq 'head') {
3141 wakaba 1.3 !!!parse-error (type => 'in head:head');
3142 wakaba 1.1 ## Ignore the token
3143     !!!next-token;
3144     redo B;
3145     } else {
3146     #
3147     }
3148     } elsif ($token->{type} eq 'end tag') {
3149     if ($token->{tag_name} eq 'head') {
3150 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3151     pop @{$self->{open_elements}};
3152 wakaba 1.1 } else {
3153 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3154 wakaba 1.1 }
3155 wakaba 1.3 $self->{insertion_mode} = 'after head';
3156 wakaba 1.1 !!!next-token;
3157     redo B;
3158     } elsif ($token->{tag_name} eq 'html') {
3159     #
3160     } else {
3161 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3162 wakaba 1.1 ## Ignore the token
3163     !!!next-token;
3164     redo B;
3165     }
3166     } else {
3167     #
3168     }
3169    
3170 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3171 wakaba 1.1 ## As if </head>
3172 wakaba 1.3 pop @{$self->{open_elements}};
3173 wakaba 1.1 }
3174 wakaba 1.3 $self->{insertion_mode} = 'after head';
3175 wakaba 1.1 ## reprocess
3176     redo B;
3177    
3178     ## ISSUE: An issue in the spec.
3179 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3180 wakaba 1.1 if ($token->{type} eq 'character') {
3181     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3182 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3183 wakaba 1.1 unless (length $token->{data}) {
3184     !!!next-token;
3185     redo B;
3186     }
3187     }
3188    
3189     #
3190     } elsif ($token->{type} eq 'comment') {
3191     my $comment = $self->{document}->create_comment ($token->{data});
3192 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3193 wakaba 1.1 !!!next-token;
3194     redo B;
3195     } elsif ($token->{type} eq 'start tag') {
3196     if ($token->{tag_name} eq 'body') {
3197     !!!insert-element ('body', $token->{attributes});
3198 wakaba 1.3 $self->{insertion_mode} = 'in body';
3199 wakaba 1.1 !!!next-token;
3200     redo B;
3201     } elsif ($token->{tag_name} eq 'frameset') {
3202     !!!insert-element ('frameset', $token->{attributes});
3203 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3204 wakaba 1.1 !!!next-token;
3205     redo B;
3206     } elsif ({
3207     base => 1, link => 1, meta => 1,
3208 wakaba 1.3 script => 1, style => 1, title => 1,
3209 wakaba 1.1 }->{$token->{tag_name}}) {
3210 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3211     $self->{insertion_mode} = 'in head';
3212 wakaba 1.1 ## reprocess
3213     redo B;
3214     } else {
3215     #
3216     }
3217     } else {
3218     #
3219     }
3220    
3221     ## As if <body>
3222     !!!insert-element ('body');
3223 wakaba 1.3 $self->{insertion_mode} = 'in body';
3224 wakaba 1.1 ## reprocess
3225     redo B;
3226 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3227 wakaba 1.1 if ($token->{type} eq 'character') {
3228     ## NOTE: There is a code clone of "character in body".
3229     $reconstruct_active_formatting_elements->($insert_to_current);
3230    
3231 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3232 wakaba 1.1
3233     !!!next-token;
3234     redo B;
3235     } elsif ($token->{type} eq 'comment') {
3236     ## NOTE: There is a code clone of "comment in body".
3237     my $comment = $self->{document}->create_comment ($token->{data});
3238 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3239 wakaba 1.1 !!!next-token;
3240     redo B;
3241     } else {
3242     $in_body->($insert_to_current);
3243     redo B;
3244     }
3245 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3246 wakaba 1.1 if ($token->{type} eq 'character') {
3247     ## NOTE: There are "character in table" code clones.
3248     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3249 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3250 wakaba 1.1
3251     unless (length $token->{data}) {
3252     !!!next-token;
3253     redo B;
3254     }
3255     }
3256    
3257 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3258    
3259 wakaba 1.1 ## As if in body, but insert into foster parent element
3260     ## ISSUE: Spec says that "whenever a node would be inserted
3261     ## into the current node" while characters might not be
3262     ## result in a new Text node.
3263     $reconstruct_active_formatting_elements->($insert_to_foster);
3264    
3265     if ({
3266     table => 1, tbody => 1, tfoot => 1,
3267     thead => 1, tr => 1,
3268 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3269 wakaba 1.1 # MUST
3270     my $foster_parent_element;
3271     my $next_sibling;
3272     my $prev_sibling;
3273 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3274     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3275     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3276 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3277     $foster_parent_element = $parent;
3278 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3279 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3280     } else {
3281 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3282 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3283     }
3284     last OE;
3285     }
3286     } # OE
3287 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3288 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3289     unless defined $foster_parent_element;
3290     if (defined $prev_sibling and
3291     $prev_sibling->node_type == 3) {
3292     $prev_sibling->manakai_append_text ($token->{data});
3293     } else {
3294     $foster_parent_element->insert_before
3295     ($self->{document}->create_text_node ($token->{data}),
3296     $next_sibling);
3297     }
3298     } else {
3299 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3300 wakaba 1.1 }
3301    
3302     !!!next-token;
3303     redo B;
3304     } elsif ($token->{type} eq 'comment') {
3305     my $comment = $self->{document}->create_comment ($token->{data});
3306 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3307 wakaba 1.1 !!!next-token;
3308     redo B;
3309     } elsif ($token->{type} eq 'start tag') {
3310     if ({
3311     caption => 1,
3312     colgroup => 1,
3313     tbody => 1, tfoot => 1, thead => 1,
3314     }->{$token->{tag_name}}) {
3315     ## Clear back to table context
3316 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3317     $self->{open_elements}->[-1]->[1] ne 'html') {
3318     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3319     pop @{$self->{open_elements}};
3320 wakaba 1.1 }
3321    
3322     push @$active_formatting_elements, ['#marker', '']
3323     if $token->{tag_name} eq 'caption';
3324    
3325     !!!insert-element ($token->{tag_name}, $token->{attributes});
3326 wakaba 1.3 $self->{insertion_mode} = {
3327 wakaba 1.1 caption => 'in caption',
3328     colgroup => 'in column group',
3329     tbody => 'in table body',
3330     tfoot => 'in table body',
3331     thead => 'in table body',
3332     }->{$token->{tag_name}};
3333     !!!next-token;
3334     redo B;
3335     } elsif ({
3336     col => 1,
3337     td => 1, th => 1, tr => 1,
3338     }->{$token->{tag_name}}) {
3339     ## Clear back to table context
3340 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3341     $self->{open_elements}->[-1]->[1] ne 'html') {
3342     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3343     pop @{$self->{open_elements}};
3344 wakaba 1.1 }
3345    
3346     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3347 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3348 wakaba 1.1 ? 'in column group' : 'in table body';
3349     ## reprocess
3350     redo B;
3351     } elsif ($token->{tag_name} eq 'table') {
3352     ## NOTE: There are code clones for this "table in table"
3353 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3354 wakaba 1.1
3355     ## As if </table>
3356     ## have a table element in table scope
3357     my $i;
3358 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3359     my $node = $self->{open_elements}->[$_];
3360 wakaba 1.1 if ($node->[1] eq 'table') {
3361     $i = $_;
3362     last INSCOPE;
3363     } elsif ({
3364     table => 1, html => 1,
3365     }->{$node->[1]}) {
3366     last INSCOPE;
3367     }
3368     } # INSCOPE
3369     unless (defined $i) {
3370 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3371 wakaba 1.1 ## Ignore tokens </table><table>
3372     !!!next-token;
3373     redo B;
3374     }
3375    
3376     ## generate implied end tags
3377     if ({
3378     dd => 1, dt => 1, li => 1, p => 1,
3379     td => 1, th => 1, tr => 1,
3380 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3381 wakaba 1.1 !!!back-token; # <table>
3382     $token = {type => 'end tag', tag_name => 'table'};
3383     !!!back-token;
3384     $token = {type => 'end tag',
3385 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3386 wakaba 1.1 redo B;
3387     }
3388    
3389 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3390     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3391 wakaba 1.1 }
3392    
3393 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3394 wakaba 1.1
3395 wakaba 1.3 $self->_reset_insertion_mode;
3396 wakaba 1.1
3397     ## reprocess
3398     redo B;
3399     } else {
3400     #
3401     }
3402     } elsif ($token->{type} eq 'end tag') {
3403     if ($token->{tag_name} eq 'table') {
3404     ## have a table element in table scope
3405     my $i;
3406 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3407     my $node = $self->{open_elements}->[$_];
3408 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3409     $i = $_;
3410     last INSCOPE;
3411     } elsif ({
3412     table => 1, html => 1,
3413     }->{$node->[1]}) {
3414     last INSCOPE;
3415     }
3416     } # INSCOPE
3417     unless (defined $i) {
3418 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3419 wakaba 1.1 ## Ignore the token
3420     !!!next-token;
3421     redo B;
3422     }
3423    
3424     ## generate implied end tags
3425     if ({
3426     dd => 1, dt => 1, li => 1, p => 1,
3427     td => 1, th => 1, tr => 1,
3428 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3429 wakaba 1.1 !!!back-token;
3430     $token = {type => 'end tag',
3431 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3432 wakaba 1.1 redo B;
3433     }
3434    
3435 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3436     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3437 wakaba 1.1 }
3438    
3439 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3440 wakaba 1.1
3441 wakaba 1.3 $self->_reset_insertion_mode;
3442 wakaba 1.1
3443     !!!next-token;
3444     redo B;
3445     } elsif ({
3446     body => 1, caption => 1, col => 1, colgroup => 1,
3447     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3448     thead => 1, tr => 1,
3449     }->{$token->{tag_name}}) {
3450 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3451 wakaba 1.1 ## Ignore the token
3452     !!!next-token;
3453     redo B;
3454     } else {
3455     #
3456     }
3457     } else {
3458     #
3459     }
3460    
3461 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3462 wakaba 1.1 $in_body->($insert_to_foster);
3463     redo B;
3464 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
## Insertion mode "in caption".
##
## - Character and comment tokens are handled by clones of the
##   "in body" code and appended to the current node.
## - A table-structure start tag, or a </table> end tag, acts as if
##   </caption> had been seen: the caption is popped from the stack of
##   open elements, the insertion mode becomes "in table", and the same
##   token is dispatched again.
## - End tags that cannot be matched in this mode are reported and
##   ignored.
## - Everything else falls through to |$in_body| at the bottom.
##
## NOTE(review): The |!!!...| lines are macro invocations, presumably
## expanded when HTML.pm is generated from this .src file.  They are
## kept textually identical to their other uses in this file.
if ($token->{type} eq 'character') {
  ## NOTE: This is a code clone of "character in body".
  $reconstruct_active_formatting_elements->($insert_to_current);

  $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});

  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'comment') {
  ## NOTE: This is a code clone of "comment in body".
  my $comment = $self->{document}->create_comment ($token->{data});
  $self->{open_elements}->[-1]->[0]->append_child ($comment);
  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'start tag') {
  if ({
       caption => 1, col => 1, colgroup => 1, tbody => 1,
       td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
      }->{$token->{tag_name}}) {
    !!!parse-error (type => 'not closed:caption');

    ## As if </caption>
    ## have a |caption| element in table scope
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq 'caption') {
        $i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        ## |table| and |html| delimit the table scope.
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $i) {
      !!!parse-error (type => 'unmatched end tag:caption');
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## generate implied end tags
    if ({
         dd => 1, dt => 1, li => 1, p => 1,
         td => 1, th => 1, tr => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
      ## Queue the original token and an implied </caption>, then
      ## process an end tag for the current node first.
      ## NOTE(review): assumes |!!!back-token| pushes the current
      ## |$token| back for later re-reading -- confirm against the
      ## macro definition.
      !!!back-token; # <?>
      $token = {type => 'end tag', tag_name => 'caption'};
      !!!back-token;
      $token = {type => 'end tag',
                tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
      redo B;
    }

    if ($self->{open_elements}->[-1]->[1] ne 'caption') {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Pop everything from the caption upward.
    splice @{$self->{open_elements}}, $i;

    $clear_up_to_marker->();

    $self->{insertion_mode} = 'in table';

    ## reprocess
    redo B;
  } else {
    #
  }
} elsif ($token->{type} eq 'end tag') {
  if ($token->{tag_name} eq 'caption') {
    ## have a |caption| element in table scope
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq $token->{tag_name}) {
        $i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $i) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## generate implied end tags
    if ({
         dd => 1, dt => 1, li => 1, p => 1,
         td => 1, th => 1, tr => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
      !!!back-token;
      $token = {type => 'end tag',
                tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
      redo B;
    }

    if ($self->{open_elements}->[-1]->[1] ne 'caption') {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    splice @{$self->{open_elements}}, $i;

    $clear_up_to_marker->();

    $self->{insertion_mode} = 'in table';

    !!!next-token;
    redo B;
  } elsif ($token->{tag_name} eq 'table') {
    !!!parse-error (type => 'not closed:caption');

    ## As if </caption>
    ## have a |caption| element in table scope
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq 'caption') {
        $i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $i) {
      !!!parse-error (type => 'unmatched end tag:caption');
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## generate implied end tags
    if ({
         dd => 1, dt => 1, li => 1, p => 1,
         td => 1, th => 1, tr => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
      !!!back-token; # </table>
      $token = {type => 'end tag', tag_name => 'caption'};
      !!!back-token;
      $token = {type => 'end tag',
                tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
      redo B;
    }

    if ($self->{open_elements}->[-1]->[1] ne 'caption') {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    splice @{$self->{open_elements}}, $i;

    $clear_up_to_marker->();

    $self->{insertion_mode} = 'in table';

    ## reprocess
    redo B;
  } elsif ({
            body => 1, col => 1, colgroup => 1,
            html => 1, tbody => 1, td => 1, tfoot => 1,
            th => 1, thead => 1, tr => 1,
           }->{$token->{tag_name}}) {
    !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
    ## Ignore the token
    ## The token must be consumed before looping.  Without this, the
    ## same end tag would be dispatched to this branch again forever.
    !!!next-token;
    redo B;
  } else {
    #
  }
} else {
  #
}

## Anything not handled above is processed as if "in body".
$in_body->($insert_to_current);
redo B;
3646 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
## Insertion mode "in column group".
##
## Tokens with a dedicated rule are handled by one of the branches of
## the chain below, each of which ends with |redo B|.  Any token that
## leaves the chain is handled by the implied </colgroup> at the
## bottom.
##
## NOTE(review): The |!!!...| lines are macro invocations, presumably
## expanded when HTML.pm is generated from this .src file; they are
## reproduced verbatim.
if ($token->{type} eq 'character') {
  ## Leading white space goes into the current node.  If other
  ## characters remain, the token continues to the implied </colgroup>.
  if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
    $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
    if (not length $token->{data}) {
      !!!next-token;
      redo B;
    }
  }
} elsif ($token->{type} eq 'comment') {
  my $comment = $self->{document}->create_comment ($token->{data});
  $self->{open_elements}->[-1]->[0]->append_child ($comment);
  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'start tag' and
         $token->{tag_name} eq 'col') {
  ## The |col| element is inserted and immediately popped again.
  !!!insert-element ($token->{tag_name}, $token->{attributes});
  pop @{$self->{open_elements}};
  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'end tag' and
         $token->{tag_name} eq 'colgroup') {
  if ($self->{open_elements}->[-1]->[1] ne 'html') {
    pop @{$self->{open_elements}}; # colgroup
    $self->{insertion_mode} = 'in table';
    !!!next-token;
    redo B;
  }

  ## The current node is |html|: there is no colgroup to close.
  !!!parse-error (type => 'unmatched end tag:colgroup');
  ## Ignore the token
  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'end tag' and
         $token->{tag_name} eq 'col') {
  !!!parse-error (type => 'unmatched end tag:col');
  ## Ignore the token
  !!!next-token;
  redo B;
}

## As if </colgroup>
if ($self->{open_elements}->[-1]->[1] ne 'html') {
  pop @{$self->{open_elements}}; # colgroup
  $self->{insertion_mode} = 'in table';
  ## reprocess
  redo B;
}

!!!parse-error (type => 'unmatched end tag:colgroup');
## Ignore the token
!!!next-token;
redo B;
3708 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
## Insertion mode "in table body".
##
## - Character tokens: leading white space goes into the current node;
##   any remaining text is a parse error and is inserted via the foster
##   parent when the current node is a table-structure element.
## - <tr>, <th>, <td>: switch to "in row" (inserting an implied |tr|
##   for <th>/<td>).
## - Other table-structure start tags and </table>: close the current
##   table section, switch to "in table", and reprocess the token.
## - Everything else is handled as "in table" at the bottom.
##
## NOTE(review): The |!!!...| lines are macro invocations, presumably
## expanded when HTML.pm is generated from this .src file; they are
## reproduced verbatim.
if ($token->{type} eq 'character') {
  ## NOTE: This is a "character in table" code clone.
  if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
    $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);

    if (not length $token->{data}) {
      !!!next-token;
      redo B;
    }
  }

  !!!parse-error (type => 'in table:#character');

  ## As if in body, but insert into foster parent element
  ## ISSUE: The spec says "whenever a node would be inserted into the
  ## current node", but character data does not necessarily result
  ## in a new Text node.
  $reconstruct_active_formatting_elements->($insert_to_foster);

  if (not {
           table => 1, tbody => 1, tfoot => 1,
           thead => 1, tr => 1,
          }->{$self->{open_elements}->[-1]->[1]}) {
    $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
  } else {
    # MUST
    my $foster_parent;
    my $insert_before_node;
    my $preceding_node;
    OE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $entry = $self->{open_elements}->[$pos];
      next OE unless $entry->[1] eq 'table';

      ## The innermost |table| determines the foster parent.
      my $table_parent = $entry->[0]->parent_node;
      if (defined $table_parent and $table_parent->node_type == 1) {
        $foster_parent = $table_parent;
        $insert_before_node = $entry->[0];
        $preceding_node = $insert_before_node->previous_sibling;
      } else {
        ## Use the element just below the table in the stack.
        $foster_parent = $self->{open_elements}->[$pos - 1]->[0];
        $preceding_node = $foster_parent->last_child;
      }
      last OE;
    } # OE

    ## No |table| in the stack: fall back to the bottommost element.
    unless (defined $foster_parent) {
      $foster_parent = $self->{open_elements}->[0]->[0];
      $preceding_node = $foster_parent->last_child if $foster_parent;
    }

    if (defined $preceding_node and $preceding_node->node_type == 3) {
      ## Merge into the adjacent Text node.
      $preceding_node->manakai_append_text ($token->{data});
    } else {
      $foster_parent->insert_before
          ($self->{document}->create_text_node ($token->{data}),
           $insert_before_node);
    }
  }

  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'comment') {
  ## Copied from 'in table'
  my $comment = $self->{document}->create_comment ($token->{data});
  $self->{open_elements}->[-1]->[0]->append_child ($comment);
  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'start tag') {
  if ({
       tr => 1,
       th => 1, td => 1,
      }->{$token->{tag_name}}) {
    if ($token->{tag_name} ne 'tr') {
      !!!parse-error (type => 'missing start tag:tr');
    }

    ## Clear back to table body context
    until ({
            tbody => 1, tfoot => 1, thead => 1, html => 1,
           }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    $self->{insertion_mode} = 'in row';
    if ($token->{tag_name} eq 'tr') {
      !!!insert-element ($token->{tag_name}, $token->{attributes});
      !!!next-token;
    } else {
      ## Implied <tr>; the <th>/<td> token itself is reprocessed.
      !!!insert-element ('tr');
      ## reprocess
    }
    redo B;
  } elsif ({
            caption => 1, col => 1, colgroup => 1,
            tbody => 1, tfoot => 1, thead => 1,
           }->{$token->{tag_name}}) {
    ## have a table section element in table scope
    my $section_found;
    INSCOPE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $name = $self->{open_elements}->[$pos]->[1];
      if ({
           tbody => 1, thead => 1, tfoot => 1,
          }->{$name}) {
        $section_found = $pos;
        last INSCOPE;
      }
      last INSCOPE if $name eq 'table' or $name eq 'html';
    } # INSCOPE
    if (not defined $section_found) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Clear back to table body context
    until ({
            tbody => 1, tfoot => 1, thead => 1, html => 1,
           }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    ## As if </{current node}>: the scope check and the "clear back"
    ## step of that implied end tag are already satisfied by the
    ## steps above, so only the pop remains.
    pop @{$self->{open_elements}};
    $self->{insertion_mode} = 'in table';
    ## reprocess
    redo B;
  } elsif ($token->{tag_name} eq 'table') {
    ## NOTE: This is a code clone of "table in table"
    !!!parse-error (type => 'not closed:table');

    ## As if </table>
    ## have a table element in table scope
    my $table_pos;
    INSCOPE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $name = $self->{open_elements}->[$pos]->[1];
      if ($name eq 'table') {
        $table_pos = $pos;
        last INSCOPE;
      }
      last INSCOPE if $name eq 'html';
    } # INSCOPE
    if (not defined $table_pos) {
      !!!parse-error (type => 'unmatched end tag:table');
      ## Ignore tokens </table><table>
      !!!next-token;
      redo B;
    }

    ## generate implied end tags
    if ({
         dd => 1, dt => 1, li => 1, p => 1,
         td => 1, th => 1, tr => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
      !!!back-token; # <table>
      $token = {type => 'end tag', tag_name => 'table'};
      !!!back-token;
      $token = {type => 'end tag',
                tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
      redo B;
    }

    if ($self->{open_elements}->[-1]->[1] ne 'table') {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    splice @{$self->{open_elements}}, $table_pos;

    $self->_reset_insertion_mode;

    ## reprocess
    redo B;
  }
} elsif ($token->{type} eq 'end tag') {
  if ({
       tbody => 1, tfoot => 1, thead => 1,
      }->{$token->{tag_name}}) {
    ## have an element in table scope
    my $match_pos;
    INSCOPE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $name = $self->{open_elements}->[$pos]->[1];
      if ($name eq $token->{tag_name}) {
        $match_pos = $pos;
        last INSCOPE;
      }
      last INSCOPE if $name eq 'table' or $name eq 'html';
    } # INSCOPE
    if (not defined $match_pos) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Clear back to table body context
    until ({
            tbody => 1, tfoot => 1, thead => 1, html => 1,
           }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    pop @{$self->{open_elements}};
    $self->{insertion_mode} = 'in table';
    !!!next-token;
    redo B;
  } elsif ($token->{tag_name} eq 'table') {
    ## have a table section element in table scope
    my $section_found;
    INSCOPE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $name = $self->{open_elements}->[$pos]->[1];
      if ({
           tbody => 1, thead => 1, tfoot => 1,
          }->{$name}) {
        $section_found = $pos;
        last INSCOPE;
      }
      last INSCOPE if $name eq 'table' or $name eq 'html';
    } # INSCOPE
    if (not defined $section_found) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Clear back to table body context
    until ({
            tbody => 1, tfoot => 1, thead => 1, html => 1,
           }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    ## As if </{current node}>: the scope check and the "clear back"
    ## step of that implied end tag are already satisfied by the
    ## steps above, so only the pop remains.
    pop @{$self->{open_elements}};
    $self->{insertion_mode} = 'in table';
    ## reprocess
    redo B;
  } elsif ({
            body => 1, caption => 1, col => 1, colgroup => 1,
            html => 1, td => 1, th => 1, tr => 1,
           }->{$token->{tag_name}}) {
    !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
    ## Ignore the token
    !!!next-token;
    redo B;
  }
}

## As if in table
!!!parse-error (type => 'in table:'.$token->{tag_name});
$in_body->($insert_to_foster);
redo B;
3992 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
## Insertion mode "in row".
##
## - Character tokens: leading white space goes into the current node;
##   any remaining text is a parse error and is inserted via the foster
##   parent when the current node is a table-structure element.
## - <th>, <td>: insert the cell, push a marker onto the list of
##   active formatting elements, and switch to "in cell".
## - Other table-structure start tags, </table>, and
##   </tbody>/</tfoot>/</thead>: act as if </tr> had been seen, switch
##   to "in table body", and reprocess the token.
## - </tr>: close the row and switch to "in table body".
## - Everything else is handled as "in table" at the bottom.
##
## NOTE(review): The |!!!...| lines are macro invocations, presumably
## expanded when HTML.pm is generated from this .src file.  They are
## kept textually identical to their other uses in this file.
if ($token->{type} eq 'character') {
  ## NOTE: This is a "character in table" code clone.
  if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
    $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);

    unless (length $token->{data}) {
      !!!next-token;
      redo B;
    }
  }

  !!!parse-error (type => 'in table:#character');

  ## As if in body, but insert into foster parent element
  ## ISSUE: The spec says "whenever a node would be inserted into the
  ## current node", but character data does not necessarily result
  ## in a new Text node.
  $reconstruct_active_formatting_elements->($insert_to_foster);

  if ({
       table => 1, tbody => 1, tfoot => 1,
       thead => 1, tr => 1,
      }->{$self->{open_elements}->[-1]->[1]}) {
    # MUST
    my $foster_parent_element;
    my $next_sibling;
    my $prev_sibling;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[1] eq 'table') {
        ## The innermost |table| determines the foster parent.
        my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
        if (defined $parent and $parent->node_type == 1) {
          $foster_parent_element = $parent;
          $next_sibling = $self->{open_elements}->[$_]->[0];
          $prev_sibling = $next_sibling->previous_sibling;
        } else {
          ## Use the element just below the table in the stack.
          $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
          $prev_sibling = $foster_parent_element->last_child;
        }
        last OE;
      }
    } # OE
    ## No |table| in the stack: fall back to the bottommost element.
    $foster_parent_element = $self->{open_elements}->[0]->[0] and
    $prev_sibling = $foster_parent_element->last_child
        unless defined $foster_parent_element;
    if (defined $prev_sibling and
        $prev_sibling->node_type == 3) {
      ## Merge into the adjacent Text node.
      $prev_sibling->manakai_append_text ($token->{data});
    } else {
      $foster_parent_element->insert_before
          ($self->{document}->create_text_node ($token->{data}),
           $next_sibling);
    }
  } else {
    $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
  }

  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'comment') {
  ## Copied from 'in table'
  my $comment = $self->{document}->create_comment ($token->{data});
  $self->{open_elements}->[-1]->[0]->append_child ($comment);
  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'start tag') {
  if ($token->{tag_name} eq 'th' or
      $token->{tag_name} eq 'td') {
    ## Clear back to table row context
    while (not {
                tr => 1, html => 1,
               }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    !!!insert-element ($token->{tag_name}, $token->{attributes});
    $self->{insertion_mode} = 'in cell';

    push @$active_formatting_elements, ['#marker', ''];

    !!!next-token;
    redo B;
  } elsif ({
            caption => 1, col => 1, colgroup => 1,
            tbody => 1, tfoot => 1, thead => 1, tr => 1,
           }->{$token->{tag_name}}) {
    ## As if </tr>
    ## have a |tr| element in table scope
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq 'tr') {
        $i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        ## |table| and |html| delimit the table scope.
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $i) {
      ## The error type was misspelled "unmacthed" here, which made
      ## this report differ from the same error everywhere else.
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Clear back to table row context
    while (not {
                tr => 1, html => 1,
               }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    pop @{$self->{open_elements}}; # tr
    $self->{insertion_mode} = 'in table body';
    ## reprocess
    redo B;
  } elsif ($token->{tag_name} eq 'table') {
    ## NOTE: This is a code clone of "table in table"
    !!!parse-error (type => 'not closed:table');

    ## As if </table>
    ## have a table element in table scope
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq 'table') {
        $i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $i) {
      !!!parse-error (type => 'unmatched end tag:table');
      ## Ignore tokens </table><table>
      !!!next-token;
      redo B;
    }

    ## generate implied end tags
    if ({
         dd => 1, dt => 1, li => 1, p => 1,
         td => 1, th => 1, tr => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
      ## NOTE(review): assumes |!!!back-token| pushes the current
      ## |$token| back for later re-reading -- confirm against the
      ## macro definition.
      !!!back-token; # <table>
      $token = {type => 'end tag', tag_name => 'table'};
      !!!back-token;
      $token = {type => 'end tag',
                tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
      redo B;
    }

    if ($self->{open_elements}->[-1]->[1] ne 'table') {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    splice @{$self->{open_elements}}, $i;

    $self->_reset_insertion_mode;

    ## reprocess
    redo B;
  } else {
    #
  }
} elsif ($token->{type} eq 'end tag') {
  if ($token->{tag_name} eq 'tr') {
    ## have an element in table scope
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq $token->{tag_name}) {
        $i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $i) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Clear back to table row context
    while (not {
                tr => 1, html => 1,
               }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    pop @{$self->{open_elements}}; # tr
    $self->{insertion_mode} = 'in table body';
    !!!next-token;
    redo B;
  } elsif ($token->{tag_name} eq 'table') {
    ## As if </tr>
    ## have a |tr| element in table scope
    my $i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq 'tr') {
        $i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $i) {
      ## Report the tag name.  The token type was used here before,
      ## which produced "unmatched end tag:end tag".
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Clear back to table row context
    while (not {
                tr => 1, html => 1,
               }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    pop @{$self->{open_elements}}; # tr
    $self->{insertion_mode} = 'in table body';
    ## reprocess
    redo B;
  } elsif ({
            tbody => 1, tfoot => 1, thead => 1,
           }->{$token->{tag_name}}) {
    ## have an element in table scope
    my $section_i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq $token->{tag_name}) {
        $section_i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $section_i) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## As if </tr>
    ## have a |tr| element in table scope
    ## A separate variable is used for the second search.  Declaring
    ## |my $i| twice in this scope caused a "masks earlier
    ## declaration" warning.
    my $tr_i;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[1] eq 'tr') {
        $tr_i = $_;
        last INSCOPE;
      } elsif ({
                table => 1, html => 1,
               }->{$node->[1]}) {
        last INSCOPE;
      }
    } # INSCOPE
    unless (defined $tr_i) {
      !!!parse-error (type => 'unmatched end tag:tr');
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Clear back to table row context
    while (not {
                tr => 1, html => 1,
               }->{$self->{open_elements}->[-1]->[1]}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
      pop @{$self->{open_elements}};
    }

    pop @{$self->{open_elements}}; # tr
    $self->{insertion_mode} = 'in table body';
    ## reprocess
    redo B;
  } elsif ({
            body => 1, caption => 1, col => 1,
            colgroup => 1, html => 1, td => 1, th => 1,
           }->{$token->{tag_name}}) {
    !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
    ## Ignore the token
    !!!next-token;
    redo B;
  } else {
    #
  }
} else {
  #
}

## As if in table
!!!parse-error (type => 'in table:'.$token->{tag_name});
$in_body->($insert_to_foster);
redo B;
4306 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
## Insertion mode "in cell".
##
## - Character and comment tokens are handled by clones of the
##   "in body" code and appended to the current node.
## - A table-structure start tag, or an end tag for an enclosing
##   table-structure element, closes the cell first: an end tag for
##   the open |td|/|th| is processed and the original token is pushed
##   back for later.
## - </td>, </th>: close the cell and switch to "in row".
## - Everything else falls through to |$in_body| at the bottom.
##
## NOTE(review): The |!!!...| lines are macro invocations, presumably
## expanded when HTML.pm is generated from this .src file; they are
## reproduced verbatim.
if ($token->{type} eq 'character') {
  ## NOTE: This is a code clone of "character in body".
  $reconstruct_active_formatting_elements->($insert_to_current);

  $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});

  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'comment') {
  ## NOTE: This is a code clone of "comment in body".
  my $comment = $self->{document}->create_comment ($token->{data});
  $self->{open_elements}->[-1]->[0]->append_child ($comment);
  !!!next-token;
  redo B;
} elsif ($token->{type} eq 'start tag') {
  if ({
       caption => 1, col => 1, colgroup => 1,
       tbody => 1, td => 1, tfoot => 1, th => 1,
       thead => 1, tr => 1,
      }->{$token->{tag_name}}) {
    ## have a |td| or |th| element in table scope
    my $cell_name;
    INSCOPE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $name = $self->{open_elements}->[$pos]->[1];
      if ($name eq 'td' or $name eq 'th') {
        $cell_name = $name;
        last INSCOPE;
      }
      last INSCOPE if $name eq 'table' or $name eq 'html';
    } # INSCOPE
    if (not defined $cell_name) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Close the cell
    !!!back-token; # <?>
    $token = {type => 'end tag', tag_name => $cell_name};
    redo B;
  }
} elsif ($token->{type} eq 'end tag') {
  if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
    ## have an element in table scope
    my $cell_pos;
    INSCOPE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $name = $self->{open_elements}->[$pos]->[1];
      if ($name eq $token->{tag_name}) {
        $cell_pos = $pos;
        last INSCOPE;
      }
      last INSCOPE if $name eq 'table' or $name eq 'html';
    } # INSCOPE
    if (not defined $cell_pos) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## generate implied end tags
    ## The element named by the token itself is excluded: only the
    ## other kind of cell counts as implicitly closable here.
    if ({
         dd => 1, dt => 1, li => 1, p => 1,
         td => ($token->{tag_name} eq 'th'),
         th => ($token->{tag_name} eq 'td'),
         tr => 1,
        }->{$self->{open_elements}->[-1]->[1]}) {
      !!!back-token;
      $token = {type => 'end tag',
                tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
      redo B;
    }

    if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Pop everything from the cell upward.
    splice @{$self->{open_elements}}, $cell_pos;

    $clear_up_to_marker->();

    $self->{insertion_mode} = 'in row';

    !!!next-token;
    redo B;
  } elsif ({
            body => 1, caption => 1, col => 1,
            colgroup => 1, html => 1,
           }->{$token->{tag_name}}) {
    !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
    ## Ignore the token
    !!!next-token;
    redo B;
  } elsif ({
            table => 1, tbody => 1, tfoot => 1,
            thead => 1, tr => 1,
           }->{$token->{tag_name}}) {
    ## have an element in table scope
    my $match_pos;
    my $cell_name;
    INSCOPE: for my $pos (reverse 0 .. $#{$self->{open_elements}}) {
      my $name = $self->{open_elements}->[$pos]->[1];
      if ($name eq $token->{tag_name}) {
        $match_pos = $pos;
        last INSCOPE;
      }
      if ($name eq 'td' or $name eq 'th') {
        ## Remember the cell and keep scanning for the named element.
        ## NOTE: There is exactly one |td| or |th| element
        ## in scope in the stack of open elements by definition.
        $cell_name = $name;
        next INSCOPE;
      }
      last INSCOPE if $name eq 'table' or $name eq 'html';
    } # INSCOPE
    if (not defined $match_pos) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Ignore the token
      !!!next-token;
      redo B;
    }

    ## Close the cell
    !!!back-token; # </?>
    $token = {type => 'end tag', tag_name => $cell_name};
    redo B;
  }
}

## Anything not handled above is processed as if "in body".
$in_body->($insert_to_current);
redo B;
4451 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4452 wakaba 1.1 if ($token->{type} eq 'character') {
4453 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4454 wakaba 1.1 !!!next-token;
4455     redo B;
4456     } elsif ($token->{type} eq 'comment') {
4457     my $comment = $self->{document}->create_comment ($token->{data});
4458 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4459 wakaba 1.1 !!!next-token;
4460     redo B;
4461     } elsif ($token->{type} eq 'start tag') {
4462     if ($token->{tag_name} eq 'option') {
4463 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4464 wakaba 1.1 ## As if </option>
4465 wakaba 1.3 pop @{$self->{open_elements}};
4466 wakaba 1.1 }
4467    
4468     !!!insert-element ($token->{tag_name}, $token->{attributes});
4469     !!!next-token;
4470     redo B;
4471     } elsif ($token->{tag_name} eq 'optgroup') {
4472 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4473 wakaba 1.1 ## As if </option>
4474 wakaba 1.3 pop @{$self->{open_elements}};
4475 wakaba 1.1 }
4476    
4477 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4478 wakaba 1.1 ## As if </optgroup>
4479 wakaba 1.3 pop @{$self->{open_elements}};
4480 wakaba 1.1 }
4481    
4482     !!!insert-element ($token->{tag_name}, $token->{attributes});
4483     !!!next-token;
4484     redo B;
4485     } elsif ($token->{tag_name} eq 'select') {
4486 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4487 wakaba 1.1 ## As if </select> instead
4488     ## have an element in table scope
4489     my $i;
4490 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4491     my $node = $self->{open_elements}->[$_];
4492 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4493     $i = $_;
4494     last INSCOPE;
4495     } elsif ({
4496     table => 1, html => 1,
4497     }->{$node->[1]}) {
4498     last INSCOPE;
4499     }
4500     } # INSCOPE
4501     unless (defined $i) {
4502 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4503 wakaba 1.1 ## Ignore the token
4504     !!!next-token;
4505     redo B;
4506     }
4507    
4508 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4509 wakaba 1.1
4510 wakaba 1.3 $self->_reset_insertion_mode;
4511 wakaba 1.1
4512     !!!next-token;
4513     redo B;
4514     } else {
4515     #
4516     }
4517     } elsif ($token->{type} eq 'end tag') {
4518     if ($token->{tag_name} eq 'optgroup') {
4519 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4520     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4521 wakaba 1.1 ## As if </option>
4522 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4523     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4524     pop @{$self->{open_elements}};
4525 wakaba 1.1 } else {
4526 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4527 wakaba 1.1 ## Ignore the token
4528     }
4529     !!!next-token;
4530     redo B;
4531     } elsif ($token->{tag_name} eq 'option') {
4532 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4533     pop @{$self->{open_elements}};
4534 wakaba 1.1 } else {
4535 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4536 wakaba 1.1 ## Ignore the token
4537     }
4538     !!!next-token;
4539     redo B;
4540     } elsif ($token->{tag_name} eq 'select') {
4541     ## have an element in table scope
4542     my $i;
4543 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4544     my $node = $self->{open_elements}->[$_];
4545 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4546     $i = $_;
4547     last INSCOPE;
4548     } elsif ({
4549     table => 1, html => 1,
4550     }->{$node->[1]}) {
4551     last INSCOPE;
4552     }
4553     } # INSCOPE
4554     unless (defined $i) {
4555 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4556 wakaba 1.1 ## Ignore the token
4557     !!!next-token;
4558     redo B;
4559     }
4560    
4561 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4562 wakaba 1.1
4563 wakaba 1.3 $self->_reset_insertion_mode;
4564 wakaba 1.1
4565     !!!next-token;
4566     redo B;
4567     } elsif ({
4568     caption => 1, table => 1, tbody => 1,
4569     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4570     }->{$token->{tag_name}}) {
4571 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4572 wakaba 1.1
4573     ## have an element in table scope
4574     my $i;
4575 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4576     my $node = $self->{open_elements}->[$_];
4577 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4578     $i = $_;
4579     last INSCOPE;
4580     } elsif ({
4581     table => 1, html => 1,
4582     }->{$node->[1]}) {
4583     last INSCOPE;
4584     }
4585     } # INSCOPE
4586     unless (defined $i) {
4587     ## Ignore the token
4588     !!!next-token;
4589     redo B;
4590     }
4591    
4592     ## As if </select>
4593     ## have an element in table scope
4594     undef $i;
4595 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4596     my $node = $self->{open_elements}->[$_];
4597 wakaba 1.1 if ($node->[1] eq 'select') {
4598     $i = $_;
4599     last INSCOPE;
4600     } elsif ({
4601     table => 1, html => 1,
4602     }->{$node->[1]}) {
4603     last INSCOPE;
4604     }
4605     } # INSCOPE
4606     unless (defined $i) {
4607 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4608 wakaba 1.1 ## Ignore the </select> token
4609     !!!next-token; ## TODO: ok?
4610     redo B;
4611     }
4612    
4613 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4614 wakaba 1.1
4615 wakaba 1.3 $self->_reset_insertion_mode;
4616 wakaba 1.1
4617     ## reprocess
4618     redo B;
4619     } else {
4620     #
4621     }
4622     } else {
4623     #
4624     }
4625    
4626 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4627 wakaba 1.1 ## Ignore the token
4628     !!!next-token;
4629     redo B;
4630 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4631 wakaba 1.1 if ($token->{type} eq 'character') {
4632     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4633     ## As if in body
4634     $reconstruct_active_formatting_elements->($insert_to_current);
4635    
4636 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4637 wakaba 1.1
4638     unless (length $token->{data}) {
4639     !!!next-token;
4640     redo B;
4641     }
4642     }
4643    
4644     #
4645 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4646 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4647     my $comment = $self->{document}->create_comment ($token->{data});
4648 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4649 wakaba 1.1 !!!next-token;
4650     redo B;
4651 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4652     !!!parse-error (type => 'after body:'.$token->{tag_name});
4653     #
4654 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4655     if ($token->{tag_name} eq 'html') {
4656 wakaba 1.3 if (defined $self->{inner_html_node}) {
4657     !!!parse-error (type => 'unmatched end tag:html');
4658     ## Ignore the token
4659     !!!next-token;
4660     redo B;
4661     } else {
4662     $phase = 'trailing end';
4663     !!!next-token;
4664     redo B;
4665     }
4666 wakaba 1.1 } else {
4667 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4668 wakaba 1.1 }
4669     } else {
4670 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4671 wakaba 1.1 }
4672    
4673 wakaba 1.3 $self->{insertion_mode} = 'in body';
4674 wakaba 1.1 ## reprocess
4675     redo B;
4676 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4677 wakaba 1.1 if ($token->{type} eq 'character') {
4678     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4679 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4680 wakaba 1.1
4681     unless (length $token->{data}) {
4682     !!!next-token;
4683     redo B;
4684     }
4685     }
4686    
4687     #
4688     } elsif ($token->{type} eq 'comment') {
4689     my $comment = $self->{document}->create_comment ($token->{data});
4690 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4691 wakaba 1.1 !!!next-token;
4692     redo B;
4693     } elsif ($token->{type} eq 'start tag') {
4694     if ($token->{tag_name} eq 'frameset') {
4695     !!!insert-element ($token->{tag_name}, $token->{attributes});
4696     !!!next-token;
4697     redo B;
4698     } elsif ($token->{tag_name} eq 'frame') {
4699     !!!insert-element ($token->{tag_name}, $token->{attributes});
4700 wakaba 1.3 pop @{$self->{open_elements}};
4701 wakaba 1.1 !!!next-token;
4702     redo B;
4703     } elsif ($token->{tag_name} eq 'noframes') {
4704     $in_body->($insert_to_current);
4705     redo B;
4706     } else {
4707     #
4708     }
4709     } elsif ($token->{type} eq 'end tag') {
4710     if ($token->{tag_name} eq 'frameset') {
4711 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4712     @{$self->{open_elements}} == 1) {
4713     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4714 wakaba 1.1 ## Ignore the token
4715     !!!next-token;
4716     } else {
4717 wakaba 1.3 pop @{$self->{open_elements}};
4718 wakaba 1.1 !!!next-token;
4719     }
4720    
4721     ## if not inner_html and
4722 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4723     $self->{insertion_mode} = 'after frameset';
4724 wakaba 1.1 }
4725     redo B;
4726     } else {
4727     #
4728     }
4729     } else {
4730     #
4731     }
4732    
4733 wakaba 1.3 if (defined $token->{tag_name}) {
4734     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4735     } else {
4736     !!!parse-error (type => 'in frameset:#'.$token->{type});
4737     }
4738 wakaba 1.1 ## Ignore the token
4739     !!!next-token;
4740     redo B;
4741 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4742 wakaba 1.1 if ($token->{type} eq 'character') {
4743     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4744 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4745 wakaba 1.1
4746     unless (length $token->{data}) {
4747     !!!next-token;
4748     redo B;
4749     }
4750     }
4751    
4752     #
4753     } elsif ($token->{type} eq 'comment') {
4754     my $comment = $self->{document}->create_comment ($token->{data});
4755 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4756 wakaba 1.1 !!!next-token;
4757     redo B;
4758     } elsif ($token->{type} eq 'start tag') {
4759     if ($token->{tag_name} eq 'noframes') {
4760     $in_body->($insert_to_current);
4761     redo B;
4762     } else {
4763     #
4764     }
4765     } elsif ($token->{type} eq 'end tag') {
4766     if ($token->{tag_name} eq 'html') {
4767     $phase = 'trailing end';
4768     !!!next-token;
4769     redo B;
4770     } else {
4771     #
4772     }
4773     } else {
4774     #
4775     }
4776    
4777 wakaba 1.3 if (defined $token->{tag_name}) {
4778     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4779     } else {
4780     !!!parse-error (type => 'after frameset:#'.$token->{type});
4781     }
4782 wakaba 1.1 ## Ignore the token
4783     !!!next-token;
4784     redo B;
4785    
4786     ## ISSUE: An issue in spec there
4787     } else {
4788 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4789 wakaba 1.1 }
4790     }
4791     } elsif ($phase eq 'trailing end') {
4792     ## states in the main stage is preserved yet # MUST
4793    
4794     if ($token->{type} eq 'DOCTYPE') {
4795 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4796 wakaba 1.1 ## Ignore the token
4797     !!!next-token;
4798     redo B;
4799     } elsif ($token->{type} eq 'comment') {
4800     my $comment = $self->{document}->create_comment ($token->{data});
4801     $self->{document}->append_child ($comment);
4802     !!!next-token;
4803     redo B;
4804     } elsif ($token->{type} eq 'character') {
4805     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4806     my $data = $1;
4807     ## As if in the main phase.
4808     ## NOTE: The insertion mode in the main phase
4809     ## just before the phase has been changed to the trailing
4810     ## end phase is either "after body" or "after frameset".
4811     $reconstruct_active_formatting_elements->($insert_to_current)
4812     if $phase eq 'main';
4813    
4814 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4815 wakaba 1.1
4816     unless (length $token->{data}) {
4817     !!!next-token;
4818     redo B;
4819     }
4820     }
4821    
4822 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4823 wakaba 1.1 $phase = 'main';
4824     ## reprocess
4825     redo B;
4826     } elsif ($token->{type} eq 'start tag' or
4827     $token->{type} eq 'end tag') {
4828 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4829 wakaba 1.1 $phase = 'main';
4830     ## reprocess
4831     redo B;
4832     } elsif ($token->{type} eq 'end-of-file') {
4833     ## Stop parsing
4834     last B;
4835     } else {
4836     die "$0: $token->{type}: Unknown token";
4837     }
4838     }
4839     } # B
4840    
4841     ## Stop parsing # MUST
4842    
4843     ## TODO: script stuffs
4844 wakaba 1.3 } # _tree_construct_main
4845    
## set_inner_html ($class, $node, $s, $onerror)
##
## Replaces the children of |$node| with the result of parsing the
## string |$s| as HTML.
##
##   $class   - The parser class; |new| and |parse_string| are invoked
##              on it.
##   $node    - A DOM node.  Only node type 9 (Document) and node type 1
##              (Element) are supported; any other node type makes this
##              method |die|.
##   $s       - The markup to parse.  It is accessed through a reference
##              to the caller's argument and is not copied or modified.
##   $onerror - Optional code reference invoked for parse errors.  In the
##              element case it receives the error's key/value pairs
##              followed by |line => N, column => N|.  When omitted, the
##              element case reports errors with |warn|.
##
## NOTE: The prototype is kept for compatibility with existing callers;
## it has no effect on method calls.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  if ($nt == 9) { # Document
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Take a snapshot of the child list first, since the list is being
    ## modified while we walk it.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) { # Element
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## The fragment is parsed into a fresh, temporary document.
    my $doc = $node->owner_document->implementation->create_document;
    ## TODO: Mark as HTML document
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 9 # MUST
    ## Input stream: |$i| is the offset of the next unread character of
    ## |$$s|; |$line| and |$column| track the position for error reports.
    my $i = 0;
    my $line = 1;
    my $column = 0;
    $p->{set_next_input_character} = sub {
      my $self = shift;
      if ($i >= length $$s) {
        $self->{next_input_character} = -1; # end of input
        return;
      }
      $self->{next_input_character} = ord substr $$s, $i++, 1;
      $column++;

      if ($self->{next_input_character} == 0x000A) { # LF
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} == 0x000D) { # CR
        ## A CR LF pair is a single newline: swallow the LF.  Any other
        ## following character is only peeked at and left in the input,
        ## such that it goes through this normalization itself when it
        ## is read (e.g. the second CR of CR CR, or a NULL after a CR).
        if ($i < length ($$s) and substr ($$s, $i, 1) eq "\x0A") {
          $i++;
        }
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_input_character} == 0x0000) { # NULL
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Default error handler used when the caller supplies none.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    ## Decorate every reported error with the current input position.
    $p->{parse_error} = sub {
      $ponerror->(@_, line => $line, column => $column);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model depends on the context element.
    my $node_ln = $node->local_name;
    $p->{content_model_flag} = {
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    }->{$node_ln} || 'PCDATA';
      ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 7 # MUST
    $p->_reset_insertion_mode;

    ## Step 8 # MUST
    ## The form element pointer is the nearest XHTML |form| element among
    ## the context node and its ancestors, if any.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->local_name eq 'form') { ## TODO: case?
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 3 # MUST
    ## Step 10 # MUST
    {
      ## NOTE: |!!!next-token| is not plain Perl; it is presumably
      ## expanded by the build step into code that refers to a lexical
      ## |$self|, which is why |$p| is aliased here.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 11 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    ## Move the parsed nodes from the temporary root into |$node|.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $node->append_child ($_);
    }
    ## ISSUE: adopt_node? mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
4999    
5000     } # tree construction stage
5001 wakaba 1.1
## get_inner_html ($class, $node, $on_error)
##
## Serializes the descendants of |$node| (not |$node| itself) as an HTML
## fragment.
##
##   $class    - Ignored.
##   $node     - The DOM node whose children are serialized.
##   $on_error - Optional code reference; invoked with the offending node
##               for every descendant whose node type cannot be
##               serialized.
##
## Returns a reference to the resulting string.
##
## NOTE: The prototype is kept for compatibility with existing callers;
## it has no effect on method calls.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted without escaping.
  my %is_cdata_element = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );
  ## Elements that are serialized as a start tag only.
  my %is_void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  ## Step 1
  my $s = '';

  ## Text is emitted raw if |$node| or one of its ancestors is an XHTML
  ## CDATA-like element.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI can be undefined (null namespace).
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $is_cdata_element{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue in document order.  Besides node objects it
  ## holds plain strings: pending end tags, and the marker |cdata-out|
  ## that ends the raw-text mode entered for a CDATA-like element.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape (|&| first, such that the other references are kept)
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      next C if $is_void_element{$tag_name};

      if (not $in_cdata and $is_cdata_element{$tag_name}) {
        ## Leave the raw-text mode right after this element's end tag.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## Expand in place: the children have to be put at the front of
      ## the queue, or they would be emitted after all following
      ## siblings and pending end tags.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5098    
5099     1;
5100 wakaba 1.9 # $Date: 2007/06/23 02:26:51 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24