/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations) (download) (as text)
Wed May 30 12:24:50 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.6: +4 -2 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	30 May 2007 12:23:55 -0000
2007-05-30  Wakaba  <wakaba@suika.fam.cx>

	* ContentChecker.t: Don't use obsolete |new| method.

	* tree-test-1.dat: Nested form test added.

++ whatpm/Whatpm/ChangeLog	30 May 2007 12:24:39 -0000
2007-05-30  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm.src: |<form><form>| went into an infinite loop.

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.7 our $VERSION=do{my @r=(q$Revision: 1.6 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Tag names whose start tag may end with "/>".  The tokenizer looks a
## start tag's name up here when it sees "/" immediately followed by
## ">"; any other use of "/" inside a tag is reported as a |nestc|
## parse error.
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Named character reference table.  Each key is a bare entity name
## (no "&" or ";") and each value is the one-character string for the
## code point written in the |\x{...}| escape.  Keys are case-sensitive:
## for example |Dagger| and |dagger| denote different characters, while
## |AMP|, |COPY|, |GT|, |LT|, |QUOT| and |REG| repeat the value of their
## lower-case spellings.
## NOTE(review): the code that consults this table is outside this
## excerpt -- presumably the entity tokenizer; confirm before relying
## on that.
21     my $entity_char = {
22     AElig => "\x{00C6}",
23     Aacute => "\x{00C1}",
24     Acirc => "\x{00C2}",
25     Agrave => "\x{00C0}",
26     Alpha => "\x{0391}",
27     Aring => "\x{00C5}",
28     Atilde => "\x{00C3}",
29     Auml => "\x{00C4}",
30     Beta => "\x{0392}",
31     Ccedil => "\x{00C7}",
32     Chi => "\x{03A7}",
33     Dagger => "\x{2021}",
34     Delta => "\x{0394}",
35     ETH => "\x{00D0}",
36     Eacute => "\x{00C9}",
37     Ecirc => "\x{00CA}",
38     Egrave => "\x{00C8}",
39     Epsilon => "\x{0395}",
40     Eta => "\x{0397}",
41     Euml => "\x{00CB}",
42     Gamma => "\x{0393}",
43     Iacute => "\x{00CD}",
44     Icirc => "\x{00CE}",
45     Igrave => "\x{00CC}",
46     Iota => "\x{0399}",
47     Iuml => "\x{00CF}",
48     Kappa => "\x{039A}",
49     Lambda => "\x{039B}",
50     Mu => "\x{039C}",
51     Ntilde => "\x{00D1}",
52     Nu => "\x{039D}",
53     OElig => "\x{0152}",
54     Oacute => "\x{00D3}",
55     Ocirc => "\x{00D4}",
56     Ograve => "\x{00D2}",
57     Omega => "\x{03A9}",
58     Omicron => "\x{039F}",
59     Oslash => "\x{00D8}",
60     Otilde => "\x{00D5}",
61     Ouml => "\x{00D6}",
62     Phi => "\x{03A6}",
63     Pi => "\x{03A0}",
64     Prime => "\x{2033}",
65     Psi => "\x{03A8}",
66     Rho => "\x{03A1}",
67     Scaron => "\x{0160}",
68     Sigma => "\x{03A3}",
69     THORN => "\x{00DE}",
70     Tau => "\x{03A4}",
71     Theta => "\x{0398}",
72     Uacute => "\x{00DA}",
73     Ucirc => "\x{00DB}",
74     Ugrave => "\x{00D9}",
75     Upsilon => "\x{03A5}",
76     Uuml => "\x{00DC}",
77     Xi => "\x{039E}",
78     Yacute => "\x{00DD}",
79     Yuml => "\x{0178}",
80     Zeta => "\x{0396}",
81     aacute => "\x{00E1}",
82     acirc => "\x{00E2}",
83     acute => "\x{00B4}",
84     aelig => "\x{00E6}",
85     agrave => "\x{00E0}",
86     alefsym => "\x{2135}",
87     alpha => "\x{03B1}",
88     amp => "\x{0026}",
89     AMP => "\x{0026}",
90     and => "\x{2227}",
91     ang => "\x{2220}",
92     apos => "\x{0027}",
93     aring => "\x{00E5}",
94     asymp => "\x{2248}",
95     atilde => "\x{00E3}",
96     auml => "\x{00E4}",
97     bdquo => "\x{201E}",
98     beta => "\x{03B2}",
99     brvbar => "\x{00A6}",
100     bull => "\x{2022}",
101     cap => "\x{2229}",
102     ccedil => "\x{00E7}",
103     cedil => "\x{00B8}",
104     cent => "\x{00A2}",
105     chi => "\x{03C7}",
106     circ => "\x{02C6}",
107     clubs => "\x{2663}",
108     cong => "\x{2245}",
109     copy => "\x{00A9}",
110     COPY => "\x{00A9}",
111     crarr => "\x{21B5}",
112     cup => "\x{222A}",
113     curren => "\x{00A4}",
114     dArr => "\x{21D3}",
115     dagger => "\x{2020}",
116     darr => "\x{2193}",
117     deg => "\x{00B0}",
118     delta => "\x{03B4}",
119     diams => "\x{2666}",
120     divide => "\x{00F7}",
121     eacute => "\x{00E9}",
122     ecirc => "\x{00EA}",
123     egrave => "\x{00E8}",
124     empty => "\x{2205}",
125     emsp => "\x{2003}",
126     ensp => "\x{2002}",
127     epsilon => "\x{03B5}",
128     equiv => "\x{2261}",
129     eta => "\x{03B7}",
130     eth => "\x{00F0}",
131     euml => "\x{00EB}",
132     euro => "\x{20AC}",
133     exist => "\x{2203}",
134     fnof => "\x{0192}",
135     forall => "\x{2200}",
136     frac12 => "\x{00BD}",
137     frac14 => "\x{00BC}",
138     frac34 => "\x{00BE}",
139     frasl => "\x{2044}",
140     gamma => "\x{03B3}",
141     ge => "\x{2265}",
142     gt => "\x{003E}",
143     GT => "\x{003E}",
144     hArr => "\x{21D4}",
145     harr => "\x{2194}",
146     hearts => "\x{2665}",
147     hellip => "\x{2026}",
148     iacute => "\x{00ED}",
149     icirc => "\x{00EE}",
150     iexcl => "\x{00A1}",
151     igrave => "\x{00EC}",
152     image => "\x{2111}",
153     infin => "\x{221E}",
154     int => "\x{222B}",
155     iota => "\x{03B9}",
156     iquest => "\x{00BF}",
157     isin => "\x{2208}",
158     iuml => "\x{00EF}",
159     kappa => "\x{03BA}",
160     lArr => "\x{21D0}",
161     lambda => "\x{03BB}",
162     lang => "\x{2329}",
163     laquo => "\x{00AB}",
164     larr => "\x{2190}",
165     lceil => "\x{2308}",
166     ldquo => "\x{201C}",
167     le => "\x{2264}",
168     lfloor => "\x{230A}",
169     lowast => "\x{2217}",
170     loz => "\x{25CA}",
171     lrm => "\x{200E}",
172     lsaquo => "\x{2039}",
173     lsquo => "\x{2018}",
174     lt => "\x{003C}",
175     LT => "\x{003C}",
176     macr => "\x{00AF}",
177     mdash => "\x{2014}",
178     micro => "\x{00B5}",
179     middot => "\x{00B7}",
180     minus => "\x{2212}",
181     mu => "\x{03BC}",
182     nabla => "\x{2207}",
183     nbsp => "\x{00A0}",
184     ndash => "\x{2013}",
185     ne => "\x{2260}",
186     ni => "\x{220B}",
187     not => "\x{00AC}",
188     notin => "\x{2209}",
189     nsub => "\x{2284}",
190     ntilde => "\x{00F1}",
191     nu => "\x{03BD}",
192     oacute => "\x{00F3}",
193     ocirc => "\x{00F4}",
194     oelig => "\x{0153}",
195     ograve => "\x{00F2}",
196     oline => "\x{203E}",
197     omega => "\x{03C9}",
198     omicron => "\x{03BF}",
199     oplus => "\x{2295}",
200     or => "\x{2228}",
201     ordf => "\x{00AA}",
202     ordm => "\x{00BA}",
203     oslash => "\x{00F8}",
204     otilde => "\x{00F5}",
205     otimes => "\x{2297}",
206     ouml => "\x{00F6}",
207     para => "\x{00B6}",
208     part => "\x{2202}",
209     permil => "\x{2030}",
210     perp => "\x{22A5}",
211     phi => "\x{03C6}",
212     pi => "\x{03C0}",
213     piv => "\x{03D6}",
214     plusmn => "\x{00B1}",
215     pound => "\x{00A3}",
216     prime => "\x{2032}",
217     prod => "\x{220F}",
218     prop => "\x{221D}",
219     psi => "\x{03C8}",
220     quot => "\x{0022}",
221     QUOT => "\x{0022}",
222     rArr => "\x{21D2}",
223     radic => "\x{221A}",
224     rang => "\x{232A}",
225     raquo => "\x{00BB}",
226     rarr => "\x{2192}",
227     rceil => "\x{2309}",
228     rdquo => "\x{201D}",
229     real => "\x{211C}",
230     reg => "\x{00AE}",
231     REG => "\x{00AE}",
232     rfloor => "\x{230B}",
233     rho => "\x{03C1}",
234     rlm => "\x{200F}",
235     rsaquo => "\x{203A}",
236     rsquo => "\x{2019}",
237     sbquo => "\x{201A}",
238     scaron => "\x{0161}",
239     sdot => "\x{22C5}",
240     sect => "\x{00A7}",
241     shy => "\x{00AD}",
242     sigma => "\x{03C3}",
243     sigmaf => "\x{03C2}",
244     sim => "\x{223C}",
245     spades => "\x{2660}",
246     sub => "\x{2282}",
247     sube => "\x{2286}",
248     sum => "\x{2211}",
249     sup => "\x{2283}",
250     sup1 => "\x{00B9}",
251     sup2 => "\x{00B2}",
252     sup3 => "\x{00B3}",
253     supe => "\x{2287}",
254     szlig => "\x{00DF}",
255     tau => "\x{03C4}",
256     there4 => "\x{2234}",
257     theta => "\x{03B8}",
258     thetasym => "\x{03D1}",
259     thinsp => "\x{2009}",
260     thorn => "\x{00FE}",
261     tilde => "\x{02DC}",
262     times => "\x{00D7}",
263     trade => "\x{2122}",
264     uArr => "\x{21D1}",
265     uacute => "\x{00FA}",
266     uarr => "\x{2191}",
267     ucirc => "\x{00FB}",
268     ugrave => "\x{00F9}",
269     uml => "\x{00A8}",
270     upsih => "\x{03D2}",
271     upsilon => "\x{03C5}",
272     uuml => "\x{00FC}",
273     weierp => "\x{2118}",
274     xi => "\x{03BE}",
275     yacute => "\x{00FD}",
276     yen => "\x{00A5}",
277     yuml => "\x{00FF}",
278     zeta => "\x{03B6}",
279     zwj => "\x{200D}",
280     zwnj => "\x{200C}",
281 wakaba 1.4 }; # $entity_char
282    
283     ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
## Replacement table for the code points 128..159: each key is a code
## point in that range and each value is the code point to use in its
## place.  Five keys (129, 141, 143, 144 and 157) map to U+FFFD
## REPLACEMENT CHARACTER.
## NOTE(review): the code that applies this table is outside this
## excerpt -- presumably numeric character reference handling; confirm.
my $c1_entity_char = {
  128 => 0x20AC,
  129 => 0xFFFD,
  130 => 0x201A,
  131 => 0x0192,
  132 => 0x201E,
  133 => 0x2026,
  134 => 0x2020,
  135 => 0x2021,
  136 => 0x02C6,
  137 => 0x2030,
  138 => 0x0160,
  139 => 0x2039,
  140 => 0x0152,
  141 => 0xFFFD,
  142 => 0x017D,
  143 => 0xFFFD,
  144 => 0xFFFD,
  145 => 0x2018,
  146 => 0x2019,
  147 => 0x201C,
  148 => 0x201D,
  149 => 0x2022,
  150 => 0x2013,
  151 => 0x2014,
  152 => 0x02DC,
  153 => 0x2122,
  154 => 0x0161,
  155 => 0x203A,
  156 => 0x0153,
  157 => 0xFFFD,
  158 => 0x017E,
  159 => 0x0178,
}; # $c1_entity_char
318 wakaba 1.1
## Element names belonging to the "special" category; every listed
## name maps to 1.
## NOTE(review): the code that consults this set is outside this
## excerpt -- presumably the tree constructor; confirm.
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Element names belonging to the "scoping" category; every listed
## name maps to 1.
my $scoping_category = {
  map { ($_ => 1) } qw(
    button caption html marquee object table td th
  )
};
## Element names belonging to the "formatting" category; every listed
## name maps to 1.  (The list previously spelled |strike| as |strile|,
## so |strike| elements were never recognized as formatting elements.)
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
339     # $phrasing_category: all other elements
340    
## Parses a string into a document.
##
## Arguments, after the invocant (which is used only to call |new|):
##   $_[0]  The string to parse.  It is referenced, not copied.
##   $_[1]  The document object; stored in |$self->{document}|.
##   $_[2]  Optional error handler.  It is called with name/value
##          pairs: whatever the reporter supplied, followed by |line|
##          and |column|.  The default handler |warn|s, using the
##          |type|, |line| and |column| values.
##
## Returns the document object.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $i = 0;       # Index of the next unread character in |$$s|
  my $line = 1;
  my $column = 0;
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## End of input is represented by -1.
    if ($i >= length $$s) {
      $self->{next_input_character} = -1;
      return;
    }

    $self->{next_input_character} = ord substr $$s, $i++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## CR LF and a lone CR both become a single LF.  Only peek at
      ## the following character: if it is not LF it must stay in the
      ## input so that the next call normalizes and counts it like
      ## any other character (e.g. the second CR of "\x0D\x0D", or a
      ## NULL following a CR).
      if ($i < length $$s and substr ($$s, $i, 1) eq "\x0A") {
        $i++;
      }
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error is annotated with the current position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
396    
## Creates a parser object with default callbacks.
##
## The default |set_next_input_character| callback always reports end
## of input (-1); the default |parse_error| callback ignores errors.
## |parse_string| replaces both.
##
## The callbacks must not close over |$self|: a code reference stored
## in the object that also captures the object forms a reference
## cycle, so the parser would never be freed.  Like the callback
## installed by |parse_string|, the default callback therefore works
## on the object passed as its first argument.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    $_[0]->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
408    
409     ## Implementations MUST act as if they used the state machine described in the spec.
410    
## Puts the tokenizer into its initial condition: the |data| state
## with the |PCDATA| content model flag, no token or attribute under
## construction, an empty |char| list and an empty |token| queue.
## The first input character is fetched through the
## |next-input-character| macro once |char| has been reset.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA';

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE
  ## while one is being built; nothing is being built yet.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  $self->{char} = [];
  ## Sets |$self->{next_input_character}|.
  !!!next-input-character;
  $self->{token} = [];
} # _initialize_tokenizer
424    
425     ## A token has:
426     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
427     ## 'character', or 'end-of-file'
428     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
429     ## ISSUE: the spec need s/tagname/tag name/
430     ## ->{error} == 1 or 0 (DOCTYPE)
431     ## ->{attributes} isa HASH (start tag, end tag)
432     ## ->{data} (comment, character)
433    
434     ## Macros
435     ## Macros MUST be preceded by three EXCLAMATION MARKs.
436     ## emit ($token)
437     ## Emits the specified token.
438    
439     ## Emitted token MUST immediately be handled by the tree construction state.
440    
441     ## Before each step, UA MAY check to see if either one of the scripts in
442     ## "list of scripts that will execute as soon as possible" or the first
443     ## script in the "list of scripts that will execute asynchronously",
444     ## has completed loading. If one has, then it MUST be executed
445     ## and removed from the list.
446    
447     sub _get_next_token ($) {
448     my $self = shift;
449     if (@{$self->{token}}) {
450     return shift @{$self->{token}};
451     }
452    
453     A: {
454     if ($self->{state} eq 'data') {
455     if ($self->{next_input_character} == 0x0026) { # &
456     if ($self->{content_model_flag} eq 'PCDATA' or
457     $self->{content_model_flag} eq 'RCDATA') {
458     $self->{state} = 'entity data';
459     !!!next-input-character;
460     redo A;
461     } else {
462     #
463     }
464     } elsif ($self->{next_input_character} == 0x003C) { # <
465     if ($self->{content_model_flag} ne 'PLAINTEXT') {
466     $self->{state} = 'tag open';
467     !!!next-input-character;
468     redo A;
469     } else {
470     #
471     }
472     } elsif ($self->{next_input_character} == -1) {
473     !!!emit ({type => 'end-of-file'});
474     last A; ## TODO: ok?
475     }
476     # Anything else
477     my $token = {type => 'character',
478     data => chr $self->{next_input_character}};
479     ## Stay in the data state
480     !!!next-input-character;
481    
482     !!!emit ($token);
483    
484     redo A;
485     } elsif ($self->{state} eq 'entity data') {
486     ## (cannot happen in CDATA state)
487    
488     my $token = $self->_tokenize_attempt_to_consume_an_entity;
489    
490     $self->{state} = 'data';
491     # next-input-character is already done
492    
493     unless (defined $token) {
494     !!!emit ({type => 'character', data => '&'});
495     } else {
496     !!!emit ($token);
497     }
498    
499     redo A;
500     } elsif ($self->{state} eq 'tag open') {
501     if ($self->{content_model_flag} eq 'RCDATA' or
502     $self->{content_model_flag} eq 'CDATA') {
503     if ($self->{next_input_character} == 0x002F) { # /
504     !!!next-input-character;
505     $self->{state} = 'close tag open';
506     redo A;
507     } else {
508     ## reconsume
509     $self->{state} = 'data';
510    
511     !!!emit ({type => 'character', data => '<'});
512    
513     redo A;
514     }
515     } elsif ($self->{content_model_flag} eq 'PCDATA') {
516     if ($self->{next_input_character} == 0x0021) { # !
517     $self->{state} = 'markup declaration open';
518     !!!next-input-character;
519     redo A;
520     } elsif ($self->{next_input_character} == 0x002F) { # /
521     $self->{state} = 'close tag open';
522     !!!next-input-character;
523     redo A;
524     } elsif (0x0041 <= $self->{next_input_character} and
525     $self->{next_input_character} <= 0x005A) { # A..Z
526     $self->{current_token}
527     = {type => 'start tag',
528     tag_name => chr ($self->{next_input_character} + 0x0020)};
529     $self->{state} = 'tag name';
530     !!!next-input-character;
531     redo A;
532     } elsif (0x0061 <= $self->{next_input_character} and
533     $self->{next_input_character} <= 0x007A) { # a..z
534     $self->{current_token} = {type => 'start tag',
535     tag_name => chr ($self->{next_input_character})};
536     $self->{state} = 'tag name';
537     !!!next-input-character;
538     redo A;
539     } elsif ($self->{next_input_character} == 0x003E) { # >
540 wakaba 1.3 !!!parse-error (type => 'empty start tag');
541 wakaba 1.1 $self->{state} = 'data';
542     !!!next-input-character;
543    
544     !!!emit ({type => 'character', data => '<>'});
545    
546     redo A;
547     } elsif ($self->{next_input_character} == 0x003F) { # ?
548 wakaba 1.3 !!!parse-error (type => 'pio');
549 wakaba 1.1 $self->{state} = 'bogus comment';
550     ## $self->{next_input_character} is intentionally left as is
551     redo A;
552     } else {
553 wakaba 1.3 !!!parse-error (type => 'bare stago');
554 wakaba 1.1 $self->{state} = 'data';
555     ## reconsume
556    
557     !!!emit ({type => 'character', data => '<'});
558    
559     redo A;
560     }
561     } else {
562     die "$0: $self->{content_model_flag}: Unknown content model flag";
563     }
564     } elsif ($self->{state} eq 'close tag open') {
565     if ($self->{content_model_flag} eq 'RCDATA' or
566     $self->{content_model_flag} eq 'CDATA') {
567     my @next_char;
568     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
569     push @next_char, $self->{next_input_character};
570     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
571     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
572     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
573     !!!next-input-character;
574     next TAGNAME;
575     } else {
576 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
577 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
578     !!!back-next-input-character (@next_char);
579     $self->{state} = 'data';
580    
581     !!!emit ({type => 'character', data => '</'});
582    
583     redo A;
584     }
585     }
586     push @next_char, $self->{next_input_character};
587    
588     unless ($self->{next_input_character} == 0x0009 or # HT
589     $self->{next_input_character} == 0x000A or # LF
590     $self->{next_input_character} == 0x000B or # VT
591     $self->{next_input_character} == 0x000C or # FF
592     $self->{next_input_character} == 0x0020 or # SP
593     $self->{next_input_character} == 0x003E or # >
594     $self->{next_input_character} == 0x002F or # /
595     $self->{next_input_character} == 0x003C or # <
596     $self->{next_input_character} == -1) {
597 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
598 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
599     !!!back-next-input-character (@next_char);
600     $self->{state} = 'data';
601    
602     !!!emit ({type => 'character', data => '</'});
603    
604     redo A;
605     } else {
606     $self->{next_input_character} = shift @next_char;
607     !!!back-next-input-character (@next_char);
608     # and consume...
609     }
610     }
611    
612     if (0x0041 <= $self->{next_input_character} and
613     $self->{next_input_character} <= 0x005A) { # A..Z
614     $self->{current_token} = {type => 'end tag',
615     tag_name => chr ($self->{next_input_character} + 0x0020)};
616     $self->{state} = 'tag name';
617     !!!next-input-character;
618     redo A;
619     } elsif (0x0061 <= $self->{next_input_character} and
620     $self->{next_input_character} <= 0x007A) { # a..z
621     $self->{current_token} = {type => 'end tag',
622     tag_name => chr ($self->{next_input_character})};
623     $self->{state} = 'tag name';
624     !!!next-input-character;
625     redo A;
626     } elsif ($self->{next_input_character} == 0x003E) { # >
627 wakaba 1.3 !!!parse-error (type => 'empty end tag');
628 wakaba 1.1 $self->{state} = 'data';
629     !!!next-input-character;
630     redo A;
631     } elsif ($self->{next_input_character} == -1) {
632 wakaba 1.3 !!!parse-error (type => 'bare etago');
633 wakaba 1.1 $self->{state} = 'data';
634     # reconsume
635    
636     !!!emit ({type => 'character', data => '</'});
637    
638     redo A;
639     } else {
640 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
641 wakaba 1.1 $self->{state} = 'bogus comment';
642     ## $self->{next_input_character} is intentionally left as is
643     redo A;
644     }
645     } elsif ($self->{state} eq 'tag name') {
646     if ($self->{next_input_character} == 0x0009 or # HT
647     $self->{next_input_character} == 0x000A or # LF
648     $self->{next_input_character} == 0x000B or # VT
649     $self->{next_input_character} == 0x000C or # FF
650     $self->{next_input_character} == 0x0020) { # SP
651     $self->{state} = 'before attribute name';
652     !!!next-input-character;
653     redo A;
654     } elsif ($self->{next_input_character} == 0x003E) { # >
655     if ($self->{current_token}->{type} eq 'start tag') {
656     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
657     } elsif ($self->{current_token}->{type} eq 'end tag') {
658     $self->{content_model_flag} = 'PCDATA'; # MUST
659     if ($self->{current_token}->{attributes}) {
660 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
661 wakaba 1.1 }
662     } else {
663     die "$0: $self->{current_token}->{type}: Unknown token type";
664     }
665     $self->{state} = 'data';
666     !!!next-input-character;
667    
668     !!!emit ($self->{current_token}); # start tag or end tag
669     undef $self->{current_token};
670    
671     redo A;
672     } elsif (0x0041 <= $self->{next_input_character} and
673     $self->{next_input_character} <= 0x005A) { # A..Z
674     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
675     # start tag or end tag
676     ## Stay in this state
677     !!!next-input-character;
678     redo A;
679     } elsif ($self->{next_input_character} == 0x003C or # <
680     $self->{next_input_character} == -1) {
681 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
682 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
683     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
684     } elsif ($self->{current_token}->{type} eq 'end tag') {
685     $self->{content_model_flag} = 'PCDATA'; # MUST
686     if ($self->{current_token}->{attributes}) {
687 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
688 wakaba 1.1 }
689     } else {
690     die "$0: $self->{current_token}->{type}: Unknown token type";
691     }
692     $self->{state} = 'data';
693     # reconsume
694    
695     !!!emit ($self->{current_token}); # start tag or end tag
696     undef $self->{current_token};
697    
698     redo A;
699     } elsif ($self->{next_input_character} == 0x002F) { # /
700     !!!next-input-character;
701     if ($self->{next_input_character} == 0x003E and # >
702     $self->{current_token}->{type} eq 'start tag' and
703     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
704     # permitted slash
705     #
706     } else {
707 wakaba 1.3 !!!parse-error (type => 'nestc');
708 wakaba 1.1 }
709     $self->{state} = 'before attribute name';
710     # next-input-character is already done
711     redo A;
712     } else {
713     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
714     # start tag or end tag
715     ## Stay in the state
716     !!!next-input-character;
717     redo A;
718     }
719     } elsif ($self->{state} eq 'before attribute name') {
720     if ($self->{next_input_character} == 0x0009 or # HT
721     $self->{next_input_character} == 0x000A or # LF
722     $self->{next_input_character} == 0x000B or # VT
723     $self->{next_input_character} == 0x000C or # FF
724     $self->{next_input_character} == 0x0020) { # SP
725     ## Stay in the state
726     !!!next-input-character;
727     redo A;
728     } elsif ($self->{next_input_character} == 0x003E) { # >
729     if ($self->{current_token}->{type} eq 'start tag') {
730     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
731     } elsif ($self->{current_token}->{type} eq 'end tag') {
732     $self->{content_model_flag} = 'PCDATA'; # MUST
733     if ($self->{current_token}->{attributes}) {
734 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
735 wakaba 1.1 }
736     } else {
737     die "$0: $self->{current_token}->{type}: Unknown token type";
738     }
739     $self->{state} = 'data';
740     !!!next-input-character;
741    
742     !!!emit ($self->{current_token}); # start tag or end tag
743     undef $self->{current_token};
744    
745     redo A;
746     } elsif (0x0041 <= $self->{next_input_character} and
747     $self->{next_input_character} <= 0x005A) { # A..Z
748     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
749     value => ''};
750     $self->{state} = 'attribute name';
751     !!!next-input-character;
752     redo A;
753     } elsif ($self->{next_input_character} == 0x002F) { # /
754     !!!next-input-character;
755     if ($self->{next_input_character} == 0x003E and # >
756     $self->{current_token}->{type} eq 'start tag' and
757     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
758     # permitted slash
759     #
760     } else {
761 wakaba 1.3 !!!parse-error (type => 'nestc');
762 wakaba 1.1 }
763     ## Stay in the state
764     # next-input-character is already done
765     redo A;
766     } elsif ($self->{next_input_character} == 0x003C or # <
767     $self->{next_input_character} == -1) {
768 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
769 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
770     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
771     } elsif ($self->{current_token}->{type} eq 'end tag') {
772     $self->{content_model_flag} = 'PCDATA'; # MUST
773     if ($self->{current_token}->{attributes}) {
774 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
775 wakaba 1.1 }
776     } else {
777     die "$0: $self->{current_token}->{type}: Unknown token type";
778     }
779     $self->{state} = 'data';
780     # reconsume
781    
782     !!!emit ($self->{current_token}); # start tag or end tag
783     undef $self->{current_token};
784    
785     redo A;
786     } else {
787     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
788     value => ''};
789     $self->{state} = 'attribute name';
790     !!!next-input-character;
791     redo A;
792     }
793     } elsif ($self->{state} eq 'attribute name') {
794     my $before_leave = sub {
795     if (exists $self->{current_token}->{attributes} # start tag or end tag
796     ->{$self->{current_attribute}->{name}}) { # MUST
797 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
798 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
799     } else {
800     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
801     = $self->{current_attribute};
802     }
803     }; # $before_leave
804    
805     if ($self->{next_input_character} == 0x0009 or # HT
806     $self->{next_input_character} == 0x000A or # LF
807     $self->{next_input_character} == 0x000B or # VT
808     $self->{next_input_character} == 0x000C or # FF
809     $self->{next_input_character} == 0x0020) { # SP
810     $before_leave->();
811     $self->{state} = 'after attribute name';
812     !!!next-input-character;
813     redo A;
814     } elsif ($self->{next_input_character} == 0x003D) { # =
815     $before_leave->();
816     $self->{state} = 'before attribute value';
817     !!!next-input-character;
818     redo A;
819     } elsif ($self->{next_input_character} == 0x003E) { # >
820     $before_leave->();
821     if ($self->{current_token}->{type} eq 'start tag') {
822     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
823     } elsif ($self->{current_token}->{type} eq 'end tag') {
824     $self->{content_model_flag} = 'PCDATA'; # MUST
825     if ($self->{current_token}->{attributes}) {
826 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
827 wakaba 1.1 }
828     } else {
829     die "$0: $self->{current_token}->{type}: Unknown token type";
830     }
831     $self->{state} = 'data';
832     !!!next-input-character;
833    
834     !!!emit ($self->{current_token}); # start tag or end tag
835     undef $self->{current_token};
836    
837     redo A;
838     } elsif (0x0041 <= $self->{next_input_character} and
839     $self->{next_input_character} <= 0x005A) { # A..Z
840     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
841     ## Stay in the state
842     !!!next-input-character;
843     redo A;
844     } elsif ($self->{next_input_character} == 0x002F) { # /
845     $before_leave->();
846     !!!next-input-character;
847     if ($self->{next_input_character} == 0x003E and # >
848     $self->{current_token}->{type} eq 'start tag' and
849     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
850     # permitted slash
851     #
852     } else {
853 wakaba 1.3 !!!parse-error (type => 'nestc');
854 wakaba 1.1 }
855     $self->{state} = 'before attribute name';
856     # next-input-character is already done
857     redo A;
858     } elsif ($self->{next_input_character} == 0x003C or # <
859     $self->{next_input_character} == -1) {
860 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
861 wakaba 1.1 $before_leave->();
862     if ($self->{current_token}->{type} eq 'start tag') {
863     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
864     } elsif ($self->{current_token}->{type} eq 'end tag') {
865     $self->{content_model_flag} = 'PCDATA'; # MUST
866     if ($self->{current_token}->{attributes}) {
867 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
868 wakaba 1.1 }
869     } else {
870     die "$0: $self->{current_token}->{type}: Unknown token type";
871     }
872     $self->{state} = 'data';
873     # reconsume
874    
875     !!!emit ($self->{current_token}); # start tag or end tag
876     undef $self->{current_token};
877    
878     redo A;
879     } else {
880     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
881     ## Stay in the state
882     !!!next-input-character;
883     redo A;
884     }
885     } elsif ($self->{state} eq 'after attribute name') {
886     if ($self->{next_input_character} == 0x0009 or # HT
887     $self->{next_input_character} == 0x000A or # LF
888     $self->{next_input_character} == 0x000B or # VT
889     $self->{next_input_character} == 0x000C or # FF
890     $self->{next_input_character} == 0x0020) { # SP
891     ## Stay in the state
892     !!!next-input-character;
893     redo A;
894     } elsif ($self->{next_input_character} == 0x003D) { # =
895     $self->{state} = 'before attribute value';
896     !!!next-input-character;
897     redo A;
898     } elsif ($self->{next_input_character} == 0x003E) { # >
899     if ($self->{current_token}->{type} eq 'start tag') {
900     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
901     } elsif ($self->{current_token}->{type} eq 'end tag') {
902     $self->{content_model_flag} = 'PCDATA'; # MUST
903     if ($self->{current_token}->{attributes}) {
904 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
905 wakaba 1.1 }
906     } else {
907     die "$0: $self->{current_token}->{type}: Unknown token type";
908     }
909     $self->{state} = 'data';
910     !!!next-input-character;
911    
912     !!!emit ($self->{current_token}); # start tag or end tag
913     undef $self->{current_token};
914    
915     redo A;
916     } elsif (0x0041 <= $self->{next_input_character} and
917     $self->{next_input_character} <= 0x005A) { # A..Z
918     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
919     value => ''};
920     $self->{state} = 'attribute name';
921     !!!next-input-character;
922     redo A;
923     } elsif ($self->{next_input_character} == 0x002F) { # /
924     !!!next-input-character;
925     if ($self->{next_input_character} == 0x003E and # >
926     $self->{current_token}->{type} eq 'start tag' and
927     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
928     # permitted slash
929     #
930     } else {
931 wakaba 1.3 !!!parse-error (type => 'nestc');
932 wakaba 1.1 }
933     $self->{state} = 'before attribute name';
934     # next-input-character is already done
935     redo A;
936     } elsif ($self->{next_input_character} == 0x003C or # <
937     $self->{next_input_character} == -1) {
938 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
939 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
940     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
941     } elsif ($self->{current_token}->{type} eq 'end tag') {
942     $self->{content_model_flag} = 'PCDATA'; # MUST
943     if ($self->{current_token}->{attributes}) {
944 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
945 wakaba 1.1 }
946     } else {
947     die "$0: $self->{current_token}->{type}: Unknown token type";
948     }
949     $self->{state} = 'data';
950     # reconsume
951    
952     !!!emit ($self->{current_token}); # start tag or end tag
953     undef $self->{current_token};
954    
955     redo A;
956     } else {
957     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
958     value => ''};
959     $self->{state} = 'attribute name';
960     !!!next-input-character;
961     redo A;
962     }
963     } elsif ($self->{state} eq 'before attribute value') {
964     if ($self->{next_input_character} == 0x0009 or # HT
965     $self->{next_input_character} == 0x000A or # LF
966     $self->{next_input_character} == 0x000B or # VT
967     $self->{next_input_character} == 0x000C or # FF
968     $self->{next_input_character} == 0x0020) { # SP
969     ## Stay in the state
970     !!!next-input-character;
971     redo A;
972     } elsif ($self->{next_input_character} == 0x0022) { # "
973     $self->{state} = 'attribute value (double-quoted)';
974     !!!next-input-character;
975     redo A;
976     } elsif ($self->{next_input_character} == 0x0026) { # &
977     $self->{state} = 'attribute value (unquoted)';
978     ## reconsume
979     redo A;
980     } elsif ($self->{next_input_character} == 0x0027) { # '
981     $self->{state} = 'attribute value (single-quoted)';
982     !!!next-input-character;
983     redo A;
984     } elsif ($self->{next_input_character} == 0x003E) { # >
985     if ($self->{current_token}->{type} eq 'start tag') {
986     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
987     } elsif ($self->{current_token}->{type} eq 'end tag') {
988     $self->{content_model_flag} = 'PCDATA'; # MUST
989     if ($self->{current_token}->{attributes}) {
990 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
991 wakaba 1.1 }
992     } else {
993     die "$0: $self->{current_token}->{type}: Unknown token type";
994     }
995     $self->{state} = 'data';
996     !!!next-input-character;
997    
998     !!!emit ($self->{current_token}); # start tag or end tag
999     undef $self->{current_token};
1000    
1001     redo A;
1002     } elsif ($self->{next_input_character} == 0x003C or # <
1003     $self->{next_input_character} == -1) {
1004 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1005 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1006     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1007     } elsif ($self->{current_token}->{type} eq 'end tag') {
1008     $self->{content_model_flag} = 'PCDATA'; # MUST
1009     if ($self->{current_token}->{attributes}) {
1010 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1011 wakaba 1.1 }
1012     } else {
1013     die "$0: $self->{current_token}->{type}: Unknown token type";
1014     }
1015     $self->{state} = 'data';
1016     ## reconsume
1017    
1018     !!!emit ($self->{current_token}); # start tag or end tag
1019     undef $self->{current_token};
1020    
1021     redo A;
1022     } else {
1023     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1024     $self->{state} = 'attribute value (unquoted)';
1025     !!!next-input-character;
1026     redo A;
1027     }
1028     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1029     if ($self->{next_input_character} == 0x0022) { # "
1030     $self->{state} = 'before attribute name';
1031     !!!next-input-character;
1032     redo A;
1033     } elsif ($self->{next_input_character} == 0x0026) { # &
1034     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1035     $self->{state} = 'entity in attribute value';
1036     !!!next-input-character;
1037     redo A;
1038     } elsif ($self->{next_input_character} == -1) {
1039 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1040 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1041     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1042     } elsif ($self->{current_token}->{type} eq 'end tag') {
1043     $self->{content_model_flag} = 'PCDATA'; # MUST
1044     if ($self->{current_token}->{attributes}) {
1045 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1046 wakaba 1.1 }
1047     } else {
1048     die "$0: $self->{current_token}->{type}: Unknown token type";
1049     }
1050     $self->{state} = 'data';
1051     ## reconsume
1052    
1053     !!!emit ($self->{current_token}); # start tag or end tag
1054     undef $self->{current_token};
1055    
1056     redo A;
1057     } else {
1058     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1059     ## Stay in the state
1060     !!!next-input-character;
1061     redo A;
1062     }
1063     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1064     if ($self->{next_input_character} == 0x0027) { # '
1065     $self->{state} = 'before attribute name';
1066     !!!next-input-character;
1067     redo A;
1068     } elsif ($self->{next_input_character} == 0x0026) { # &
1069     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1070     $self->{state} = 'entity in attribute value';
1071     !!!next-input-character;
1072     redo A;
1073     } elsif ($self->{next_input_character} == -1) {
1074 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1075 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1076     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1077     } elsif ($self->{current_token}->{type} eq 'end tag') {
1078     $self->{content_model_flag} = 'PCDATA'; # MUST
1079     if ($self->{current_token}->{attributes}) {
1080 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1081 wakaba 1.1 }
1082     } else {
1083     die "$0: $self->{current_token}->{type}: Unknown token type";
1084     }
1085     $self->{state} = 'data';
1086     ## reconsume
1087    
1088     !!!emit ($self->{current_token}); # start tag or end tag
1089     undef $self->{current_token};
1090    
1091     redo A;
1092     } else {
1093     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1094     ## Stay in the state
1095     !!!next-input-character;
1096     redo A;
1097     }
1098     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1099     if ($self->{next_input_character} == 0x0009 or # HT
1100     $self->{next_input_character} == 0x000A or # LF
1101     $self->{next_input_character} == 0x000B or # VT
1102     $self->{next_input_character} == 0x000C or # FF
1103     $self->{next_input_character} == 0x0020) { # SP
1104     $self->{state} = 'before attribute name';
1105     !!!next-input-character;
1106     redo A;
1107     } elsif ($self->{next_input_character} == 0x0026) { # &
1108     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1109     $self->{state} = 'entity in attribute value';
1110     !!!next-input-character;
1111     redo A;
1112     } elsif ($self->{next_input_character} == 0x003E) { # >
1113     if ($self->{current_token}->{type} eq 'start tag') {
1114     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1115     } elsif ($self->{current_token}->{type} eq 'end tag') {
1116     $self->{content_model_flag} = 'PCDATA'; # MUST
1117     if ($self->{current_token}->{attributes}) {
1118 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1119 wakaba 1.1 }
1120     } else {
1121     die "$0: $self->{current_token}->{type}: Unknown token type";
1122     }
1123     $self->{state} = 'data';
1124     !!!next-input-character;
1125    
1126     !!!emit ($self->{current_token}); # start tag or end tag
1127     undef $self->{current_token};
1128    
1129     redo A;
1130     } elsif ($self->{next_input_character} == 0x003C or # <
1131     $self->{next_input_character} == -1) {
1132 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1133 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1134     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1135     } elsif ($self->{current_token}->{type} eq 'end tag') {
1136     $self->{content_model_flag} = 'PCDATA'; # MUST
1137     if ($self->{current_token}->{attributes}) {
1138 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1139 wakaba 1.1 }
1140     } else {
1141     die "$0: $self->{current_token}->{type}: Unknown token type";
1142     }
1143     $self->{state} = 'data';
1144     ## reconsume
1145    
1146     !!!emit ($self->{current_token}); # start tag or end tag
1147     undef $self->{current_token};
1148    
1149     redo A;
1150     } else {
1151     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1152     ## Stay in the state
1153     !!!next-input-character;
1154     redo A;
1155     }
1156     } elsif ($self->{state} eq 'entity in attribute value') {
1157     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1158    
1159     unless (defined $token) {
1160     $self->{current_attribute}->{value} .= '&';
1161     } else {
1162     $self->{current_attribute}->{value} .= $token->{data};
1163     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1164     }
1165    
1166     $self->{state} = $self->{last_attribute_value_state};
1167     # next-input-character is already done
1168     redo A;
1169     } elsif ($self->{state} eq 'bogus comment') {
1170     ## (only happen if PCDATA state)
1171    
1172     my $token = {type => 'comment', data => ''};
1173    
1174     BC: {
1175     if ($self->{next_input_character} == 0x003E) { # >
1176     $self->{state} = 'data';
1177     !!!next-input-character;
1178    
1179     !!!emit ($token);
1180    
1181     redo A;
1182     } elsif ($self->{next_input_character} == -1) {
1183     $self->{state} = 'data';
1184     ## reconsume
1185    
1186     !!!emit ($token);
1187    
1188     redo A;
1189     } else {
1190     $token->{data} .= chr ($self->{next_input_character});
1191     !!!next-input-character;
1192     redo BC;
1193     }
1194     } # BC
1195     } elsif ($self->{state} eq 'markup declaration open') {
1196     ## (only happen if PCDATA state)
1197    
1198     my @next_char;
1199     push @next_char, $self->{next_input_character};
1200    
1201     if ($self->{next_input_character} == 0x002D) { # -
1202     !!!next-input-character;
1203     push @next_char, $self->{next_input_character};
1204     if ($self->{next_input_character} == 0x002D) { # -
1205     $self->{current_token} = {type => 'comment', data => ''};
1206     $self->{state} = 'comment';
1207     !!!next-input-character;
1208     redo A;
1209     }
1210     } elsif ($self->{next_input_character} == 0x0044 or # D
1211     $self->{next_input_character} == 0x0064) { # d
1212     !!!next-input-character;
1213     push @next_char, $self->{next_input_character};
1214     if ($self->{next_input_character} == 0x004F or # O
1215     $self->{next_input_character} == 0x006F) { # o
1216     !!!next-input-character;
1217     push @next_char, $self->{next_input_character};
1218     if ($self->{next_input_character} == 0x0043 or # C
1219     $self->{next_input_character} == 0x0063) { # c
1220     !!!next-input-character;
1221     push @next_char, $self->{next_input_character};
1222     if ($self->{next_input_character} == 0x0054 or # T
1223     $self->{next_input_character} == 0x0074) { # t
1224     !!!next-input-character;
1225     push @next_char, $self->{next_input_character};
1226     if ($self->{next_input_character} == 0x0059 or # Y
1227     $self->{next_input_character} == 0x0079) { # y
1228     !!!next-input-character;
1229     push @next_char, $self->{next_input_character};
1230     if ($self->{next_input_character} == 0x0050 or # P
1231     $self->{next_input_character} == 0x0070) { # p
1232     !!!next-input-character;
1233     push @next_char, $self->{next_input_character};
1234     if ($self->{next_input_character} == 0x0045 or # E
1235     $self->{next_input_character} == 0x0065) { # e
1236     ## ISSUE: What a stupid code this is!
1237     $self->{state} = 'DOCTYPE';
1238     !!!next-input-character;
1239     redo A;
1240     }
1241     }
1242     }
1243     }
1244     }
1245     }
1246     }
1247    
1248 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1249 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1250     !!!back-next-input-character (@next_char);
1251     $self->{state} = 'bogus comment';
1252     redo A;
1253    
1254     ## ISSUE: typos in spec: chacacters, is is a parse error
1255     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1256     } elsif ($self->{state} eq 'comment') {
1257     if ($self->{next_input_character} == 0x002D) { # -
1258     $self->{state} = 'comment dash';
1259     !!!next-input-character;
1260     redo A;
1261     } elsif ($self->{next_input_character} == -1) {
1262 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1263 wakaba 1.1 $self->{state} = 'data';
1264     ## reconsume
1265    
1266     !!!emit ($self->{current_token}); # comment
1267     undef $self->{current_token};
1268    
1269     redo A;
1270     } else {
1271     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1272     ## Stay in the state
1273     !!!next-input-character;
1274     redo A;
1275     }
1276     } elsif ($self->{state} eq 'comment dash') {
1277     if ($self->{next_input_character} == 0x002D) { # -
1278     $self->{state} = 'comment end';
1279     !!!next-input-character;
1280     redo A;
1281     } elsif ($self->{next_input_character} == -1) {
1282 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1283 wakaba 1.1 $self->{state} = 'data';
1284     ## reconsume
1285    
1286     !!!emit ($self->{current_token}); # comment
1287     undef $self->{current_token};
1288    
1289     redo A;
1290     } else {
1291     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1292     $self->{state} = 'comment';
1293     !!!next-input-character;
1294     redo A;
1295     }
1296     } elsif ($self->{state} eq 'comment end') {
1297     if ($self->{next_input_character} == 0x003E) { # >
1298     $self->{state} = 'data';
1299     !!!next-input-character;
1300    
1301     !!!emit ($self->{current_token}); # comment
1302     undef $self->{current_token};
1303    
1304     redo A;
1305     } elsif ($self->{next_input_character} == 0x002D) { # -
1306 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1307 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1308     ## Stay in the state
1309     !!!next-input-character;
1310     redo A;
1311     } elsif ($self->{next_input_character} == -1) {
1312 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1313 wakaba 1.1 $self->{state} = 'data';
1314     ## reconsume
1315    
1316     !!!emit ($self->{current_token}); # comment
1317     undef $self->{current_token};
1318    
1319     redo A;
1320     } else {
1321 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1322 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1323     $self->{state} = 'comment';
1324     !!!next-input-character;
1325     redo A;
1326     }
1327     } elsif ($self->{state} eq 'DOCTYPE') {
1328     if ($self->{next_input_character} == 0x0009 or # HT
1329     $self->{next_input_character} == 0x000A or # LF
1330     $self->{next_input_character} == 0x000B or # VT
1331     $self->{next_input_character} == 0x000C or # FF
1332     $self->{next_input_character} == 0x0020) { # SP
1333     $self->{state} = 'before DOCTYPE name';
1334     !!!next-input-character;
1335     redo A;
1336     } else {
1337 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1338 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1339     ## reconsume
1340     redo A;
1341     }
1342     } elsif ($self->{state} eq 'before DOCTYPE name') {
1343     if ($self->{next_input_character} == 0x0009 or # HT
1344     $self->{next_input_character} == 0x000A or # LF
1345     $self->{next_input_character} == 0x000B or # VT
1346     $self->{next_input_character} == 0x000C or # FF
1347     $self->{next_input_character} == 0x0020) { # SP
1348     ## Stay in the state
1349     !!!next-input-character;
1350     redo A;
1351     } elsif (0x0061 <= $self->{next_input_character} and
1352     $self->{next_input_character} <= 0x007A) { # a..z
1353 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1354 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1355     name => chr ($self->{next_input_character} - 0x0020),
1356     error => 1};
1357     $self->{state} = 'DOCTYPE name';
1358     !!!next-input-character;
1359     redo A;
1360     } elsif ($self->{next_input_character} == 0x003E) { # >
1361 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1362 wakaba 1.1 $self->{state} = 'data';
1363     !!!next-input-character;
1364    
1365     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1366    
1367     redo A;
1368     } elsif ($self->{next_input_character} == -1) {
1369 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1370 wakaba 1.1 $self->{state} = 'data';
1371     ## reconsume
1372    
1373     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1374    
1375     redo A;
1376     } else {
1377     $self->{current_token} = {type => 'DOCTYPE',
1378     name => chr ($self->{next_input_character}),
1379     error => 1};
1380 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1381 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1382     !!!next-input-character;
1383     redo A;
1384     }
1385     } elsif ($self->{state} eq 'DOCTYPE name') {
1386     if ($self->{next_input_character} == 0x0009 or # HT
1387     $self->{next_input_character} == 0x000A or # LF
1388     $self->{next_input_character} == 0x000B or # VT
1389     $self->{next_input_character} == 0x000C or # FF
1390     $self->{next_input_character} == 0x0020) { # SP
1391     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1392     $self->{state} = 'after DOCTYPE name';
1393     !!!next-input-character;
1394     redo A;
1395     } elsif ($self->{next_input_character} == 0x003E) { # >
1396     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1397     $self->{state} = 'data';
1398     !!!next-input-character;
1399    
1400     !!!emit ($self->{current_token}); # DOCTYPE
1401     undef $self->{current_token};
1402    
1403     redo A;
1404     } elsif (0x0061 <= $self->{next_input_character} and
1405     $self->{next_input_character} <= 0x007A) { # a..z
1406     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1407     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1408     ## Stay in the state
1409     !!!next-input-character;
1410     redo A;
1411     } elsif ($self->{next_input_character} == -1) {
1412 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1413 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1414     $self->{state} = 'data';
1415     ## reconsume
1416    
1417     !!!emit ($self->{current_token});
1418     undef $self->{current_token};
1419    
1420     redo A;
1421     } else {
1422     $self->{current_token}->{name}
1423     .= chr ($self->{next_input_character}); # DOCTYPE
1424     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1425     ## Stay in the state
1426     !!!next-input-character;
1427     redo A;
1428     }
1429     } elsif ($self->{state} eq 'after DOCTYPE name') {
1430     if ($self->{next_input_character} == 0x0009 or # HT
1431     $self->{next_input_character} == 0x000A or # LF
1432     $self->{next_input_character} == 0x000B or # VT
1433     $self->{next_input_character} == 0x000C or # FF
1434     $self->{next_input_character} == 0x0020) { # SP
1435     ## Stay in the state
1436     !!!next-input-character;
1437     redo A;
1438     } elsif ($self->{next_input_character} == 0x003E) { # >
1439     $self->{state} = 'data';
1440     !!!next-input-character;
1441    
1442     !!!emit ($self->{current_token}); # DOCTYPE
1443     undef $self->{current_token};
1444    
1445     redo A;
1446     } elsif ($self->{next_input_character} == -1) {
1447 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1448 wakaba 1.1 $self->{state} = 'data';
1449     ## reconsume
1450    
1451     !!!emit ($self->{current_token}); # DOCTYPE
1452     undef $self->{current_token};
1453    
1454     redo A;
1455     } else {
1456 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1457 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1458     $self->{state} = 'bogus DOCTYPE';
1459     !!!next-input-character;
1460     redo A;
1461     }
1462     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1463     if ($self->{next_input_character} == 0x003E) { # >
1464     $self->{state} = 'data';
1465     !!!next-input-character;
1466    
1467     !!!emit ($self->{current_token}); # DOCTYPE
1468     undef $self->{current_token};
1469    
1470     redo A;
1471     } elsif ($self->{next_input_character} == -1) {
1472 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1473 wakaba 1.1 $self->{state} = 'data';
1474     ## reconsume
1475    
1476     !!!emit ($self->{current_token}); # DOCTYPE
1477     undef $self->{current_token};
1478    
1479     redo A;
1480     } else {
1481     ## Stay in the state
1482     !!!next-input-character;
1483     redo A;
1484     }
1485     } else {
1486     die "$0: $self->{state}: Unknown state";
1487     }
1488     } # A
1489    
1490     die "$0: _get_next_token: unexpected case";
1491     } # _get_next_token
1492    
## Attempt to consume an entity (character reference).
##
## On entry, |$self->{next_input_character}| is the character that follows
## the "&" (the callers visible in this file advance past the "&" before
## calling this method).  Three forms are recognised:
##
##   - "#x" / "#X" followed by one or more hexadecimal digits,
##   - "#" followed by one or more decimal digits,
##   - a name made of ASCII letters and digits, looked up in |$entity_char|.
##
## Returns a hash reference |{type => 'character', data => ...}| when a
## reference has been recognised.  Returns |undef| otherwise; in that case
## the input is rewound where this method can do so (see the individual
## branches) and a parse error has been reported.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      my $num;
      X: {
        ## On the first pass this holds the "x" / "X" itself.  It is only
        ## read by the "no hexadecimal digit" branch, which can only be
        ## taken on the first pass (|$num| is defined after any digit).
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error (type => 'bare hcro');
          ## Rewind to the "#".  The character that followed the "x" has
          ## to be pushed back together with the "x"; overwriting
          ## |next_input_character| without saving it first would drop
          ## that character from the input.  Arguments are given in input
          ## order, as at the other call sites of this macro (assumed from
          ## its use in the 'markup declaration open' state -- TODO confirm).
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error (type => 'no refc');
        }

        ## TODO: check the definition for |a valid Unicode character|.
        ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        } elsif (0x80 <= $num and $num <= 0x9F) {
          ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
          ## ISSUE: Not in the spec yet; parse error?
          ## NOTE(review): |$c1_entity_char| is defined outside this
          ## method; whether it covers every code in 0x80..0x9F cannot be
          ## told from here -- confirm.
          $num = $c1_entity_char->{$num};
        }

        return {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      ## Accumulate the remaining decimal digits.
      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
        ## ISSUE: Not in the spec yet; parse error?
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      ## "#" not followed by a digit or "x": push the lookahead character
      ## back and make "#" the next input character again.
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the replacement text built so far: the replacement
    ## character of the longest entity name matched, followed by any
    ## characters read after that match.
    my $value = $entity_name;
    my $match;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x005A) or
            (0x0061 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x007A) or
            (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039))) {
      $entity_name .= chr $self->{next_input_character};
      if (defined $entity_char->{$entity_name}) {
        $value = $entity_char->{$entity_name};
        $match = 1;
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match) {
      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        ## NOTE(review): the numeric branches report this condition as
        ## 'no refc'; the type used here is 'refc' -- confirm which one
        ## is intended before unifying them.
        !!!parse-error (type => 'refc');
      }

      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      ## The characters read so far are handed back as a character token.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1632    
## Prepare the document for the tree construction stage.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  ## NOTE: The caller MUST have set $self->{document} before calling this.
  my $doc = $self->{document};

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST

  ## Switch strict error checking off on the document.
  return $doc->strict_error_checking (0);
} # _initialize_tree_constructor
1641    
## Restore the document settings once the tree construction stage is over.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};

  ## TODO: Turn mutation events on

  ## Switch strict error checking back on for the document.
  return $doc->strict_error_checking (1);
} # _terminate_tree_constructor
1647    
1648     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1649    
1650 wakaba 1.3 { # tree construction stage
1651     my $token;
1652    
## Entry point of the tree construction stage: reset the per-parse state
## and run the initial, root element and main phases in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: The point at which an interactive UA makes $self->{document}
  ## available to the user, and the point at which it begins accepting
  ## user input, are not defined.

  ## NOTE: "Append a character" means: collect that character together
  ## with all consecutive characters that follow it, and insert a single
  ## Text node whose data is the concatenation of them all. # MUST

  ## Macro; presumably loads the first token into |$token|.
  !!!next-token;

  ## Reset the tree construction state.
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  ## Run the phases in order.
  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1676    
## Initial phase of tree construction.
##
## Handles whatever precedes the root element: a DOCTYPE token is turned
## into a document type definition node and consumed; leading white space
## is appended to the document; any other token is left in |$token| to be
## reprocessed by the next phase, after a 'missing DOCTYPE' parse error.
sub _tree_construction_initial ($) {
  my $self = shift;

  B: {
    my $type = $token->{type};

    if ($type eq 'character') {
      ## Move leading white space into the document.
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## The token was white space only: stay in this phase and look
          ## at the next token.
          !!!next-token;
          redo B;
        }
      }
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## The (remaining part of the) token is reprocessed by the caller's
      ## next phase.
      return;
    } elsif ($type eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      my $document = $self->{document};
      my $doctype = $document->create_document_type_definition
          ($token->{name});
      $document->append_child ($doctype);
      ## The DOCTYPE token is consumed here.
      !!!next-token;
      return;
    } elsif ($type eq 'comment' or
             $type eq 'start tag' or
             $type eq 'end tag' or
             $type eq 'end-of-file') {
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## The token is reprocessed by the caller's next phase.
      return;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # B
} # _tree_construction_initial
1725    
## Root element phase of tree construction.  Works on the shared lexical
## |$token|:
##
##   - DOCTYPE tokens are reported ('in html:#DOCTYPE') and skipped;
##   - comment tokens become Comment children of the document;
##   - leading white space of character tokens is appended to the
##     document.
##
## The first token that is none of the above (or a character token with
## non-white-space data left) ends the phase: an |html| element is
## created, appended to the document and pushed onto
## |$self->{open_elements}|; that token is left in place for
## reprocessing.  Dies on a token type it does not know.
sub _tree_construction_root_element ($) {
  my ($self) = @_;
  my $ends_phase = {
    'start tag' => 1,
    'end tag' => 1,
    'end-of-file' => 1,
  };

  B: {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
      !!!next-token;
      redo B;
    }

    if ($type eq 'comment') {
      $self->{document}->append_child
          ($self->{document}->create_comment ($token->{data}));
      ## Stay in the phase
      !!!next-token;
      redo B;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        ## ISSUE: DOM3 Core does not allow Document > Text
        $self->{document}->manakai_append_text ($1);
        if (not length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          redo B;
        }
      }
    } elsif (not $ends_phase->{$type}) {
      ## ISSUE: There is an issue in the spec (for the accepted types)
      die "$0: $token->{type}: Unknown token";
    }

    my $root_element;
    !!!create-element ($root_element, 'html');
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, 'html'];
    ## The current token is reprocessed by the main phase
    return;
  } # B
} # _tree_construction_root_element
1772    
## Resets |$self->{insertion_mode}| according to the current stack of
## open elements.
##
## The stack (|$self->{open_elements}|, a list of |[$element, $tag_name]|
## pairs) is walked from the current node towards the root until an
## element that determines the insertion mode is found.
##
## Fragment (innerHTML) case: when the walk reaches the first element of
## the stack and |$self->{inner_html_node}| is defined and is neither a
## |td| nor a |th|, the context element is examined in place of that
## first element.  The substitution is done ONLY at the first element of
## the stack; doing it on every iteration would hide the real stack
## (e.g. an open |select| would never be seen).
##
## Returns nothing; the result is stored in |$self->{insertion_mode}|.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node} and
          $self->{inner_html_node}->[1] ne 'td' and
          $self->{inner_html_node}->[1] ne 'th') {
        $node = $self->{inner_html_node};
      }
    }

    ## Step 4..13
    my $new_mode = {
      select => 'in select',
      td => 'in cell',
      th => 'in cell',
      tr => 'in row',
      tbody => 'in table body',
      thead => 'in table head',
      tfoot => 'in table foot',
      caption => 'in caption',
      colgroup => 'in column group',
      table => 'in table',
      head => 'in body', # not in head!
      body => 'in body',
      frameset => 'in frameset',
    }->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
1834    
1835     sub _tree_construction_main ($) {
1836     my $self = shift;
1837    
1838     my $phase = 'main';
1839 wakaba 1.1
1840     my $active_formatting_elements = [];
1841    
## Reconstructs the active formatting elements. # MUST
##
## The only argument is a code reference that receives each freshly
## cloned element and is expected to insert it into the tree.
##
## Nothing is done if the list is empty, or if its last entry is a
## '#marker' or is still in |$self->{open_elements}|.  Otherwise the
## list is rewound to just after the nearest entry that is a marker or
## is still open (or to its start), and from there to the end every
## entry is shallow-cloned, handed to the inserter, pushed onto the
## stack of open elements and replaced in the list by its clone.
my $reconstruct_active_formatting_elements = sub {
  my ($insert) = @_;

  ## Tells whether an element is currently in the stack of open elements
  my $is_open = sub {
    my $element = shift;
    for (@{$self->{open_elements}}) {
      return 1 if $element eq $_->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  REWIND: {
    ## Step 4: nothing before the entry, go straight to cloning
    unless ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
      ## Step 5
      $entry = $active_formatting_elements->[--$i];

      ## Step 6: keep rewinding until a marker or an open element is met
      redo REWIND
          unless $entry->[0] eq '#marker' or $is_open->($entry->[0]);

      ## Step 7
      $entry = $active_formatting_elements->[++$i];
    }
  } # REWIND

  CREATE: {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $clone;

    ## Step 11: done once the last entry of the list has been replaced
    last CREATE
        if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $entry = $active_formatting_elements->[++$i];
    redo CREATE;
  } # CREATE
}; # $reconstruct_active_formatting_elements
1912    
## Clears the list of active formatting elements up to the last marker:
## the last '#marker' entry and everything after it are removed.  The
## list is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $index = $#$active_formatting_elements;
  while ($index >= 0) {
    if ($active_formatting_elements->[$index]->[0] eq '#marker') {
      splice @$active_formatting_elements, $index;
      return;
    }
    $index--;
  }
}; # $clear_up_to_marker
1921    
## Handles a |style| start tag (the current |$token|).
##
## Creates the element from the token's attributes and appends it to
## |$self->{head_element}| when the insertion mode is 'in head' and a
## head element is known, otherwise to the current node.  The content
## model flag is set to CDATA while the following character tokens are
## collected into a single text child, then set back to PCDATA.  A token
## other than the |style| end tag after the text is a parse error; in
## both cases the next token is fetched before returning.
my $style_start_tag = sub {
  my $style_el;
  !!!create-element ($style_el, 'style', $token->{attributes});

  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  my $parent;
  if ($self->{insertion_mode} eq 'in head' and
      defined $self->{head_element}) {
    $parent = $self->{head_element};
  } else {
    $parent = $self->{open_elements}->[-1]->[0];
  }
  $parent->append_child ($style_el);

  $self->{content_model_flag} = 'CDATA';

  my @chunks;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  ## The token (the end tag, if it was one) is ignored
  !!!next-token;
}; # $style_start_tag
1950    
## Handles a |script| start tag (the current |$token|).
##
## Creates the element from the token's attributes, sets the content
## model flag to CDATA while the following character tokens are
## collected into a single text child, then sets it back to PCDATA.  A
## token other than the |script| end tag after the text is a parse
## error.  Unless |$self->{inner_html_node}| is defined, the element is
## then appended to |$self->{head_element}| (insertion mode 'in head'
## with a known head element) or to the current node.  The next token
## is fetched before returning.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  my @chunks;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent;
    if ($self->{insertion_mode} eq 'in head' and
        defined $self->{head_element}) {
      $parent = $self->{head_element};
    } else {
      $parent = $self->{open_elements}->[-1]->[0];
    }
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
1995    
1996     my $formatting_end_tag = sub {
         ## Processes an end tag for a formatting element.  The first
         ## argument is the tag name being closed.
         ##
         ## The closure reads and rewrites both
         ## |@$active_formatting_elements| and |$self->{open_elements}|
         ## (lists of |[$element, $tag_name]| pairs) and moves DOM nodes
         ## between parents.  The whole |FET| block is re-run (Step 14)
         ## until one of the early |return|s is taken; every |return| is
         ## preceded by |!!!next-token|.
         ##
         ## |$formatting_category|, |$special_category| and
         ## |$scoping_category| are defined outside this closure.
1997     my $tag_name = shift;
1998    
1999     FET: {
2000     ## Step 1
         ## Search the list from its end for the most recent entry whose
         ## tag name is |$tag_name|; give up at the first '#marker'.
2001     my $formatting_element;
2002     my $formatting_element_i_in_active;
2003     AFE: for (reverse 0..$#$active_formatting_elements) {
2004     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
2005     $formatting_element = $active_formatting_elements->[$_];
2006     $formatting_element_i_in_active = $_;
2007     last AFE;
2008     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
2009     last AFE;
2010     }
2011     } # AFE
2012     unless (defined $formatting_element) {
2013 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$tag_name);
2014 wakaba 1.1 ## Ignore the token
2015     !!!next-token;
2016     return;
2017     }
2018     ## has an element in scope
         ## Walk the stack from the current node upwards.  |$in_scope| is
         ## cleared once a scoping element (table, caption, ...) has been
         ## passed, so meeting the formatting element after that point
         ## means "open, but not in scope".
2019     my $in_scope = 1;
2020     my $formatting_element_i_in_open;
2021 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2022     my $node = $self->{open_elements}->[$_];
2023 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
2024     if ($in_scope) {
2025     $formatting_element_i_in_open = $_;
2026     last INSCOPE;
2027     } else { # in open elements but not in scope
2028 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2029 wakaba 1.1 ## Ignore the token
2030     !!!next-token;
2031     return;
2032     }
2033     } elsif ({
2034     table => 1, caption => 1, td => 1, th => 1,
2035     button => 1, marquee => 1, object => 1, html => 1,
2036     }->{$node->[1]}) {
2037     $in_scope = 0;
2038     }
2039     } # INSCOPE
         ## The formatting element is in the active list but not in the
         ## stack of open elements at all.
         ## NOTE(review): |pop| removes the LAST entry of the list, which
         ## is |$formatting_element| only if nothing follows it there;
         ## splicing at |$formatting_element_i_in_active| may have been
         ## intended -- confirm.
2040     unless (defined $formatting_element_i_in_open) {
2041 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2042 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
2043     !!!next-token; ## TODO: ok?
2044     return;
2045     }
         ## In scope but not the current node: report the current node as
         ## not closed and carry on.
2046 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
2047 wakaba 1.4 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2048 wakaba 1.1 }
2049    
2050     ## Step 2
         ## Scan from the current node up to the formatting element.
         ## Each qualifying node overwrites the previous one, so the
         ## result is the qualifying node nearest to the formatting
         ## element in the stack.
2051     my $furthest_block;
2052     my $furthest_block_i_in_open;
2053 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2054     my $node = $self->{open_elements}->[$_];
2055 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
2056     #not $phrasing_category->{$node->[1]} and
2057     ($special_category->{$node->[1]} or
2058     $scoping_category->{$node->[1]})) {
2059     $furthest_block = $node;
2060     $furthest_block_i_in_open = $_;
2061     } elsif ($node->[0] eq $formatting_element->[0]) {
2062     last OE;
2063     }
2064     } # OE
2065    
2066     ## Step 3
         ## No furthest block: drop the formatting element and everything
         ## after it from the stack, and its entry from the active list.
2067     unless (defined $furthest_block) { # MUST
2068 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
2069 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
2070     !!!next-token;
2071     return;
2072     }
2073    
2074     ## Step 4
2075 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
2076 wakaba 1.1
2077     ## Step 5
2078     my $furthest_block_parent = $furthest_block->[0]->parent_node;
2079     if (defined $furthest_block_parent) {
2080     $furthest_block_parent->remove_child ($furthest_block->[0]);
2081     }
2082    
2083     ## Step 6
         ## The bookmark is kept as the element of the entry preceding the
         ## formatting element in the active list.
         ## NOTE(review): when the formatting element is entry 0, index
         ## -1 addresses the last entry of the list -- confirm intended.
2084     my $bookmark_prev_el
2085     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
2086     ->[0];
2087    
2088     ## Step 7
2089     my $node = $furthest_block;
2090     my $node_i_in_open = $furthest_block_i_in_open;
2091     my $last_node = $furthest_block;
2092     S7: {
2093     ## Step 1
2094     $node_i_in_open--;
2095 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
2096 wakaba 1.1
2097     ## Step 2
         ## If |$node| is not in the active list it is removed from the
         ## stack and the loop restarts; the decrement at the top of |S7|
         ## then selects the entry that preceded the removed one.
2098     my $node_i_in_active;
2099     S7S2: {
2100     for (reverse 0..$#$active_formatting_elements) {
2101     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2102     $node_i_in_active = $_;
2103     last S7S2;
2104     }
2105     }
2106 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
2107 wakaba 1.1 redo S7;
2108     } # S7S2
2109    
2110     ## Step 3
2111     last S7 if $node->[0] eq $formatting_element->[0];
2112    
2113     ## Step 4
2114     if ($last_node->[0] eq $furthest_block->[0]) {
2115     $bookmark_prev_el = $node->[0];
2116     }
2117    
2118     ## Step 5
         ## A node that already has children is replaced, in both lists,
         ## by a shallow clone.
2119     if ($node->[0]->has_child_nodes ()) {
2120     my $clone = [$node->[0]->clone_node (0), $node->[1]];
2121     $active_formatting_elements->[$node_i_in_active] = $clone;
2122 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
2123 wakaba 1.1 $node = $clone;
2124     }
2125    
2126     ## Step 6
2127     $node->[0]->append_child ($last_node->[0]);
2128    
2129     ## Step 7
2130     $last_node = $node;
2131    
2132     ## Step 8
2133     redo S7;
2134     } # S7
2135    
2136     ## Step 8
2137     $common_ancestor_node->[0]->append_child ($last_node->[0]);
2138    
2139     ## Step 9
2140     my $clone = [$formatting_element->[0]->clone_node (0),
2141     $formatting_element->[1]];
2142    
2143     ## Step 10
         ## The child list is copied into |@cn| before the children are
         ## moved one by one.
2144     my @cn = @{$furthest_block->[0]->child_nodes};
2145     $clone->[0]->append_child ($_) for @cn;
2146    
2147     ## Step 11
2148     $furthest_block->[0]->append_child ($clone->[0]);
2149    
2150     ## Step 12
         ## Remove the formatting element's entry and insert the clone
         ## right after the bookmark entry.  |$i| is decremented when the
         ## removed entry lies before the bookmark, because the removal
         ## shifts the bookmark down by one.
         ## NOTE(review): |$i--| yields the value before the decrement, so
         ## |last AFE| is skipped when |$i| was 0; and if the bookmark
         ## element is never found |$i| stays undefined and |$i + 1|
         ## evaluates to 1 -- confirm both are acceptable.
2151     my $i;
2152     AFE: for (reverse 0..$#$active_formatting_elements) {
2153     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
2154     splice @$active_formatting_elements, $_, 1;
2155     $i-- and last AFE if defined $i;
2156     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
2157     $i = $_;
2158     }
2159     } # AFE
2160     splice @$active_formatting_elements, $i + 1, 0, $clone;
2161    
2162     ## Step 13
         ## Same bookkeeping for the stack of open elements, relative to
         ## the furthest block.
         ## NOTE(review): unlike Step 12 this |splice| has a length of 1,
         ## i.e. it REPLACES the entry following the furthest block (if
         ## any) with the clone instead of inserting before it -- confirm
         ## against the specification.
2163     undef $i;
2164 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2165     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
2166     splice @{$self->{open_elements}}, $_, 1;
2167 wakaba 1.1 $i-- and last OE if defined $i;
2168 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
2169 wakaba 1.1 $i = $_;
2170     }
2171     } # OE
2172 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
2173 wakaba 1.1
2174     ## Step 14
2175     redo FET;
2176     } # FET
2177     }; # $formatting_end_tag
2178    
## Default inserter: appends the given node to the current node, i.e.
## the element of the last entry of |$self->{open_elements}|.
my $insert_to_current = sub {
  my ($child) = @_;
  $self->{open_elements}->[-1]->[0]->append_child ($child);
}; # $insert_to_current
2182    
## Foster-parenting inserter.
##
## When the current node is not a |table|, |tbody|, |tfoot|, |thead| or
## |tr| element, the given node is simply appended to the current node.
## Otherwise the nearest |table| in the stack of open elements is looked
## up:
##
##   - if that table has a parent whose |node_type| is 1, the node is
##     inserted into the parent just before the table;
##   - if not, the node is appended to the element of the stack entry
##     preceding the table;
##   - if there is no table in the stack, the node is appended to the
##     first element of the stack.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $open = $self->{open_elements};
  my $is_table_part = {
    table => 1, tbody => 1, tfoot => 1,
    thead => 1, tr => 1,
  };

  unless ($is_table_part->{$open->[-1]->[1]}) {
    return $open->[-1]->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent_element, $next_sibling);
  my $index = $#$open;
  OE: while ($index >= 0) {
    if ($open->[$index]->[1] eq 'table') {
      my $table_el = $open->[$index]->[0];
      my $parent = $table_el->parent_node;
      if (defined $parent and $parent->node_type == 1) {
        ($foster_parent_element, $next_sibling) = ($parent, $table_el);
      } else {
        $foster_parent_element = $open->[$index - 1]->[0];
      }
      last OE;
    }
    $index--;
  } # OE

  $foster_parent_element = $open->[0]->[0]
      unless defined $foster_parent_element;
  ## An undefined |$next_sibling| is passed through as the reference node
  $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2213    
2214     my $in_body = sub {
2215     my $insert = shift;
2216     if ($token->{type} eq 'start tag') {
2217     if ($token->{tag_name} eq 'script') {
2218     $script_start_tag->();
2219     return;
2220     } elsif ($token->{tag_name} eq 'style') {
2221     $style_start_tag->();
2222     return;
2223     } elsif ({
2224     base => 1, link => 1, meta => 1,
2225     }->{$token->{tag_name}}) {
2226 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2227 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2228     my $el;
2229     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2230 wakaba 1.3 if (defined $self->{head_element}) {
2231     $self->{head_element}->append_child ($el);
2232 wakaba 1.1 } else {
2233     $insert->($el);
2234     }
2235    
2236     !!!next-token;
2237     return;
2238     } elsif ($token->{tag_name} eq 'title') {
2239 wakaba 1.3 !!!parse-error (type => 'in body:title');
2240 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2241     my $title_el;
2242     !!!create-element ($title_el, 'title', $token->{attributes});
2243 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2244 wakaba 1.1 ->append_child ($title_el);
2245     $self->{content_model_flag} = 'RCDATA';
2246    
2247     my $text = '';
2248     !!!next-token;
2249     while ($token->{type} eq 'character') {
2250     $text .= $token->{data};
2251     !!!next-token;
2252     }
2253     if (length $text) {
2254     $title_el->manakai_append_text ($text);
2255     }
2256    
2257     $self->{content_model_flag} = 'PCDATA';
2258    
2259     if ($token->{type} eq 'end tag' and
2260     $token->{tag_name} eq 'title') {
2261     ## Ignore the token
2262     } else {
2263 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2264 wakaba 1.1 ## ISSUE: And ignore?
2265     }
2266     !!!next-token;
2267     return;
2268     } elsif ($token->{tag_name} eq 'body') {
2269 wakaba 1.3 !!!parse-error (type => 'in body:body');
2270 wakaba 1.1
2271 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2272     $self->{open_elements}->[1]->[1] ne 'body') {
2273 wakaba 1.1 ## Ignore the token
2274     } else {
2275 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2276 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2277     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2278     $body_el->set_attribute_ns
2279     (undef, [undef, $attr_name],
2280     $token->{attributes}->{$attr_name}->{value});
2281     }
2282     }
2283     }
2284     !!!next-token;
2285     return;
2286     } elsif ({
2287     address => 1, blockquote => 1, center => 1, dir => 1,
2288     div => 1, dl => 1, fieldset => 1, listing => 1,
2289     menu => 1, ol => 1, p => 1, ul => 1,
2290     pre => 1,
2291     }->{$token->{tag_name}}) {
2292     ## has a p element in scope
2293 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2294 wakaba 1.1 if ($_->[1] eq 'p') {
2295     !!!back-token;
2296     $token = {type => 'end tag', tag_name => 'p'};
2297     return;
2298     } elsif ({
2299     table => 1, caption => 1, td => 1, th => 1,
2300     button => 1, marquee => 1, object => 1, html => 1,
2301     }->{$_->[1]}) {
2302     last INSCOPE;
2303     }
2304     } # INSCOPE
2305    
2306     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2307     if ($token->{tag_name} eq 'pre') {
2308     !!!next-token;
2309     if ($token->{type} eq 'character') {
2310     $token->{data} =~ s/^\x0A//;
2311     unless (length $token->{data}) {
2312     !!!next-token;
2313     }
2314     }
2315     } else {
2316     !!!next-token;
2317     }
2318     return;
2319     } elsif ($token->{tag_name} eq 'form') {
2320 wakaba 1.3 if (defined $self->{form_element}) {
2321     !!!parse-error (type => 'in form:form');
2322 wakaba 1.1 ## Ignore the token
2323 wakaba 1.7 !!!next-token;
2324     return;
2325 wakaba 1.1 } else {
2326     ## has a p element in scope
2327 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2328 wakaba 1.1 if ($_->[1] eq 'p') {
2329     !!!back-token;
2330     $token = {type => 'end tag', tag_name => 'p'};
2331     return;
2332     } elsif ({
2333     table => 1, caption => 1, td => 1, th => 1,
2334     button => 1, marquee => 1, object => 1, html => 1,
2335     }->{$_->[1]}) {
2336     last INSCOPE;
2337     }
2338     } # INSCOPE
2339    
2340     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2341 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2342 wakaba 1.1 !!!next-token;
2343     return;
2344     }
2345     } elsif ($token->{tag_name} eq 'li') {
2346     ## has a p element in scope
2347 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2348 wakaba 1.1 if ($_->[1] eq 'p') {
2349     !!!back-token;
2350     $token = {type => 'end tag', tag_name => 'p'};
2351     return;
2352     } elsif ({
2353     table => 1, caption => 1, td => 1, th => 1,
2354     button => 1, marquee => 1, object => 1, html => 1,
2355     }->{$_->[1]}) {
2356     last INSCOPE;
2357     }
2358     } # INSCOPE
2359    
2360     ## Step 1
2361     my $i = -1;
2362 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2363 wakaba 1.1 LI: {
2364     ## Step 2
2365     if ($node->[1] eq 'li') {
2366 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2367 wakaba 1.1 last LI;
2368     }
2369    
2370     ## Step 3
2371     if (not $formatting_category->{$node->[1]} and
2372     #not $phrasing_category->{$node->[1]} and
2373     ($special_category->{$node->[1]} or
2374     $scoping_category->{$node->[1]}) and
2375     $node->[1] ne 'address' and $node->[1] ne 'div') {
2376     last LI;
2377     }
2378    
2379     ## Step 4
2380     $i--;
2381 wakaba 1.3 $node = $self->{open_elements}->[$i];
2382 wakaba 1.1 redo LI;
2383     } # LI
2384    
2385     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2386     !!!next-token;
2387     return;
2388     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2389     ## has a p element in scope
2390 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2391 wakaba 1.1 if ($_->[1] eq 'p') {
2392     !!!back-token;
2393     $token = {type => 'end tag', tag_name => 'p'};
2394     return;
2395     } elsif ({
2396     table => 1, caption => 1, td => 1, th => 1,
2397     button => 1, marquee => 1, object => 1, html => 1,
2398     }->{$_->[1]}) {
2399     last INSCOPE;
2400     }
2401     } # INSCOPE
2402    
2403     ## Step 1
2404     my $i = -1;
2405 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2406 wakaba 1.1 LI: {
2407     ## Step 2
2408     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2409 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2410 wakaba 1.1 last LI;
2411     }
2412    
2413     ## Step 3
2414     if (not $formatting_category->{$node->[1]} and
2415     #not $phrasing_category->{$node->[1]} and
2416     ($special_category->{$node->[1]} or
2417     $scoping_category->{$node->[1]}) and
2418     $node->[1] ne 'address' and $node->[1] ne 'div') {
2419     last LI;
2420     }
2421    
2422     ## Step 4
2423     $i--;
2424 wakaba 1.3 $node = $self->{open_elements}->[$i];
2425 wakaba 1.1 redo LI;
2426     } # LI
2427    
2428     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2429     !!!next-token;
2430     return;
2431     } elsif ($token->{tag_name} eq 'plaintext') {
2432     ## has a p element in scope
2433 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2434 wakaba 1.1 if ($_->[1] eq 'p') {
2435     !!!back-token;
2436     $token = {type => 'end tag', tag_name => 'p'};
2437     return;
2438     } elsif ({
2439     table => 1, caption => 1, td => 1, th => 1,
2440     button => 1, marquee => 1, object => 1, html => 1,
2441     }->{$_->[1]}) {
2442     last INSCOPE;
2443     }
2444     } # INSCOPE
2445    
2446     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2447    
2448     $self->{content_model_flag} = 'PLAINTEXT';
2449    
2450     !!!next-token;
2451     return;
2452     } elsif ({
2453     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2454     }->{$token->{tag_name}}) {
2455     ## has a p element in scope
2456 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2457     my $node = $self->{open_elements}->[$_];
2458 wakaba 1.1 if ($node->[1] eq 'p') {
2459     !!!back-token;
2460     $token = {type => 'end tag', tag_name => 'p'};
2461     return;
2462     } elsif ({
2463     table => 1, caption => 1, td => 1, th => 1,
2464     button => 1, marquee => 1, object => 1, html => 1,
2465     }->{$node->[1]}) {
2466     last INSCOPE;
2467     }
2468     } # INSCOPE
2469    
2470     ## has an element in scope
2471     my $i;
2472 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2473     my $node = $self->{open_elements}->[$_];
2474 wakaba 1.1 if ({
2475     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2476     }->{$node->[1]}) {
2477     $i = $_;
2478     last INSCOPE;
2479     } elsif ({
2480     table => 1, caption => 1, td => 1, th => 1,
2481     button => 1, marquee => 1, object => 1, html => 1,
2482     }->{$node->[1]}) {
2483     last INSCOPE;
2484     }
2485     } # INSCOPE
2486    
2487     if (defined $i) {
2488 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2489     splice @{$self->{open_elements}}, $i;
2490 wakaba 1.1 }
2491    
2492     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2493    
2494     !!!next-token;
2495     return;
2496     } elsif ($token->{tag_name} eq 'a') {
2497     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2498     my $node = $active_formatting_elements->[$i];
2499     if ($node->[1] eq 'a') {
2500 wakaba 1.3 !!!parse-error (type => 'in a:a');
2501 wakaba 1.1
2502     !!!back-token;
2503     $token = {type => 'end tag', tag_name => 'a'};
2504     $formatting_end_tag->($token->{tag_name});
2505    
2506     AFE2: for (reverse 0..$#$active_formatting_elements) {
2507     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2508     splice @$active_formatting_elements, $_, 1;
2509     last AFE2;
2510     }
2511     } # AFE2
2512 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2513     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2514     splice @{$self->{open_elements}}, $_, 1;
2515 wakaba 1.1 last OE;
2516     }
2517     } # OE
2518     last AFE;
2519     } elsif ($node->[0] eq '#marker') {
2520     last AFE;
2521     }
2522     } # AFE
2523    
2524     $reconstruct_active_formatting_elements->($insert_to_current);
2525    
2526     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2527 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2528 wakaba 1.1
2529     !!!next-token;
2530     return;
2531     } elsif ({
2532     b => 1, big => 1, em => 1, font => 1, i => 1,
2533     nobr => 1, s => 1, small => 1, strile => 1,
2534     strong => 1, tt => 1, u => 1,
2535     }->{$token->{tag_name}}) {
2536     $reconstruct_active_formatting_elements->($insert_to_current);
2537    
2538     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2539 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2540 wakaba 1.1
2541     !!!next-token;
2542     return;
2543     } elsif ($token->{tag_name} eq 'button') {
2544     ## has a button element in scope
2545 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2546     my $node = $self->{open_elements}->[$_];
2547 wakaba 1.1 if ($node->[1] eq 'button') {
2548 wakaba 1.3 !!!parse-error (type => 'in button:button');
2549 wakaba 1.1 !!!back-token;
2550     $token = {type => 'end tag', tag_name => 'button'};
2551     return;
2552     } elsif ({
2553     table => 1, caption => 1, td => 1, th => 1,
2554     button => 1, marquee => 1, object => 1, html => 1,
2555     }->{$node->[1]}) {
2556     last INSCOPE;
2557     }
2558     } # INSCOPE
2559    
2560     $reconstruct_active_formatting_elements->($insert_to_current);
2561    
2562     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2563     push @$active_formatting_elements, ['#marker', ''];
2564    
2565     !!!next-token;
2566     return;
2567     } elsif ($token->{tag_name} eq 'marquee' or
2568     $token->{tag_name} eq 'object') {
2569     $reconstruct_active_formatting_elements->($insert_to_current);
2570    
2571     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2572     push @$active_formatting_elements, ['#marker', ''];
2573    
2574     !!!next-token;
2575     return;
2576     } elsif ($token->{tag_name} eq 'xmp') {
2577     $reconstruct_active_formatting_elements->($insert_to_current);
2578    
2579     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2580    
2581     $self->{content_model_flag} = 'CDATA';
2582    
2583     !!!next-token;
2584     return;
2585     } elsif ($token->{tag_name} eq 'table') {
2586     ## has a p element in scope
2587 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2588 wakaba 1.1 if ($_->[1] eq 'p') {
2589     !!!back-token;
2590     $token = {type => 'end tag', tag_name => 'p'};
2591     return;
2592     } elsif ({
2593     table => 1, caption => 1, td => 1, th => 1,
2594     button => 1, marquee => 1, object => 1, html => 1,
2595     }->{$_->[1]}) {
2596     last INSCOPE;
2597     }
2598     } # INSCOPE
2599    
2600     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2601    
2602 wakaba 1.3 $self->{insertion_mode} = 'in table';
2603 wakaba 1.1
2604     !!!next-token;
2605     return;
2606     } elsif ({
2607     area => 1, basefont => 1, bgsound => 1, br => 1,
2608     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2609     image => 1,
2610     }->{$token->{tag_name}}) {
2611     if ($token->{tag_name} eq 'image') {
2612 wakaba 1.3 !!!parse-error (type => 'image');
2613 wakaba 1.1 $token->{tag_name} = 'img';
2614     }
2615    
2616     $reconstruct_active_formatting_elements->($insert_to_current);
2617    
2618     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2619 wakaba 1.3 pop @{$self->{open_elements}};
2620 wakaba 1.1
2621     !!!next-token;
2622     return;
2623     } elsif ($token->{tag_name} eq 'hr') {
2624     ## has a p element in scope
2625 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2626 wakaba 1.1 if ($_->[1] eq 'p') {
2627     !!!back-token;
2628     $token = {type => 'end tag', tag_name => 'p'};
2629     return;
2630     } elsif ({
2631     table => 1, caption => 1, td => 1, th => 1,
2632     button => 1, marquee => 1, object => 1, html => 1,
2633     }->{$_->[1]}) {
2634     last INSCOPE;
2635     }
2636     } # INSCOPE
2637    
2638     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2639 wakaba 1.3 pop @{$self->{open_elements}};
2640 wakaba 1.1
2641     !!!next-token;
2642     return;
2643     } elsif ($token->{tag_name} eq 'input') {
2644     $reconstruct_active_formatting_elements->($insert_to_current);
2645    
2646     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2647 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2648     pop @{$self->{open_elements}};
2649 wakaba 1.1
2650     !!!next-token;
2651     return;
2652     } elsif ($token->{tag_name} eq 'isindex') {
2653 wakaba 1.3 !!!parse-error (type => 'isindex');
2654 wakaba 1.1
2655 wakaba 1.3 if (defined $self->{form_element}) {
2656 wakaba 1.1 ## Ignore the token
2657     !!!next-token;
2658     return;
2659     } else {
2660     my $at = $token->{attributes};
2661     $at->{name} = {name => 'name', value => 'isindex'};
2662     my @tokens = (
2663     {type => 'start tag', tag_name => 'form'},
2664     {type => 'start tag', tag_name => 'hr'},
2665     {type => 'start tag', tag_name => 'p'},
2666     {type => 'start tag', tag_name => 'label'},
2667     {type => 'character',
2668     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2669     ## TODO: make this configurable
2670     {type => 'start tag', tag_name => 'input', attributes => $at},
2671     #{type => 'character', data => ''}, # SHOULD
2672     {type => 'end tag', tag_name => 'label'},
2673     {type => 'end tag', tag_name => 'p'},
2674     {type => 'start tag', tag_name => 'hr'},
2675     {type => 'end tag', tag_name => 'form'},
2676     );
2677     $token = shift @tokens;
2678     !!!back-token (@tokens);
2679     return;
2680     }
2681     } elsif ({
2682     textarea => 1,
2683 wakaba 1.5 iframe => 1,
2684 wakaba 1.1 noembed => 1,
2685     noframes => 1,
2686     noscript => 0, ## TODO: 1 if scripting is enabled
2687     }->{$token->{tag_name}}) {
2688     my $tag_name = $token->{tag_name};
2689     my $el;
2690     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2691    
2692     if ($token->{tag_name} eq 'textarea') {
2693 wakaba 1.3 ## TODO: $self->{form_element} if defined
2694 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2695     } else {
2696     $self->{content_model_flag} = 'CDATA';
2697     }
2698    
2699     $insert->($el);
2700    
2701     my $text = '';
2702     !!!next-token;
2703     while ($token->{type} eq 'character') {
2704     $text .= $token->{data};
2705     !!!next-token;
2706     }
2707     if (length $text) {
2708     $el->manakai_append_text ($text);
2709     }
2710    
2711     $self->{content_model_flag} = 'PCDATA';
2712    
2713     if ($token->{type} eq 'end tag' and
2714     $token->{tag_name} eq $tag_name) {
2715     ## Ignore the token
2716     } else { ## $token is now whatever followed the text run, so test the saved $tag_name
2717 wakaba 1.3 if ($tag_name eq 'textarea') { ## textarea is the element parsed as RCDATA above
2718     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2719     } else { ## every other element in this branch was parsed as CDATA
2720     !!!parse-error (type => 'in CDATA:#'.$token->{type});
2721     }
2722 wakaba 1.1 ## ISSUE: And ignore?
2723     }
2724     !!!next-token;
2725     return;
2726     } elsif ($token->{tag_name} eq 'select') {
2727     $reconstruct_active_formatting_elements->($insert_to_current);
2728    
2729     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2730    
2731 wakaba 1.3 $self->{insertion_mode} = 'in select';
2732 wakaba 1.1 !!!next-token;
2733     return;
2734     } elsif ({
2735     caption => 1, col => 1, colgroup => 1, frame => 1,
2736     frameset => 1, head => 1, option => 1, optgroup => 1,
2737     tbody => 1, td => 1, tfoot => 1, th => 1,
2738     thead => 1, tr => 1,
2739     }->{$token->{tag_name}}) {
2740 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2741 wakaba 1.1 ## Ignore the token
2742     !!!next-token;
2743     return;
2744    
2745     ## ISSUE: An issue on HTML5 new elements in the spec.
2746     } else {
2747     $reconstruct_active_formatting_elements->($insert_to_current);
2748    
2749     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2750    
2751     !!!next-token;
2752     return;
2753     }
2754     } elsif ($token->{type} eq 'end tag') {
2755     if ($token->{tag_name} eq 'body') {
2756 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2757 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2758 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2759     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2760 wakaba 1.1 }
2761 wakaba 1.3 $self->{insertion_mode} = 'after body';
2762 wakaba 1.1 !!!next-token;
2763     return;
2764     } else {
2765 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2766 wakaba 1.1 ## Ignore the token
2767     !!!next-token;
2768     return;
2769     }
2770     } elsif ($token->{tag_name} eq 'html') {
2771 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2772 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2773 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') { ## current node is something other than body
2774     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); ## report the current node, as the </body> branch does
2775 wakaba 1.1 }
2776 wakaba 1.3 $self->{insertion_mode} = 'after body';
2777 wakaba 1.1 ## reprocess
2778     return;
2779     } else {
2780 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2781 wakaba 1.1 ## Ignore the token
2782     !!!next-token;
2783     return;
2784     }
2785     } elsif ({
2786     address => 1, blockquote => 1, center => 1, dir => 1,
2787     div => 1, dl => 1, fieldset => 1, listing => 1,
2788     menu => 1, ol => 1, pre => 1, ul => 1,
2789     form => 1,
2790     p => 1,
2791     dd => 1, dt => 1, li => 1,
2792     button => 1, marquee => 1, object => 1,
2793     }->{$token->{tag_name}}) {
2794     ## has an element in scope
2795     my $i;
2796 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2797     my $node = $self->{open_elements}->[$_];
2798 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2799     ## generate implied end tags
2800     if ({
2801     dd => ($token->{tag_name} ne 'dd'),
2802     dt => ($token->{tag_name} ne 'dt'),
2803     li => ($token->{tag_name} ne 'li'),
2804     p => ($token->{tag_name} ne 'p'),
2805     td => 1, th => 1, tr => 1,
2806 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2807 wakaba 1.1 !!!back-token;
2808     $token = {type => 'end tag',
2809 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2810 wakaba 1.1 return;
2811     }
2812     $i = $_;
2813     last INSCOPE unless $token->{tag_name} eq 'p';
2814     } elsif ({
2815     table => 1, caption => 1, td => 1, th => 1,
2816     button => 1, marquee => 1, object => 1, html => 1,
2817     }->{$node->[1]}) {
2818     last INSCOPE;
2819     }
2820     } # INSCOPE
2821    
2822 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2823     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2824 wakaba 1.1 }
2825    
2826 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2827     undef $self->{form_element} if $token->{tag_name} eq 'form';
2828 wakaba 1.1 $clear_up_to_marker->()
2829     if {
2830     button => 1, marquee => 1, object => 1,
2831     }->{$token->{tag_name}};
2832     !!!next-token;
2833     return;
2834     } elsif ({
2835     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2836     }->{$token->{tag_name}}) {
2837     ## has an element in scope
2838     my $i;
2839 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2840     my $node = $self->{open_elements}->[$_];
2841 wakaba 1.1 if ({
2842     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2843     }->{$node->[1]}) {
2844     ## generate implied end tags
2845     if ({
2846     dd => 1, dt => 1, li => 1, p => 1,
2847     td => 1, th => 1, tr => 1,
2848 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2849 wakaba 1.1 !!!back-token;
2850     $token = {type => 'end tag',
2851 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2852 wakaba 1.1 return;
2853     }
2854     $i = $_;
2855     last INSCOPE;
2856     } elsif ({
2857     table => 1, caption => 1, td => 1, th => 1,
2858     button => 1, marquee => 1, object => 1, html => 1,
2859     }->{$node->[1]}) {
2860     last INSCOPE;
2861     }
2862     } # INSCOPE
2863    
2864 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2865     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2866 wakaba 1.1 }
2867    
2868 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2869 wakaba 1.1 !!!next-token;
2870     return;
2871     } elsif ({
2872     a => 1,
2873     b => 1, big => 1, em => 1, font => 1, i => 1,
2874     nobr => 1, s => 1, small => 1, strike => 1, ## was misspelled "strile", so </strike> never reached $formatting_end_tag
2875     strong => 1, tt => 1, u => 1,
2876     }->{$token->{tag_name}}) {
2877     $formatting_end_tag->($token->{tag_name});
2878     return;
2879     } elsif ({
2880     caption => 1, col => 1, colgroup => 1, frame => 1,
2881     frameset => 1, head => 1, option => 1, optgroup => 1,
2882     tbody => 1, td => 1, tfoot => 1, th => 1,
2883     thead => 1, tr => 1,
2884     area => 1, basefont => 1, bgsound => 1, br => 1,
2885     embed => 1, hr => 1, iframe => 1, image => 1,
2886 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2887 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2888     table => 1, textarea => 1, wbr => 1,
2889     noscript => 0, ## TODO: if scripting is enabled
2890     }->{$token->{tag_name}}) {
2891 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2892 wakaba 1.1 ## Ignore the token
2893     !!!next-token;
2894     return;
2895    
2896     ## ISSUE: Issue on HTML5 new elements in spec
2897    
2898     } else {
2899     ## Step 1
2900     my $node_i = -1;
2901 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2902 wakaba 1.1
2903     ## Step 2
2904     S2: {
2905     if ($node->[1] eq $token->{tag_name}) {
2906     ## Step 1
2907     ## generate implied end tags
2908     if ({
2909     dd => 1, dt => 1, li => 1, p => 1,
2910     td => 1, th => 1, tr => 1,
2911 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2912 wakaba 1.1 !!!back-token;
2913     $token = {type => 'end tag',
2914 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2915 wakaba 1.1 return;
2916     }
2917    
2918     ## Step 2
2919 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2920     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2921 wakaba 1.1 }
2922    
2923     ## Step 3
2924 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2925    
2926     !!!next-token;
2927 wakaba 1.1 last S2;
2928     } else {
2929     ## Step 3
2930     if (not $formatting_category->{$node->[1]} and
2931     #not $phrasing_category->{$node->[1]} and
2932     ($special_category->{$node->[1]} or
2933     $scoping_category->{$node->[1]})) {
2934 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2935 wakaba 1.1 ## Ignore the token
2936     !!!next-token;
2937     last S2;
2938     }
2939     }
2940    
2941     ## Step 4
2942     $node_i--;
2943 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2944 wakaba 1.1
2945     ## Step 5;
2946     redo S2;
2947     } # S2
2948 wakaba 1.3 return;
2949 wakaba 1.1 }
2950     }
2951     }; # $in_body
2952    
2953     B: {
2954 wakaba 1.3 if ($phase eq 'main') {
2955 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2956 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2957 wakaba 1.1 ## Ignore the token
2958     ## Stay in the phase
2959     !!!next-token;
2960     redo B;
2961     } elsif ($token->{type} eq 'start tag' and
2962     $token->{tag_name} eq 'html') {
2963     ## TODO: unless it is the first start tag token, parse-error
2964 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2965 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2966     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2967     $top_el->set_attribute_ns
2968     (undef, [undef, $attr_name],
2969     $token->{attributes}->{$attr_name}->{value});
2970     }
2971     }
2972     !!!next-token;
2973     redo B;
2974     } elsif ($token->{type} eq 'end-of-file') {
2975     ## Generate implied end tags
2976     if ({
2977     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2978 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2979 wakaba 1.1 !!!back-token;
2980 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
2981 wakaba 1.1 redo B;
2982     }
2983    
2984 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
2985     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
2986     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2987     } elsif (defined $self->{inner_html_node} and
2988     @{$self->{open_elements}} > 1 and
2989     $self->{open_elements}->[1]->[1] ne 'body') {
2990     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2991 wakaba 1.1 }
2992    
2993     ## Stop parsing
2994     last B;
2995    
2996     ## ISSUE: There is an issue in the spec.
2997     } else {
2998 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
2999 wakaba 1.1 if ($token->{type} eq 'character') {
3000     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3001 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3002 wakaba 1.1 unless (length $token->{data}) {
3003     !!!next-token;
3004     redo B;
3005     }
3006     }
3007     ## As if <head>
3008 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3009     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3010     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3011     $self->{insertion_mode} = 'in head';
3012 wakaba 1.1 ## reprocess
3013     redo B;
3014     } elsif ($token->{type} eq 'comment') {
3015     my $comment = $self->{document}->create_comment ($token->{data});
3016 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3017 wakaba 1.1 !!!next-token;
3018     redo B;
3019     } elsif ($token->{type} eq 'start tag') {
3020     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3021 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3022     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3023     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3024     $self->{insertion_mode} = 'in head';
3025 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3026     !!!next-token;
3027     #} elsif ({
3028     # base => 1, link => 1, meta => 1,
3029     # script => 1, style => 1, title => 1,
3030     # }->{$token->{tag_name}}) {
3031     # ## reprocess
3032     } else {
3033     ## reprocess
3034     }
3035     redo B;
3036     } elsif ($token->{type} eq 'end tag') {
3037     if ($token->{tag_name} eq 'html') {
3038     ## As if <head>
3039 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3040     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3041     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3042     $self->{insertion_mode} = 'in head';
3043 wakaba 1.1 ## reprocess
3044     redo B;
3045     } else {
3046 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3047 wakaba 1.1 ## Ignore the token
3048     !!!next-token;
3049     redo B;
3050     }
3051     } else {
3052     die "$0: $token->{type}: Unknown type";
3053     }
3054 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3055 wakaba 1.1 if ($token->{type} eq 'character') {
3056     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3057 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3058 wakaba 1.1 unless (length $token->{data}) {
3059     !!!next-token;
3060     redo B;
3061     }
3062     }
3063    
3064     #
3065     } elsif ($token->{type} eq 'comment') {
3066     my $comment = $self->{document}->create_comment ($token->{data});
3067 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3068 wakaba 1.1 !!!next-token;
3069     redo B;
3070     } elsif ($token->{type} eq 'start tag') {
3071     if ($token->{tag_name} eq 'title') {
3072     ## NOTE: There is an "as if in head" code clone
3073     my $title_el;
3074     !!!create-element ($title_el, 'title', $token->{attributes});
3075 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3076 wakaba 1.1 ->append_child ($title_el);
3077     $self->{content_model_flag} = 'RCDATA';
3078    
3079     my $text = '';
3080     !!!next-token;
3081     while ($token->{type} eq 'character') {
3082     $text .= $token->{data};
3083     !!!next-token;
3084     }
3085     if (length $text) {
3086     $title_el->manakai_append_text ($text);
3087     }
3088    
3089     $self->{content_model_flag} = 'PCDATA';
3090    
3091     if ($token->{type} eq 'end tag' and
3092     $token->{tag_name} eq 'title') {
3093     ## Ignore the token
3094     } else {
3095 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3096 wakaba 1.1 ## ISSUE: And ignore?
3097     }
3098     !!!next-token;
3099     redo B;
3100     } elsif ($token->{tag_name} eq 'style') {
3101     $style_start_tag->();
3102     redo B;
3103     } elsif ($token->{tag_name} eq 'script') {
3104     $script_start_tag->();
3105     redo B;
3106     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3107     ## NOTE: There are "as if in head" code clones
3108     my $el;
3109     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3110 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3111 wakaba 1.1 ->append_child ($el);
3112    
3113     !!!next-token;
3114     redo B;
3115     } elsif ($token->{tag_name} eq 'head') {
3116 wakaba 1.3 !!!parse-error (type => 'in head:head');
3117 wakaba 1.1 ## Ignore the token
3118     !!!next-token;
3119     redo B;
3120     } else {
3121     #
3122     }
3123     } elsif ($token->{type} eq 'end tag') {
3124     if ($token->{tag_name} eq 'head') {
3125 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3126     pop @{$self->{open_elements}};
3127 wakaba 1.1 } else {
3128 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3129 wakaba 1.1 }
3130 wakaba 1.3 $self->{insertion_mode} = 'after head';
3131 wakaba 1.1 !!!next-token;
3132     redo B;
3133     } elsif ($token->{tag_name} eq 'html') {
3134     #
3135     } else {
3136 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3137 wakaba 1.1 ## Ignore the token
3138     !!!next-token;
3139     redo B;
3140     }
3141     } else {
3142     #
3143     }
3144    
3145 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3146 wakaba 1.1 ## As if </head>
3147 wakaba 1.3 pop @{$self->{open_elements}};
3148 wakaba 1.1 }
3149 wakaba 1.3 $self->{insertion_mode} = 'after head';
3150 wakaba 1.1 ## reprocess
3151     redo B;
3152    
3153     ## ISSUE: An issue in the spec.
3154 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3155 wakaba 1.1 if ($token->{type} eq 'character') {
3156     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3157 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3158 wakaba 1.1 unless (length $token->{data}) {
3159     !!!next-token;
3160     redo B;
3161     }
3162     }
3163    
3164     #
3165     } elsif ($token->{type} eq 'comment') {
3166     my $comment = $self->{document}->create_comment ($token->{data});
3167 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3168 wakaba 1.1 !!!next-token;
3169     redo B;
3170     } elsif ($token->{type} eq 'start tag') {
3171     if ($token->{tag_name} eq 'body') {
3172     !!!insert-element ('body', $token->{attributes});
3173 wakaba 1.3 $self->{insertion_mode} = 'in body';
3174 wakaba 1.1 !!!next-token;
3175     redo B;
3176     } elsif ($token->{tag_name} eq 'frameset') {
3177     !!!insert-element ('frameset', $token->{attributes});
3178 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3179 wakaba 1.1 !!!next-token;
3180     redo B;
3181     } elsif ({
3182     base => 1, link => 1, meta => 1,
3183 wakaba 1.3 script => 1, style => 1, title => 1,
3184 wakaba 1.1 }->{$token->{tag_name}}) {
3185 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3186     $self->{insertion_mode} = 'in head';
3187 wakaba 1.1 ## reprocess
3188     redo B;
3189     } else {
3190     #
3191     }
3192     } else {
3193     #
3194     }
3195    
3196     ## As if <body>
3197     !!!insert-element ('body');
3198 wakaba 1.3 $self->{insertion_mode} = 'in body';
3199 wakaba 1.1 ## reprocess
3200     redo B;
3201 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3202 wakaba 1.1 if ($token->{type} eq 'character') {
3203     ## NOTE: There is a code clone of "character in body".
3204     $reconstruct_active_formatting_elements->($insert_to_current);
3205    
3206 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3207 wakaba 1.1
3208     !!!next-token;
3209     redo B;
3210     } elsif ($token->{type} eq 'comment') {
3211     ## NOTE: There is a code clone of "comment in body".
3212     my $comment = $self->{document}->create_comment ($token->{data});
3213 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3214 wakaba 1.1 !!!next-token;
3215     redo B;
3216     } else {
3217     $in_body->($insert_to_current);
3218     redo B;
3219     }
3220 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3221 wakaba 1.1 if ($token->{type} eq 'character') {
3222     ## NOTE: There are "character in table" code clones.
3223     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3224 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3225 wakaba 1.1
3226     unless (length $token->{data}) {
3227     !!!next-token;
3228     redo B;
3229     }
3230     }
3231    
3232 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3233    
3234 wakaba 1.1 ## As if in body, but insert into foster parent element
3235     ## ISSUE: Spec says that "whenever a node would be inserted
3236     ## into the current node" while characters might not
3237     ## result in a new Text node.
3238     $reconstruct_active_formatting_elements->($insert_to_foster);
3239    
3240     if ({
3241     table => 1, tbody => 1, tfoot => 1,
3242     thead => 1, tr => 1,
3243 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3244 wakaba 1.1 # MUST
3245     my $foster_parent_element;
3246     my $next_sibling;
3247     my $prev_sibling;
3248 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3249     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3250     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3251 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3252     $foster_parent_element = $parent;
3253 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3254 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3255     } else {
3256 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3257 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3258     }
3259     last OE;
3260     }
3261     } # OE
3262 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3263 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3264     unless defined $foster_parent_element;
3265     if (defined $prev_sibling and
3266     $prev_sibling->node_type == 3) {
3267     $prev_sibling->manakai_append_text ($token->{data});
3268     } else {
3269     $foster_parent_element->insert_before
3270     ($self->{document}->create_text_node ($token->{data}),
3271     $next_sibling);
3272     }
3273     } else {
3274 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3275 wakaba 1.1 }
3276    
3277     !!!next-token;
3278     redo B;
3279     } elsif ($token->{type} eq 'comment') {
3280     my $comment = $self->{document}->create_comment ($token->{data});
3281 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3282 wakaba 1.1 !!!next-token;
3283     redo B;
3284     } elsif ($token->{type} eq 'start tag') {
3285     if ({
3286     caption => 1,
3287     colgroup => 1,
3288     tbody => 1, tfoot => 1, thead => 1,
3289     }->{$token->{tag_name}}) {
3290     ## Clear back to table context
3291 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3292     $self->{open_elements}->[-1]->[1] ne 'html') {
3293     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3294     pop @{$self->{open_elements}};
3295 wakaba 1.1 }
3296    
3297     push @$active_formatting_elements, ['#marker', '']
3298     if $token->{tag_name} eq 'caption';
3299    
3300     !!!insert-element ($token->{tag_name}, $token->{attributes});
3301 wakaba 1.3 $self->{insertion_mode} = {
3302 wakaba 1.1 caption => 'in caption',
3303     colgroup => 'in column group',
3304     tbody => 'in table body',
3305     tfoot => 'in table body',
3306     thead => 'in table body',
3307     }->{$token->{tag_name}};
3308     !!!next-token;
3309     redo B;
3310     } elsif ({
3311     col => 1,
3312     td => 1, th => 1, tr => 1,
3313     }->{$token->{tag_name}}) {
3314     ## Clear back to table context
3315 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3316     $self->{open_elements}->[-1]->[1] ne 'html') {
3317     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3318     pop @{$self->{open_elements}};
3319 wakaba 1.1 }
3320    
3321     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3322 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3323 wakaba 1.1 ? 'in column group' : 'in table body';
3324     ## reprocess
3325     redo B;
3326     } elsif ($token->{tag_name} eq 'table') {
3327     ## NOTE: There are code clones for this "table in table"
3328 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3329 wakaba 1.1
3330     ## As if </table>
3331     ## have a table element in table scope
3332     my $i;
3333 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3334     my $node = $self->{open_elements}->[$_];
3335 wakaba 1.1 if ($node->[1] eq 'table') {
3336     $i = $_;
3337     last INSCOPE;
3338     } elsif ({
3339     table => 1, html => 1,
3340     }->{$node->[1]}) {
3341     last INSCOPE;
3342     }
3343     } # INSCOPE
3344     unless (defined $i) {
3345 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3346 wakaba 1.1 ## Ignore tokens </table><table>
3347     !!!next-token;
3348     redo B;
3349     }
3350    
3351     ## generate implied end tags
3352     if ({
3353     dd => 1, dt => 1, li => 1, p => 1,
3354     td => 1, th => 1, tr => 1,
3355 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3356 wakaba 1.1 !!!back-token; # <table>
3357     $token = {type => 'end tag', tag_name => 'table'};
3358     !!!back-token;
3359     $token = {type => 'end tag',
3360 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3361 wakaba 1.1 redo B;
3362     }
3363    
3364 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3365     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3366 wakaba 1.1 }
3367    
3368 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3369 wakaba 1.1
3370 wakaba 1.3 $self->_reset_insertion_mode;
3371 wakaba 1.1
3372     ## reprocess
3373     redo B;
3374     } else {
3375     #
3376     }
3377     } elsif ($token->{type} eq 'end tag') {
3378     if ($token->{tag_name} eq 'table') {
3379     ## have a table element in table scope
3380     my $i;
3381 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3382     my $node = $self->{open_elements}->[$_];
3383 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3384     $i = $_;
3385     last INSCOPE;
3386     } elsif ({
3387     table => 1, html => 1,
3388     }->{$node->[1]}) {
3389     last INSCOPE;
3390     }
3391     } # INSCOPE
3392     unless (defined $i) {
3393 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3394 wakaba 1.1 ## Ignore the token
3395     !!!next-token;
3396     redo B;
3397     }
3398    
3399     ## generate implied end tags
3400     if ({
3401     dd => 1, dt => 1, li => 1, p => 1,
3402     td => 1, th => 1, tr => 1,
3403 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3404 wakaba 1.1 !!!back-token;
3405     $token = {type => 'end tag',
3406 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3407 wakaba 1.1 redo B;
3408     }
3409    
3410 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3411     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3412 wakaba 1.1 }
3413    
3414 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3415 wakaba 1.1
3416 wakaba 1.3 $self->_reset_insertion_mode;
3417 wakaba 1.1
3418     !!!next-token;
3419     redo B;
3420     } elsif ({
3421     body => 1, caption => 1, col => 1, colgroup => 1,
3422     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3423     thead => 1, tr => 1,
3424     }->{$token->{tag_name}}) {
3425 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3426 wakaba 1.1 ## Ignore the token
3427     !!!next-token;
3428     redo B;
3429     } else {
3430     #
3431     }
3432     } else {
3433     #
3434     }
3435    
3436 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3437 wakaba 1.1 $in_body->($insert_to_foster);
3438     redo B;
3439 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3440 wakaba 1.1 if ($token->{type} eq 'character') {
3441     ## NOTE: This is a code clone of "character in body".
3442     $reconstruct_active_formatting_elements->($insert_to_current);
3443    
3444 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3445 wakaba 1.1
3446     !!!next-token;
3447     redo B;
3448     } elsif ($token->{type} eq 'comment') {
3449     ## NOTE: This is a code clone of "comment in body".
3450     my $comment = $self->{document}->create_comment ($token->{data});
3451 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3452 wakaba 1.1 !!!next-token;
3453     redo B;
3454     } elsif ($token->{type} eq 'start tag') {
3455     if ({
3456     caption => 1, col => 1, colgroup => 1, tbody => 1,
3457     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3458     }->{$token->{tag_name}}) {
3459 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3460 wakaba 1.1
3461     ## As if </caption>
3462     ## have a table element in table scope
3463     my $i;
3464 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3465     my $node = $self->{open_elements}->[$_];
3466 wakaba 1.1 if ($node->[1] eq 'caption') {
3467     $i = $_;
3468     last INSCOPE;
3469     } elsif ({
3470     table => 1, html => 1,
3471     }->{$node->[1]}) {
3472     last INSCOPE;
3473     }
3474     } # INSCOPE
3475     unless (defined $i) {
3476 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3477 wakaba 1.1 ## Ignore the token
3478     !!!next-token;
3479     redo B;
3480     }
3481    
3482     ## generate implied end tags
3483     if ({
3484     dd => 1, dt => 1, li => 1, p => 1,
3485     td => 1, th => 1, tr => 1,
3486 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3487 wakaba 1.1 !!!back-token; # <?>
3488     $token = {type => 'end tag', tag_name => 'caption'};
3489     !!!back-token;
3490     $token = {type => 'end tag',
3491 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3492 wakaba 1.1 redo B;
3493     }
3494    
3495 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3496     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3497 wakaba 1.1 }
3498    
3499 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3500 wakaba 1.1
3501     $clear_up_to_marker->();
3502    
3503 wakaba 1.3 $self->{insertion_mode} = 'in table';
3504 wakaba 1.1
3505     ## reprocess
3506     redo B;
3507     } else {
3508     #
3509     }
3510     } elsif ($token->{type} eq 'end tag') {
3511     if ($token->{tag_name} eq 'caption') {
3512     ## have a table element in table scope
3513     my $i;
3514 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3515     my $node = $self->{open_elements}->[$_];
3516 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3517     $i = $_;
3518     last INSCOPE;
3519     } elsif ({
3520     table => 1, html => 1,
3521     }->{$node->[1]}) {
3522     last INSCOPE;
3523     }
3524     } # INSCOPE
3525     unless (defined $i) {
3526 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3527 wakaba 1.1 ## Ignore the token
3528     !!!next-token;
3529     redo B;
3530     }
3531    
3532     ## generate implied end tags
3533     if ({
3534     dd => 1, dt => 1, li => 1, p => 1,
3535     td => 1, th => 1, tr => 1,
3536 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3537 wakaba 1.1 !!!back-token;
3538     $token = {type => 'end tag',
3539 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3540 wakaba 1.1 redo B;
3541     }
3542    
3543 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3544     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3545 wakaba 1.1 }
3546    
3547 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3548 wakaba 1.1
3549     $clear_up_to_marker->();
3550    
3551 wakaba 1.3 $self->{insertion_mode} = 'in table';
3552 wakaba 1.1
3553     !!!next-token;
3554     redo B;
3555     } elsif ($token->{tag_name} eq 'table') {
3556 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3557 wakaba 1.1
3558     ## As if </caption>
3559     ## have a table element in table scope
3560     my $i;
3561 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3562     my $node = $self->{open_elements}->[$_];
3563 wakaba 1.1 if ($node->[1] eq 'caption') {
3564     $i = $_;
3565     last INSCOPE;
3566     } elsif ({
3567     table => 1, html => 1,
3568     }->{$node->[1]}) {
3569     last INSCOPE;
3570     }
3571     } # INSCOPE
3572     unless (defined $i) {
3573 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3574 wakaba 1.1 ## Ignore the token
3575     !!!next-token;
3576     redo B;
3577     }
3578    
3579     ## generate implied end tags
3580     if ({
3581     dd => 1, dt => 1, li => 1, p => 1,
3582     td => 1, th => 1, tr => 1,
3583 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3584 wakaba 1.1 !!!back-token; # </table>
3585     $token = {type => 'end tag', tag_name => 'caption'};
3586     !!!back-token;
3587     $token = {type => 'end tag',
3588 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3589 wakaba 1.1 redo B;
3590     }
3591    
3592 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3593     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3594 wakaba 1.1 }
3595    
3596 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3597 wakaba 1.1
3598     $clear_up_to_marker->();
3599    
3600 wakaba 1.3 $self->{insertion_mode} = 'in table';
3601 wakaba 1.1
3602     ## reprocess
3603     redo B;
3604     } elsif ({
3605     body => 1, col => 1, colgroup => 1,
3606     html => 1, tbody => 1, td => 1, tfoot => 1,
3607     th => 1, thead => 1, tr => 1,
3608     }->{$token->{tag_name}}) {
3609 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3610 wakaba 1.1 ## Ignore the token (advance first, or |redo B| loops on it forever)
     !!!next-token;
3611     redo B;
3612     } else {
3613     #
3614     }
3615     } else {
3616     #
3617     }
3618    
3619     $in_body->($insert_to_current);
3620     redo B;
3621 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3622 wakaba 1.1 if ($token->{type} eq 'character') {
3623     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3624 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3625 wakaba 1.1 unless (length $token->{data}) {
3626     !!!next-token;
3627     redo B;
3628     }
3629     }
3630    
3631     #
3632     } elsif ($token->{type} eq 'comment') {
3633     my $comment = $self->{document}->create_comment ($token->{data});
3634 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3635 wakaba 1.1 !!!next-token;
3636     redo B;
3637     } elsif ($token->{type} eq 'start tag') {
3638     if ($token->{tag_name} eq 'col') {
3639     !!!insert-element ($token->{tag_name}, $token->{attributes});
3640 wakaba 1.3 pop @{$self->{open_elements}};
3641 wakaba 1.1 !!!next-token;
3642     redo B;
3643     } else {
3644     #
3645     }
3646     } elsif ($token->{type} eq 'end tag') {
3647     if ($token->{tag_name} eq 'colgroup') {
3648 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3649     !!!parse-error (type => 'unmatched end tag:colgroup');
3650 wakaba 1.1 ## Ignore the token
3651     !!!next-token;
3652     redo B;
3653     } else {
3654 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3655     $self->{insertion_mode} = 'in table';
3656 wakaba 1.1 !!!next-token;
3657     redo B;
3658     }
3659     } elsif ($token->{tag_name} eq 'col') {
3660 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3661 wakaba 1.1 ## Ignore the token
3662     !!!next-token;
3663     redo B;
3664     } else {
3665     #
3666     }
3667     } else {
3668     #
3669     }
3670    
3671     ## As if </colgroup>
3672 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3673     !!!parse-error (type => 'unmatched end tag:colgroup');
3674 wakaba 1.1 ## Ignore the token
3675     !!!next-token;
3676     redo B;
3677     } else {
3678 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3679     $self->{insertion_mode} = 'in table';
3680 wakaba 1.1 ## reprocess
3681     redo B;
3682     }
3683 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3684 wakaba 1.1 if ($token->{type} eq 'character') {
3685     ## NOTE: This is a "character in table" code clone.
3686     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3687 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3688 wakaba 1.1
3689     unless (length $token->{data}) {
3690     !!!next-token;
3691     redo B;
3692     }
3693     }
3694    
3695 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3696    
3697 wakaba 1.1 ## As if in body, but insert into foster parent element
3698     ## ISSUE: Spec says that "whenever a node would be inserted
3699     ## into the current node" while characters might not
3700     ## result in a new Text node.
3701     $reconstruct_active_formatting_elements->($insert_to_foster);
3702    
3703     if ({
3704     table => 1, tbody => 1, tfoot => 1,
3705     thead => 1, tr => 1,
3706 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3707 wakaba 1.1 # MUST
3708     my $foster_parent_element;
3709     my $next_sibling;
3710     my $prev_sibling;
3711 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3712     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3713     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3714 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3715     $foster_parent_element = $parent;
3716 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3717 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3718     } else {
3719 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3720 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3721     }
3722     last OE;
3723     }
3724     } # OE
3725 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3726 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3727     unless defined $foster_parent_element;
3728     if (defined $prev_sibling and
3729     $prev_sibling->node_type == 3) {
3730     $prev_sibling->manakai_append_text ($token->{data});
3731     } else {
3732     $foster_parent_element->insert_before
3733     ($self->{document}->create_text_node ($token->{data}),
3734     $next_sibling);
3735     }
3736     } else {
3737 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3738 wakaba 1.1 }
3739    
3740     !!!next-token;
3741     redo B;
3742     } elsif ($token->{type} eq 'comment') {
3743     ## Copied from 'in table'
3744     my $comment = $self->{document}->create_comment ($token->{data});
3745 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3746 wakaba 1.1 !!!next-token;
3747     redo B;
3748     } elsif ($token->{type} eq 'start tag') {
3749     if ({
3750     tr => 1,
3751     th => 1, td => 1,
3752     }->{$token->{tag_name}}) {
3753 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3754     !!!parse-error (type => 'missing start tag:tr');
3755     }
3756    
3757 wakaba 1.1 ## Clear back to table body context
3758     while (not {
3759     tbody => 1, tfoot => 1, thead => 1, html => 1,
3760 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3761     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3762     pop @{$self->{open_elements}};
3763 wakaba 1.1 }
3764    
3765 wakaba 1.3 $self->{insertion_mode} = 'in row';
3766 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3767     !!!insert-element ($token->{tag_name}, $token->{attributes});
3768     !!!next-token;
3769     } else {
3770     !!!insert-element ('tr');
3771     ## reprocess
3772     }
3773     redo B;
3774     } elsif ({
3775     caption => 1, col => 1, colgroup => 1,
3776     tbody => 1, tfoot => 1, thead => 1,
3777     }->{$token->{tag_name}}) {
3778     ## have an element in table scope
3779     my $i;
3780 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3781     my $node = $self->{open_elements}->[$_];
3782 wakaba 1.1 if ({
3783     tbody => 1, thead => 1, tfoot => 1,
3784     }->{$node->[1]}) {
3785     $i = $_;
3786     last INSCOPE;
3787     } elsif ({
3788     table => 1, html => 1,
3789     }->{$node->[1]}) {
3790     last INSCOPE;
3791     }
3792     } # INSCOPE
3793     unless (defined $i) {
3794 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3795 wakaba 1.1 ## Ignore the token
3796     !!!next-token;
3797     redo B;
3798     }
3799    
3800     ## Clear back to table body context
3801     while (not {
3802     tbody => 1, tfoot => 1, thead => 1, html => 1,
3803 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3804     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3805     pop @{$self->{open_elements}};
3806 wakaba 1.1 }
3807    
3808     ## As if <{current node}>
3809     ## have an element in table scope
3810     ## true by definition
3811    
3812     ## Clear back to table body context
3813     ## nop by definition
3814    
3815 wakaba 1.3 pop @{$self->{open_elements}};
3816     $self->{insertion_mode} = 'in table';
3817 wakaba 1.1 ## reprocess
3818     redo B;
3819     } elsif ($token->{tag_name} eq 'table') {
3820     ## NOTE: This is a code clone of "table in table"
3821 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3822 wakaba 1.1
3823     ## As if </table>
3824     ## have a table element in table scope
3825     my $i;
3826 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3827     my $node = $self->{open_elements}->[$_];
3828 wakaba 1.1 if ($node->[1] eq 'table') {
3829     $i = $_;
3830     last INSCOPE;
3831     } elsif ({
3832     table => 1, html => 1,
3833     }->{$node->[1]}) {
3834     last INSCOPE;
3835     }
3836     } # INSCOPE
3837     unless (defined $i) {
3838 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3839 wakaba 1.1 ## Ignore tokens </table><table>
3840     !!!next-token;
3841     redo B;
3842     }
3843    
3844     ## generate implied end tags
3845     if ({
3846     dd => 1, dt => 1, li => 1, p => 1,
3847     td => 1, th => 1, tr => 1,
3848 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3849 wakaba 1.1 !!!back-token; # <table>
3850     $token = {type => 'end tag', tag_name => 'table'};
3851     !!!back-token;
3852     $token = {type => 'end tag',
3853 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3854 wakaba 1.1 redo B;
3855     }
3856    
3857 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3858     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3859 wakaba 1.1 }
3860    
3861 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3862 wakaba 1.1
3863 wakaba 1.3 $self->_reset_insertion_mode;
3864 wakaba 1.1
3865     ## reprocess
3866     redo B;
3867     } else {
3868     #
3869     }
3870     } elsif ($token->{type} eq 'end tag') {
3871     if ({
3872     tbody => 1, tfoot => 1, thead => 1,
3873     }->{$token->{tag_name}}) {
3874     ## have an element in table scope
3875     my $i;
3876 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3877     my $node = $self->{open_elements}->[$_];
3878 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3879     $i = $_;
3880     last INSCOPE;
3881     } elsif ({
3882     table => 1, html => 1,
3883     }->{$node->[1]}) {
3884     last INSCOPE;
3885     }
3886     } # INSCOPE
3887     unless (defined $i) {
3888 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3889 wakaba 1.1 ## Ignore the token
3890     !!!next-token;
3891     redo B;
3892     }
3893    
3894     ## Clear back to table body context
3895     while (not {
3896     tbody => 1, tfoot => 1, thead => 1, html => 1,
3897 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3898     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3899     pop @{$self->{open_elements}};
3900 wakaba 1.1 }
3901    
3902 wakaba 1.3 pop @{$self->{open_elements}};
3903     $self->{insertion_mode} = 'in table';
3904 wakaba 1.1 !!!next-token;
3905     redo B;
3906     } elsif ($token->{tag_name} eq 'table') {
3907     ## have an element in table scope
3908     my $i;
3909 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3910     my $node = $self->{open_elements}->[$_];
3911 wakaba 1.1 if ({
3912     tbody => 1, thead => 1, tfoot => 1,
3913     }->{$node->[1]}) {
3914     $i = $_;
3915     last INSCOPE;
3916     } elsif ({
3917     table => 1, html => 1,
3918     }->{$node->[1]}) {
3919     last INSCOPE;
3920     }
3921     } # INSCOPE
3922     unless (defined $i) {
3923 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3924 wakaba 1.1 ## Ignore the token
3925     !!!next-token;
3926     redo B;
3927     }
3928    
3929     ## Clear back to table body context
3930     while (not {
3931     tbody => 1, tfoot => 1, thead => 1, html => 1,
3932 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3933     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3934     pop @{$self->{open_elements}};
3935 wakaba 1.1 }
3936    
3937     ## As if <{current node}>
3938     ## have an element in table scope
3939     ## true by definition
3940    
3941     ## Clear back to table body context
3942     ## nop by definition
3943    
3944 wakaba 1.3 pop @{$self->{open_elements}};
3945     $self->{insertion_mode} = 'in table';
3946 wakaba 1.1 ## reprocess
3947     redo B;
3948     } elsif ({
3949     body => 1, caption => 1, col => 1, colgroup => 1,
3950     html => 1, td => 1, th => 1, tr => 1,
3951     }->{$token->{tag_name}}) {
3952 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3953 wakaba 1.1 ## Ignore the token
3954     !!!next-token;
3955     redo B;
3956     } else {
3957     #
3958     }
3959     } else {
3960     #
3961     }
3962    
3963     ## As if in table
3964 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3965 wakaba 1.1 $in_body->($insert_to_foster);
3966     redo B;
3967 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
3968 wakaba 1.1 if ($token->{type} eq 'character') {
3969     ## NOTE: This is a "character in table" code clone.
3970     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3971 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3972 wakaba 1.1
3973     unless (length $token->{data}) {
3974     !!!next-token;
3975     redo B;
3976     }
3977     }
3978    
3979 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3980    
3981 wakaba 1.1 ## As if in body, but insert into foster parent element
3982     ## ISSUE: Spec says that "whenever a node would be inserted
3983     ## into the current node" while characters might not
3984     ## result in a new Text node.
3985     $reconstruct_active_formatting_elements->($insert_to_foster);
3986    
3987     if ({
3988     table => 1, tbody => 1, tfoot => 1,
3989     thead => 1, tr => 1,
3990 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3991 wakaba 1.1 # MUST
3992     my $foster_parent_element;
3993     my $next_sibling;
3994     my $prev_sibling;
3995 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3996     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3997     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3998 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3999     $foster_parent_element = $parent;
4000 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4001 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4002     } else {
4003 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4004 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4005     }
4006     last OE;
4007     }
4008     } # OE
4009 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4010 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4011     unless defined $foster_parent_element;
4012     if (defined $prev_sibling and
4013     $prev_sibling->node_type == 3) {
4014     $prev_sibling->manakai_append_text ($token->{data});
4015     } else {
4016     $foster_parent_element->insert_before
4017     ($self->{document}->create_text_node ($token->{data}),
4018     $next_sibling);
4019     }
4020     } else {
4021 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4022 wakaba 1.1 }
4023    
4024     !!!next-token;
4025     redo B;
4026     } elsif ($token->{type} eq 'comment') {
4027     ## Copied from 'in table'
4028     my $comment = $self->{document}->create_comment ($token->{data});
4029 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4030 wakaba 1.1 !!!next-token;
4031     redo B;
4032     } elsif ($token->{type} eq 'start tag') {
4033     if ($token->{tag_name} eq 'th' or
4034     $token->{tag_name} eq 'td') {
4035     ## Clear back to table row context
4036     while (not {
4037     tr => 1, html => 1,
4038 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4039     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4040     pop @{$self->{open_elements}};
4041 wakaba 1.1 }
4042    
4043     !!!insert-element ($token->{tag_name}, $token->{attributes});
4044 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4045 wakaba 1.1
4046     push @$active_formatting_elements, ['#marker', ''];
4047    
4048     !!!next-token;
4049     redo B;
4050     } elsif ({
4051     caption => 1, col => 1, colgroup => 1,
4052     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4053     }->{$token->{tag_name}}) {
4054     ## As if </tr>
4055     ## have an element in table scope
4056     my $i;
4057 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4058     my $node = $self->{open_elements}->[$_];
4059 wakaba 1.1 if ($node->[1] eq 'tr') {
4060     $i = $_;
4061     last INSCOPE;
4062     } elsif ({
4063     table => 1, html => 1,
4064     }->{$node->[1]}) {
4065     last INSCOPE;
4066     }
4067     } # INSCOPE
4068     unless (defined $i) {
4069 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4070 wakaba 1.1 ## Ignore the token (no |tr| element in table scope to close)
4071     !!!next-token;
4072     redo B;
4073     }
4074    
4075     ## Clear back to table row context
4076     while (not {
4077     tr => 1, html => 1,
4078 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4079     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4080     pop @{$self->{open_elements}};
4081 wakaba 1.1 }
4082    
4083 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4084     $self->{insertion_mode} = 'in table body';
4085 wakaba 1.1 ## reprocess
4086     redo B;
4087     } elsif ($token->{tag_name} eq 'table') {
4088     ## NOTE: This is a code clone of "table in table"
4089 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4090 wakaba 1.1
4091     ## As if </table>
4092     ## have a table element in table scope
4093     my $i;
4094 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4095     my $node = $self->{open_elements}->[$_];
4096 wakaba 1.1 if ($node->[1] eq 'table') {
4097     $i = $_;
4098     last INSCOPE;
4099     } elsif ({
4100     table => 1, html => 1,
4101     }->{$node->[1]}) {
4102     last INSCOPE;
4103     }
4104     } # INSCOPE
4105     unless (defined $i) {
4106 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4107 wakaba 1.1 ## Ignore tokens </table><table>
4108     !!!next-token;
4109     redo B;
4110     }
4111    
4112     ## generate implied end tags
4113     if ({
4114     dd => 1, dt => 1, li => 1, p => 1,
4115     td => 1, th => 1, tr => 1,
4116 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4117 wakaba 1.1 !!!back-token; # <table>
4118     $token = {type => 'end tag', tag_name => 'table'};
4119     !!!back-token;
4120     $token = {type => 'end tag',
4121 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4122 wakaba 1.1 redo B;
4123     }
4124    
4125 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4126     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4127 wakaba 1.1 }
4128    
4129 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4130 wakaba 1.1
4131 wakaba 1.3 $self->_reset_insertion_mode;
4132 wakaba 1.1
4133     ## reprocess
4134     redo B;
4135     } else {
4136     #
4137     }
4138     } elsif ($token->{type} eq 'end tag') {
4139     if ($token->{tag_name} eq 'tr') {
4140     ## have an element in table scope
4141     my $i;
4142 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4143     my $node = $self->{open_elements}->[$_];
4144 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4145     $i = $_;
4146     last INSCOPE;
4147     } elsif ({
4148     table => 1, html => 1,
4149     }->{$node->[1]}) {
4150     last INSCOPE;
4151     }
4152     } # INSCOPE
4153     unless (defined $i) {
4154 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4155 wakaba 1.1 ## Ignore the token
4156     !!!next-token;
4157     redo B;
4158     }
4159    
4160     ## Clear back to table row context
4161     while (not {
4162     tr => 1, html => 1,
4163 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4164     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4165     pop @{$self->{open_elements}};
4166 wakaba 1.1 }
4167    
4168 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4169     $self->{insertion_mode} = 'in table body';
4170 wakaba 1.1 !!!next-token;
4171     redo B;
4172     } elsif ($token->{tag_name} eq 'table') {
4173     ## As if </tr>
4174     ## have an element in table scope
4175     my $i;
4176 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4177     my $node = $self->{open_elements}->[$_];
4178 wakaba 1.1 if ($node->[1] eq 'tr') {
4179     $i = $_;
4180     last INSCOPE;
4181     } elsif ({
4182     table => 1, html => 1,
4183     }->{$node->[1]}) {
4184     last INSCOPE;
4185     }
4186     } # INSCOPE
4187     unless (defined $i) {
4188 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4189 wakaba 1.1 ## Ignore the token (no |tr| element in table scope to close)
4190     !!!next-token;
4191     redo B;
4192     }
4193    
4194     ## Clear back to table row context
4195     while (not {
4196     tr => 1, html => 1,
4197 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4198     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4199     pop @{$self->{open_elements}};
4200 wakaba 1.1 }
4201    
4202 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4203     $self->{insertion_mode} = 'in table body';
4204 wakaba 1.1 ## reprocess
4205     redo B;
4206     } elsif ({
4207     tbody => 1, tfoot => 1, thead => 1,
4208     }->{$token->{tag_name}}) {
4209     ## have an element in table scope
4210     my $i;
4211 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4212     my $node = $self->{open_elements}->[$_];
4213 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4214     $i = $_;
4215     last INSCOPE;
4216     } elsif ({
4217     table => 1, html => 1,
4218     }->{$node->[1]}) {
4219     last INSCOPE;
4220     }
4221     } # INSCOPE
4222     unless (defined $i) {
4223 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4224 wakaba 1.1 ## Ignore the token
4225     !!!next-token;
4226     redo B;
4227     }
4228    
4229     ## As if </tr>
4230     ## have an element in table scope
4231     $i = undef; # reuse the $i declared above; a second |my| would mask it
4232 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4233     my $node = $self->{open_elements}->[$_];
4234 wakaba 1.1 if ($node->[1] eq 'tr') {
4235     $i = $_;
4236     last INSCOPE;
4237     } elsif ({
4238     table => 1, html => 1,
4239     }->{$node->[1]}) {
4240     last INSCOPE;
4241     }
4242     } # INSCOPE
4243     unless (defined $i) {
4244 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4245 wakaba 1.1 ## Ignore the token
4246     !!!next-token;
4247     redo B;
4248     }
4249    
4250     ## Clear back to table row context
4251     while (not {
4252     tr => 1, html => 1,
4253 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4254     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4255     pop @{$self->{open_elements}};
4256 wakaba 1.1 }
4257    
4258 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4259     $self->{insertion_mode} = 'in table body';
4260 wakaba 1.1 ## reprocess
4261     redo B;
4262     } elsif ({
4263     body => 1, caption => 1, col => 1,
4264     colgroup => 1, html => 1, td => 1, th => 1,
4265     }->{$token->{tag_name}}) {
4266 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4267 wakaba 1.1 ## Ignore the token
4268     !!!next-token;
4269     redo B;
4270     } else {
4271     #
4272     }
4273     } else {
4274     #
4275     }
4276    
4277     ## As if in table
4278 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4279 wakaba 1.1 $in_body->($insert_to_foster);
4280     redo B;
4281 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4282 wakaba 1.1 if ($token->{type} eq 'character') {
4283     ## NOTE: This is a code clone of "character in body".
4284     $reconstruct_active_formatting_elements->($insert_to_current);
4285    
4286 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4287 wakaba 1.1
4288     !!!next-token;
4289     redo B;
4290     } elsif ($token->{type} eq 'comment') {
4291     ## NOTE: This is a code clone of "comment in body".
4292     my $comment = $self->{document}->create_comment ($token->{data});
4293 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4294 wakaba 1.1 !!!next-token;
4295     redo B;
4296     } elsif ($token->{type} eq 'start tag') {
4297     if ({
4298     caption => 1, col => 1, colgroup => 1,
4299     tbody => 1, td => 1, tfoot => 1, th => 1,
4300     thead => 1, tr => 1,
4301     }->{$token->{tag_name}}) {
4302     ## have an element in table scope
4303     my $tn;
4304 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4305     my $node = $self->{open_elements}->[$_];
4306 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4307     $tn = $node->[1];
4308     last INSCOPE;
4309     } elsif ({
4310     table => 1, html => 1,
4311     }->{$node->[1]}) {
4312     last INSCOPE;
4313     }
4314     } # INSCOPE
4315     unless (defined $tn) {
4316 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4317 wakaba 1.1 ## Ignore the token
4318     !!!next-token;
4319     redo B;
4320     }
4321    
4322     ## Close the cell
4323     !!!back-token; # <?>
4324     $token = {type => 'end tag', tag_name => $tn};
4325     redo B;
4326     } else {
4327     #
4328     }
4329     } elsif ($token->{type} eq 'end tag') {
4330     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4331     ## have an element in table scope
4332     my $i;
4333 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4334     my $node = $self->{open_elements}->[$_];
4335 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4336     $i = $_;
4337     last INSCOPE;
4338     } elsif ({
4339     table => 1, html => 1,
4340     }->{$node->[1]}) {
4341     last INSCOPE;
4342     }
4343     } # INSCOPE
4344     unless (defined $i) {
4345 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4346 wakaba 1.1 ## Ignore the token
4347     !!!next-token;
4348     redo B;
4349     }
4350    
4351     ## generate implied end tags
4352     if ({
4353     dd => 1, dt => 1, li => 1, p => 1,
4354     td => ($token->{tag_name} eq 'th'),
4355     th => ($token->{tag_name} eq 'td'),
4356     tr => 1,
4357 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4358 wakaba 1.1 !!!back-token;
4359     $token = {type => 'end tag',
4360 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4361 wakaba 1.1 redo B;
4362     }
4363    
4364 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4365     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4366 wakaba 1.1 }
4367    
4368 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4369 wakaba 1.1
4370     $clear_up_to_marker->();
4371    
4372 wakaba 1.3 $self->{insertion_mode} = 'in row';
4373 wakaba 1.1
4374     !!!next-token;
4375     redo B;
4376     } elsif ({
4377     body => 1, caption => 1, col => 1,
4378     colgroup => 1, html => 1,
4379     }->{$token->{tag_name}}) {
4380 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4381 wakaba 1.1 ## Ignore the token
4382     !!!next-token;
4383     redo B;
4384     } elsif ({
4385     table => 1, tbody => 1, tfoot => 1,
4386     thead => 1, tr => 1,
4387     }->{$token->{tag_name}}) {
4388     ## have an element in table scope
4389     my $i;
4390     my $tn;
4391 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4392     my $node = $self->{open_elements}->[$_];
4393 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4394     $i = $_;
4395     last INSCOPE;
4396     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4397     $tn = $node->[1];
4398     ## NOTE: There is exactly one |td| or |th| element
4399     ## in scope in the stack of open elements by definition.
4400     } elsif ({
4401     table => 1, html => 1,
4402     }->{$node->[1]}) {
4403     last INSCOPE;
4404     }
4405     } # INSCOPE
4406     unless (defined $i) {
4407 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4408 wakaba 1.1 ## Ignore the token
4409     !!!next-token;
4410     redo B;
4411     }
4412    
4413     ## Close the cell
4414     !!!back-token; # </?>
4415     $token = {type => 'end tag', tag_name => $tn};
4416     redo B;
4417     } else {
4418     #
4419     }
4420     } else {
4421     #
4422     }
4423    
4424     $in_body->($insert_to_current);
4425     redo B;
4426 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4427 wakaba 1.1 if ($token->{type} eq 'character') {
4428 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4429 wakaba 1.1 !!!next-token;
4430     redo B;
4431     } elsif ($token->{type} eq 'comment') {
4432     my $comment = $self->{document}->create_comment ($token->{data});
4433 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4434 wakaba 1.1 !!!next-token;
4435     redo B;
4436     } elsif ($token->{type} eq 'start tag') {
4437     if ($token->{tag_name} eq 'option') {
4438 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4439 wakaba 1.1 ## As if </option>
4440 wakaba 1.3 pop @{$self->{open_elements}};
4441 wakaba 1.1 }
4442    
4443     !!!insert-element ($token->{tag_name}, $token->{attributes});
4444     !!!next-token;
4445     redo B;
4446     } elsif ($token->{tag_name} eq 'optgroup') {
4447 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4448 wakaba 1.1 ## As if </option>
4449 wakaba 1.3 pop @{$self->{open_elements}};
4450 wakaba 1.1 }
4451    
4452 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4453 wakaba 1.1 ## As if </optgroup>
4454 wakaba 1.3 pop @{$self->{open_elements}};
4455 wakaba 1.1 }
4456    
4457     !!!insert-element ($token->{tag_name}, $token->{attributes});
4458     !!!next-token;
4459     redo B;
4460     } elsif ($token->{tag_name} eq 'select') {
4461 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4462 wakaba 1.1 ## As if </select> instead
4463     ## have an element in table scope
4464     my $i;
4465 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4466     my $node = $self->{open_elements}->[$_];
4467 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4468     $i = $_;
4469     last INSCOPE;
4470     } elsif ({
4471     table => 1, html => 1,
4472     }->{$node->[1]}) {
4473     last INSCOPE;
4474     }
4475     } # INSCOPE
4476     unless (defined $i) {
4477 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4478 wakaba 1.1 ## Ignore the token
4479     !!!next-token;
4480     redo B;
4481     }
4482    
4483 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4484 wakaba 1.1
4485 wakaba 1.3 $self->_reset_insertion_mode;
4486 wakaba 1.1
4487     !!!next-token;
4488     redo B;
4489     } else {
4490     #
4491     }
4492     } elsif ($token->{type} eq 'end tag') {
4493     if ($token->{tag_name} eq 'optgroup') {
4494 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4495     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4496 wakaba 1.1 ## As if </option>
4497 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4498     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4499     pop @{$self->{open_elements}};
4500 wakaba 1.1 } else {
4501 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4502 wakaba 1.1 ## Ignore the token
4503     }
4504     !!!next-token;
4505     redo B;
4506     } elsif ($token->{tag_name} eq 'option') {
4507 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4508     pop @{$self->{open_elements}};
4509 wakaba 1.1 } else {
4510 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4511 wakaba 1.1 ## Ignore the token
4512     }
4513     !!!next-token;
4514     redo B;
4515     } elsif ($token->{tag_name} eq 'select') {
4516     ## have an element in table scope
4517     my $i;
4518 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4519     my $node = $self->{open_elements}->[$_];
4520 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4521     $i = $_;
4522     last INSCOPE;
4523     } elsif ({
4524     table => 1, html => 1,
4525     }->{$node->[1]}) {
4526     last INSCOPE;
4527     }
4528     } # INSCOPE
4529     unless (defined $i) {
4530 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4531 wakaba 1.1 ## Ignore the token
4532     !!!next-token;
4533     redo B;
4534     }
4535    
4536 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4537 wakaba 1.1
4538 wakaba 1.3 $self->_reset_insertion_mode;
4539 wakaba 1.1
4540     !!!next-token;
4541     redo B;
4542     } elsif ({
4543     caption => 1, table => 1, tbody => 1,
4544     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4545     }->{$token->{tag_name}}) {
4546 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4547 wakaba 1.1
4548     ## have an element in table scope
4549     my $i;
4550 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4551     my $node = $self->{open_elements}->[$_];
4552 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4553     $i = $_;
4554     last INSCOPE;
4555     } elsif ({
4556     table => 1, html => 1,
4557     }->{$node->[1]}) {
4558     last INSCOPE;
4559     }
4560     } # INSCOPE
4561     unless (defined $i) {
4562     ## Ignore the token
4563     !!!next-token;
4564     redo B;
4565     }
4566    
4567     ## As if </select>
4568     ## have an element in table scope
4569     undef $i;
4570 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4571     my $node = $self->{open_elements}->[$_];
4572 wakaba 1.1 if ($node->[1] eq 'select') {
4573     $i = $_;
4574     last INSCOPE;
4575     } elsif ({
4576     table => 1, html => 1,
4577     }->{$node->[1]}) {
4578     last INSCOPE;
4579     }
4580     } # INSCOPE
4581     unless (defined $i) {
4582 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4583 wakaba 1.1 ## Ignore the </select> token
4584     !!!next-token; ## TODO: ok?
4585     redo B;
4586     }
4587    
4588 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4589 wakaba 1.1
4590 wakaba 1.3 $self->_reset_insertion_mode;
4591 wakaba 1.1
4592     ## reprocess
4593     redo B;
4594     } else {
4595     #
4596     }
4597     } else {
4598     #
4599     }
4600    
4601 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4602 wakaba 1.1 ## Ignore the token
4603     !!!next-token;
4604     redo B;
4605 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4606 wakaba 1.1 if ($token->{type} eq 'character') {
4607     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4608     ## As if in body
4609     $reconstruct_active_formatting_elements->($insert_to_current);
4610    
4611 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4612 wakaba 1.1
4613     unless (length $token->{data}) {
4614     !!!next-token;
4615     redo B;
4616     }
4617     }
4618    
4619     #
4620 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4621 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4622     my $comment = $self->{document}->create_comment ($token->{data});
4623 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4624 wakaba 1.1 !!!next-token;
4625     redo B;
4626 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4627     !!!parse-error (type => 'after body:'.$token->{tag_name});
4628     #
4629 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4630     if ($token->{tag_name} eq 'html') {
4631 wakaba 1.3 if (defined $self->{inner_html_node}) {
4632     !!!parse-error (type => 'unmatched end tag:html');
4633     ## Ignore the token
4634     !!!next-token;
4635     redo B;
4636     } else {
4637     $phase = 'trailing end';
4638     !!!next-token;
4639     redo B;
4640     }
4641 wakaba 1.1 } else {
4642 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4643 wakaba 1.1 }
4644     } else {
4645 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4646 wakaba 1.1 }
4647    
4648 wakaba 1.3 $self->{insertion_mode} = 'in body';
4649 wakaba 1.1 ## reprocess
4650     redo B;
4651 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4652 wakaba 1.1 if ($token->{type} eq 'character') {
4653     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4654 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4655 wakaba 1.1
4656     unless (length $token->{data}) {
4657     !!!next-token;
4658     redo B;
4659     }
4660     }
4661    
4662     #
4663     } elsif ($token->{type} eq 'comment') {
4664     my $comment = $self->{document}->create_comment ($token->{data});
4665 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4666 wakaba 1.1 !!!next-token;
4667     redo B;
4668     } elsif ($token->{type} eq 'start tag') {
4669     if ($token->{tag_name} eq 'frameset') {
4670     !!!insert-element ($token->{tag_name}, $token->{attributes});
4671     !!!next-token;
4672     redo B;
4673     } elsif ($token->{tag_name} eq 'frame') {
4674     !!!insert-element ($token->{tag_name}, $token->{attributes});
4675 wakaba 1.3 pop @{$self->{open_elements}};
4676 wakaba 1.1 !!!next-token;
4677     redo B;
4678     } elsif ($token->{tag_name} eq 'noframes') {
4679     $in_body->($insert_to_current);
4680     redo B;
4681     } else {
4682     #
4683     }
4684     } elsif ($token->{type} eq 'end tag') {
4685     if ($token->{tag_name} eq 'frameset') {
4686 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4687     @{$self->{open_elements}} == 1) {
4688     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4689 wakaba 1.1 ## Ignore the token
4690     !!!next-token;
4691     } else {
4692 wakaba 1.3 pop @{$self->{open_elements}};
4693 wakaba 1.1 !!!next-token;
4694     }
4695    
4696     ## if not inner_html and
4697 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4698     $self->{insertion_mode} = 'after frameset';
4699 wakaba 1.1 }
4700     redo B;
4701     } else {
4702     #
4703     }
4704     } else {
4705     #
4706     }
4707    
4708 wakaba 1.3 if (defined $token->{tag_name}) {
4709     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4710     } else {
4711     !!!parse-error (type => 'in frameset:#'.$token->{type});
4712     }
4713 wakaba 1.1 ## Ignore the token
4714     !!!next-token;
4715     redo B;
4716 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4717 wakaba 1.1 if ($token->{type} eq 'character') {
4718     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4719 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4720 wakaba 1.1
4721     unless (length $token->{data}) {
4722     !!!next-token;
4723     redo B;
4724     }
4725     }
4726    
4727     #
4728     } elsif ($token->{type} eq 'comment') {
4729     my $comment = $self->{document}->create_comment ($token->{data});
4730 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4731 wakaba 1.1 !!!next-token;
4732     redo B;
4733     } elsif ($token->{type} eq 'start tag') {
4734     if ($token->{tag_name} eq 'noframes') {
4735     $in_body->($insert_to_current);
4736     redo B;
4737     } else {
4738     #
4739     }
4740     } elsif ($token->{type} eq 'end tag') {
4741     if ($token->{tag_name} eq 'html') {
4742     $phase = 'trailing end';
4743     !!!next-token;
4744     redo B;
4745     } else {
4746     #
4747     }
4748     } else {
4749     #
4750     }
4751    
4752 wakaba 1.3 if (defined $token->{tag_name}) {
4753     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4754     } else {
4755     !!!parse-error (type => 'after frameset:#'.$token->{type});
4756     }
4757 wakaba 1.1 ## Ignore the token
4758     !!!next-token;
4759     redo B;
4760    
4761     ## ISSUE: An issue in spec there
4762     } else {
4763 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4764 wakaba 1.1 }
4765     }
4766     } elsif ($phase eq 'trailing end') {
4767     ## states in the main stage is preserved yet # MUST
4768    
4769     if ($token->{type} eq 'DOCTYPE') {
4770 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4771 wakaba 1.1 ## Ignore the token
4772     !!!next-token;
4773     redo B;
4774     } elsif ($token->{type} eq 'comment') {
4775     my $comment = $self->{document}->create_comment ($token->{data});
4776     $self->{document}->append_child ($comment);
4777     !!!next-token;
4778     redo B;
4779     } elsif ($token->{type} eq 'character') {
4780     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4781     my $data = $1;
4782     ## As if in the main phase.
4783     ## NOTE: The insertion mode in the main phase
4784     ## just before the phase has been changed to the trailing
4785     ## end phase is either "after body" or "after frameset".
4786     $reconstruct_active_formatting_elements->($insert_to_current)
4787     if $phase eq 'main';
4788    
4789 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4790 wakaba 1.1
4791     unless (length $token->{data}) {
4792     !!!next-token;
4793     redo B;
4794     }
4795     }
4796    
4797 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4798 wakaba 1.1 $phase = 'main';
4799     ## reprocess
4800     redo B;
4801     } elsif ($token->{type} eq 'start tag' or
4802     $token->{type} eq 'end tag') {
4803 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4804 wakaba 1.1 $phase = 'main';
4805     ## reprocess
4806     redo B;
4807     } elsif ($token->{type} eq 'end-of-file') {
4808     ## Stop parsing
4809     last B;
4810     } else {
4811     die "$0: $token->{type}: Unknown token";
4812     }
4813     }
4814     } # B
4815    
4816     ## Stop parsing # MUST
4817    
4818     ## TODO: script stuffs
4819 wakaba 1.3 } # _tree_construct_main
4820    
## Implements the |innerHTML| setter for a Document or Element node.
##
## Arguments (after the invocant):
##   $class   - parser class; |parse_string| and |new| are invoked on it.
##   $node    - the node whose content is replaced.  Node type 9
##              (Document) and node type 1 (Element) are supported.
##   $_[0]    - the markup string.  It is accessed through a reference,
##              so the caller's string is not copied.
##   $_[1]    - optional error handler.  In the element case it is called
##              with the arguments given to the parser's |parse_error|
##              callback followed by |line => ..., column => ...|.  When
##              omitted, a handler that |warn|s is used.
##
## Dies if |$node| is neither a Document nor an Element.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Iterate over a copy of the child list, since nodes are removed
    ## while looping.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## The fragment is parsed into a scratch document; the result is
    ## moved under |$node| in Step 12.
    my $doc = $node->owner_document->implementation->create_document;
    ## TODO: Mark as HTML document
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 9 # MUST
    my $i = 0;       # offset of the next unread character in |$$s|
    my $line = 1;
    my $column = 0;
    $p->{set_next_input_character} = sub {
      my $self = shift;
      ## -1 denotes the end of the input.
      $self->{next_input_character} = -1 and return if $i >= length $$s;
      $self->{next_input_character} = ord substr $$s, $i++, 1;
      $column++;

      if ($self->{next_input_character} == 0x000A) { # LF
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} == 0x000D) { # CR
        ## CR and CR LF are both reported as a single LF.  Only peek at
        ## the following character and consume it when it is LF; any
        ## other character stays in the input so that the next call
        ## normalises it (CR, NULL, out of range) and counts its
        ## line/column like any other character.  Earlier code consumed
        ## it here and pushed its raw code point onto |$self->{char}|,
        ## which skipped that processing.
        $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_input_character} == 0x0000) { # NULL
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Error reporting: add the current position to whatever the parser
    ## passes to |parse_error|.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(@_, line => $line, column => $column);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model flag depends on the context element.
    my $node_ln = $node->local_name;
    $p->{content_model_flag} = {
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    }->{$node_ln} || 'PCDATA';
    ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 7 # MUST
    $p->_reset_insertion_mode;

    ## Step 8 # MUST
    ## The form element pointer is the nearest XHTML |form| element
    ## among |$node| and its ancestors, if any.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->local_name eq 'form') { ## TODO: case?
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 3 # MUST
    ## Step 10 # MUST
    {
      ## NOTE: |!!!next-token| is a source macro; |$self| is bound here
      ## presumably because its expansion refers to |$self|.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 11 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $node->append_child ($_);
    }
    ## ISSUE: adopt_node? mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
4974    
4975     } # tree construction stage
4976 wakaba 1.1
## Implements the |innerHTML| getter: serialises the children of |$node|
## as HTML markup.
##
## Arguments:
##   (invocant) - ignored.
##   $node      - the node whose children are serialised.
##   $on_error  - optional code reference; called with the child node
##                for every node type that cannot be serialised.
##
## Returns a reference to the resulting string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted without escaping.
  my $cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Elements that are serialised as a start tag only.
  my $void_element = {
    area => 1, base => 1, basefont => 1, bgsound => 1,
    br => 1, col => 1, embed => 1, frame => 1, hr => 1,
    img => 1, input => 1, link => 1, meta => 1, param => 1,
    spacer => 1, wbr => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is left unescaped if |$node| itself or any of its ancestors
  ## is an XHTML CDATA-like element.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI may be undefined (null namespace).
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and
          $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue processed from the front.  It holds nodes
  ## and two kinds of plain strings: end tags to be appended verbatim,
  ## and the marker |cdata-out|, which ends unescaped mode.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      next C if $void_element->{$tag_name};

      ## Entering a CDATA-like element: schedule the marker that
      ## re-enables escaping.  The end tag is unshifted after it below,
      ## so the marker is reached only after the end tag.
      if (not $in_cdata and $cdata_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATA section
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # Document type
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children take the place of the entity reference, so they
      ## go to the FRONT of the queue.  Appending them to the end would
      ## emit them after all following siblings and pending end tags.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5073    
5074     1;
5075 wakaba 1.7 # $Date: 2007/05/26 08:12:34 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24