/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download) (as text)
Sat May 19 11:37:24 2007 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.4: +4 -3 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	19 May 2007 11:37:20 -0000
	* tree-test-1.dat: Tests for |<iframe>|, |<noframes>|,
	and |<xmp>| are added.

2007-05-19  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	19 May 2007 11:36:48 -0000
	* HTML.pm.src: In |main| phase, |in body| insertion
	mode, action for |<iframe>| was missing.

2007-05-19  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.5 our $VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Set of start tag names for which a "/" immediately before ">" is
## accepted by the tokenizer without reporting a |nestc| parse error.
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  ),
};
20    
## Named character references: entity name (case-sensitive) mapped to
## the one-character string it stands for.  Keys are quoted explicitly
## because several names (|and|, |or|, |not|, |lt|, |gt|, |le|, |ge|,
## |ne|, |int|, |sub|) are also Perl operators or keywords.
my $entity_char = {
  'AElig'    => "\x{00C6}",
  'Aacute'   => "\x{00C1}",
  'Acirc'    => "\x{00C2}",
  'Agrave'   => "\x{00C0}",
  'Alpha'    => "\x{0391}",
  'Aring'    => "\x{00C5}",
  'Atilde'   => "\x{00C3}",
  'Auml'     => "\x{00C4}",
  'Beta'     => "\x{0392}",
  'Ccedil'   => "\x{00C7}",
  'Chi'      => "\x{03A7}",
  'Dagger'   => "\x{2021}",
  'Delta'    => "\x{0394}",
  'ETH'      => "\x{00D0}",
  'Eacute'   => "\x{00C9}",
  'Ecirc'    => "\x{00CA}",
  'Egrave'   => "\x{00C8}",
  'Epsilon'  => "\x{0395}",
  'Eta'      => "\x{0397}",
  'Euml'     => "\x{00CB}",
  'Gamma'    => "\x{0393}",
  'Iacute'   => "\x{00CD}",
  'Icirc'    => "\x{00CE}",
  'Igrave'   => "\x{00CC}",
  'Iota'     => "\x{0399}",
  'Iuml'     => "\x{00CF}",
  'Kappa'    => "\x{039A}",
  'Lambda'   => "\x{039B}",
  'Mu'       => "\x{039C}",
  'Ntilde'   => "\x{00D1}",
  'Nu'       => "\x{039D}",
  'OElig'    => "\x{0152}",
  'Oacute'   => "\x{00D3}",
  'Ocirc'    => "\x{00D4}",
  'Ograve'   => "\x{00D2}",
  'Omega'    => "\x{03A9}",
  'Omicron'  => "\x{039F}",
  'Oslash'   => "\x{00D8}",
  'Otilde'   => "\x{00D5}",
  'Ouml'     => "\x{00D6}",
  'Phi'      => "\x{03A6}",
  'Pi'       => "\x{03A0}",
  'Prime'    => "\x{2033}",
  'Psi'      => "\x{03A8}",
  'Rho'      => "\x{03A1}",
  'Scaron'   => "\x{0160}",
  'Sigma'    => "\x{03A3}",
  'THORN'    => "\x{00DE}",
  'Tau'      => "\x{03A4}",
  'Theta'    => "\x{0398}",
  'Uacute'   => "\x{00DA}",
  'Ucirc'    => "\x{00DB}",
  'Ugrave'   => "\x{00D9}",
  'Upsilon'  => "\x{03A5}",
  'Uuml'     => "\x{00DC}",
  'Xi'       => "\x{039E}",
  'Yacute'   => "\x{00DD}",
  'Yuml'     => "\x{0178}",
  'Zeta'     => "\x{0396}",
  'aacute'   => "\x{00E1}",
  'acirc'    => "\x{00E2}",
  'acute'    => "\x{00B4}",
  'aelig'    => "\x{00E6}",
  'agrave'   => "\x{00E0}",
  'alefsym'  => "\x{2135}",
  'alpha'    => "\x{03B1}",
  'amp'      => "\x{0026}",
  'AMP'      => "\x{0026}",
  'and'      => "\x{2227}",
  'ang'      => "\x{2220}",
  'apos'     => "\x{0027}",
  'aring'    => "\x{00E5}",
  'asymp'    => "\x{2248}",
  'atilde'   => "\x{00E3}",
  'auml'     => "\x{00E4}",
  'bdquo'    => "\x{201E}",
  'beta'     => "\x{03B2}",
  'brvbar'   => "\x{00A6}",
  'bull'     => "\x{2022}",
  'cap'      => "\x{2229}",
  'ccedil'   => "\x{00E7}",
  'cedil'    => "\x{00B8}",
  'cent'     => "\x{00A2}",
  'chi'      => "\x{03C7}",
  'circ'     => "\x{02C6}",
  'clubs'    => "\x{2663}",
  'cong'     => "\x{2245}",
  'copy'     => "\x{00A9}",
  'COPY'     => "\x{00A9}",
  'crarr'    => "\x{21B5}",
  'cup'      => "\x{222A}",
  'curren'   => "\x{00A4}",
  'dArr'     => "\x{21D3}",
  'dagger'   => "\x{2020}",
  'darr'     => "\x{2193}",
  'deg'      => "\x{00B0}",
  'delta'    => "\x{03B4}",
  'diams'    => "\x{2666}",
  'divide'   => "\x{00F7}",
  'eacute'   => "\x{00E9}",
  'ecirc'    => "\x{00EA}",
  'egrave'   => "\x{00E8}",
  'empty'    => "\x{2205}",
  'emsp'     => "\x{2003}",
  'ensp'     => "\x{2002}",
  'epsilon'  => "\x{03B5}",
  'equiv'    => "\x{2261}",
  'eta'      => "\x{03B7}",
  'eth'      => "\x{00F0}",
  'euml'     => "\x{00EB}",
  'euro'     => "\x{20AC}",
  'exist'    => "\x{2203}",
  'fnof'     => "\x{0192}",
  'forall'   => "\x{2200}",
  'frac12'   => "\x{00BD}",
  'frac14'   => "\x{00BC}",
  'frac34'   => "\x{00BE}",
  'frasl'    => "\x{2044}",
  'gamma'    => "\x{03B3}",
  'ge'       => "\x{2265}",
  'gt'       => "\x{003E}",
  'GT'       => "\x{003E}",
  'hArr'     => "\x{21D4}",
  'harr'     => "\x{2194}",
  'hearts'   => "\x{2665}",
  'hellip'   => "\x{2026}",
  'iacute'   => "\x{00ED}",
  'icirc'    => "\x{00EE}",
  'iexcl'    => "\x{00A1}",
  'igrave'   => "\x{00EC}",
  'image'    => "\x{2111}",
  'infin'    => "\x{221E}",
  'int'      => "\x{222B}",
  'iota'     => "\x{03B9}",
  'iquest'   => "\x{00BF}",
  'isin'     => "\x{2208}",
  'iuml'     => "\x{00EF}",
  'kappa'    => "\x{03BA}",
  'lArr'     => "\x{21D0}",
  'lambda'   => "\x{03BB}",
  'lang'     => "\x{2329}",
  'laquo'    => "\x{00AB}",
  'larr'     => "\x{2190}",
  'lceil'    => "\x{2308}",
  'ldquo'    => "\x{201C}",
  'le'       => "\x{2264}",
  'lfloor'   => "\x{230A}",
  'lowast'   => "\x{2217}",
  'loz'      => "\x{25CA}",
  'lrm'      => "\x{200E}",
  'lsaquo'   => "\x{2039}",
  'lsquo'    => "\x{2018}",
  'lt'       => "\x{003C}",
  'LT'       => "\x{003C}",
  'macr'     => "\x{00AF}",
  'mdash'    => "\x{2014}",
  'micro'    => "\x{00B5}",
  'middot'   => "\x{00B7}",
  'minus'    => "\x{2212}",
  'mu'       => "\x{03BC}",
  'nabla'    => "\x{2207}",
  'nbsp'     => "\x{00A0}",
  'ndash'    => "\x{2013}",
  'ne'       => "\x{2260}",
  'ni'       => "\x{220B}",
  'not'      => "\x{00AC}",
  'notin'    => "\x{2209}",
  'nsub'     => "\x{2284}",
  'ntilde'   => "\x{00F1}",
  'nu'       => "\x{03BD}",
  'oacute'   => "\x{00F3}",
  'ocirc'    => "\x{00F4}",
  'oelig'    => "\x{0153}",
  'ograve'   => "\x{00F2}",
  'oline'    => "\x{203E}",
  'omega'    => "\x{03C9}",
  'omicron'  => "\x{03BF}",
  'oplus'    => "\x{2295}",
  'or'       => "\x{2228}",
  'ordf'     => "\x{00AA}",
  'ordm'     => "\x{00BA}",
  'oslash'   => "\x{00F8}",
  'otilde'   => "\x{00F5}",
  'otimes'   => "\x{2297}",
  'ouml'     => "\x{00F6}",
  'para'     => "\x{00B6}",
  'part'     => "\x{2202}",
  'permil'   => "\x{2030}",
  'perp'     => "\x{22A5}",
  'phi'      => "\x{03C6}",
  'pi'       => "\x{03C0}",
  'piv'      => "\x{03D6}",
  'plusmn'   => "\x{00B1}",
  'pound'    => "\x{00A3}",
  'prime'    => "\x{2032}",
  'prod'     => "\x{220F}",
  'prop'     => "\x{221D}",
  'psi'      => "\x{03C8}",
  'quot'     => "\x{0022}",
  'QUOT'     => "\x{0022}",
  'rArr'     => "\x{21D2}",
  'radic'    => "\x{221A}",
  'rang'     => "\x{232A}",
  'raquo'    => "\x{00BB}",
  'rarr'     => "\x{2192}",
  'rceil'    => "\x{2309}",
  'rdquo'    => "\x{201D}",
  'real'     => "\x{211C}",
  'reg'      => "\x{00AE}",
  'REG'      => "\x{00AE}",
  'rfloor'   => "\x{230B}",
  'rho'      => "\x{03C1}",
  'rlm'      => "\x{200F}",
  'rsaquo'   => "\x{203A}",
  'rsquo'    => "\x{2019}",
  'sbquo'    => "\x{201A}",
  'scaron'   => "\x{0161}",
  'sdot'     => "\x{22C5}",
  'sect'     => "\x{00A7}",
  'shy'      => "\x{00AD}",
  'sigma'    => "\x{03C3}",
  'sigmaf'   => "\x{03C2}",
  'sim'      => "\x{223C}",
  'spades'   => "\x{2660}",
  'sub'      => "\x{2282}",
  'sube'     => "\x{2286}",
  'sum'      => "\x{2211}",
  'sup'      => "\x{2283}",
  'sup1'     => "\x{00B9}",
  'sup2'     => "\x{00B2}",
  'sup3'     => "\x{00B3}",
  'supe'     => "\x{2287}",
  'szlig'    => "\x{00DF}",
  'tau'      => "\x{03C4}",
  'there4'   => "\x{2234}",
  'theta'    => "\x{03B8}",
  'thetasym' => "\x{03D1}",
  'thinsp'   => "\x{2009}",
  'thorn'    => "\x{00FE}",
  'tilde'    => "\x{02DC}",
  'times'    => "\x{00D7}",
  'trade'    => "\x{2122}",
  'uArr'     => "\x{21D1}",
  'uacute'   => "\x{00FA}",
  'uarr'     => "\x{2191}",
  'ucirc'    => "\x{00FB}",
  'ugrave'   => "\x{00F9}",
  'uml'      => "\x{00A8}",
  'upsih'    => "\x{03D2}",
  'upsilon'  => "\x{03C5}",
  'uuml'     => "\x{00FC}",
  'weierp'   => "\x{2118}",
  'xi'       => "\x{03BE}",
  'yacute'   => "\x{00FD}",
  'yen'      => "\x{00A5}",
  'yuml'     => "\x{00FF}",
  'zeta'     => "\x{03B6}",
  'zwj'      => "\x{200D}",
  'zwnj'     => "\x{200C}",
}; # $entity_char
282    
283     ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
## Replacement table for the code points 0x80..0x9F: key is the
## original number, value is the code point to use instead.  The
## values coincide with the Windows-1252 assignments for those bytes;
## positions without an assignment map to U+FFFD.
## NOTE(review): presumably consulted when resolving numeric character
## references -- the consumer is not part of this section.
my $c1_entity_char = {
  0x80 => 0x20AC,
  0x81 => 0xFFFD,
  0x82 => 0x201A,
  0x83 => 0x0192,
  0x84 => 0x201E,
  0x85 => 0x2026,
  0x86 => 0x2020,
  0x87 => 0x2021,
  0x88 => 0x02C6,
  0x89 => 0x2030,
  0x8A => 0x0160,
  0x8B => 0x2039,
  0x8C => 0x0152,
  0x8D => 0xFFFD,
  0x8E => 0x017D,
  0x8F => 0xFFFD,
  0x90 => 0xFFFD,
  0x91 => 0x2018,
  0x92 => 0x2019,
  0x93 => 0x201C,
  0x94 => 0x201D,
  0x95 => 0x2022,
  0x96 => 0x2013,
  0x97 => 0x2014,
  0x98 => 0x02DC,
  0x99 => 0x2122,
  0x9A => 0x0161,
  0x9B => 0x203A,
  0x9C => 0x0153,
  0x9D => 0xFFFD,
  0x9E => 0x017E,
  0x9F => 0x0178,
}; # $c1_entity_char
318 wakaba 1.1
## Set of element names belonging to the "special" category.
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  ),
};
## Set of element names belonging to the "scoping" category.
my $scoping_category = {
  map { ($_ => 1) } qw(
    button caption html marquee object table td th
  ),
};
## Set of element names belonging to the "formatting" category.
## (The key was previously misspelled |strile|, so |strike| elements
## were never recognized as formatting elements.)
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
340    
## parse_string ($invocant, $string, $document[, $onerror])
##
## Creates a fresh parser via |$invocant->new|, parses |$string| and
## returns |$document| (which is stored as |$self->{document}| for the
## tree constructor).
##
##   $string   - The source text, read one character at a time.
##   $document - The object to be populated; returned as is.
##   $onerror  - Optional code reference called for each parse error
##               with the key/value pairs given by the reporter plus
##               |line| and |column|.  Defaults to a handler that
##               |warn|s.
##
## The input reader installed here yields character codes with these
## normalizations: CR and CR LF become a single LF; NULL and anything
## above U+10FFFF become U+FFFD; end of input is reported as -1.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $i = 0;      # offset of the next unread character in |$$s|
  my $line = 1;
  my $column = 0;
  $self->{set_next_input_character} = sub {
    my $self = shift;

    if ($i >= length ($$s)) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    $self->{next_input_character} = ord substr $$s, $i++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## Only peek at the following character: an LF is swallowed as
      ## part of the CR LF pair, anything else is left unread so that
      ## it goes through this reader (and its normalization and
      ## line/column accounting) on the next call.  Consuming it here
      ## and pushing it back raw would let a second CR or a NULL
      ## through unnormalized.
      $i++ if $i < length ($$s) and substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Error reports are tagged with the reader's current position.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
396    
## new ($class)
##
## Creates a parser object with placeholder callbacks: an input reader
## that always reports end of input (-1) and an error handler that
## ignores everything.
##
## The reader takes the parser as its first argument, as the reader
## installed by |parse_string| does, rather than closing over |$self|;
## capturing |$self| would create a reference cycle and the object
## would never be freed.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    my $self = shift;
    $self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
408    
409     ## Implementations MUST act as if they used the state machine in the spec
410    
## _initialize_tokenizer ($self)
##
## Resets the tokenizer part of the parser state and loads the first
## input character.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA';

  ## No token (start tag, end tag, comment, or DOCTYPE) or attribute is
  ## under construction yet, and no start tag has been emitted.
  $self->{$_} = undef for qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  );

  ## The buffer must exist before the first character is requested.
  $self->{char} = [];
  ## Sets |$self->{next_input_character}|.
  !!!next-input-character;

  ## Queue from which |_get_next_token| hands out pending tokens first.
  $self->{token} = [];
} # _initialize_tokenizer
424    
425     ## A token has:
426     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
427     ## 'character', or 'end-of-file'
428     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
429     ## ISSUE: the spec need s/tagname/tag name/
430     ## ->{error} == 1 or 0 (DOCTYPE)
431     ## ->{attributes} isa HASH (start tag, end tag)
432     ## ->{data} (comment, character)
433    
434     ## Macros
435     ## Macros MUST be preceded by three EXCLAMATION MARKs.
436     ## emit ($token)
437     ## Emits the specified token.
438    
439     ## Emitted token MUST immediately be handled by the tree construction state.
440    
441     ## Before each step, UA MAY check to see if either one of the scripts in
442     ## "list of scripts that will execute as soon as possible" or the first
443     ## script in the "list of scripts that will execute asynchronously",
444     ## has completed loading. If one has, then it MUST be executed
445     ## and removed from the list.
446    
447     sub _get_next_token ($) {
448     my $self = shift;
449     if (@{$self->{token}}) {
450     return shift @{$self->{token}};
451     }
452    
453     A: {
454     if ($self->{state} eq 'data') {
455     if ($self->{next_input_character} == 0x0026) { # &
456     if ($self->{content_model_flag} eq 'PCDATA' or
457     $self->{content_model_flag} eq 'RCDATA') {
458     $self->{state} = 'entity data';
459     !!!next-input-character;
460     redo A;
461     } else {
462     #
463     }
464     } elsif ($self->{next_input_character} == 0x003C) { # <
465     if ($self->{content_model_flag} ne 'PLAINTEXT') {
466     $self->{state} = 'tag open';
467     !!!next-input-character;
468     redo A;
469     } else {
470     #
471     }
472     } elsif ($self->{next_input_character} == -1) {
473     !!!emit ({type => 'end-of-file'});
474     last A; ## TODO: ok?
475     }
476     # Anything else
477     my $token = {type => 'character',
478     data => chr $self->{next_input_character}};
479     ## Stay in the data state
480     !!!next-input-character;
481    
482     !!!emit ($token);
483    
484     redo A;
485     } elsif ($self->{state} eq 'entity data') {
486     ## (cannot happen in CDATA state)
487    
488     my $token = $self->_tokenize_attempt_to_consume_an_entity;
489    
490     $self->{state} = 'data';
491     # next-input-character is already done
492    
493     unless (defined $token) {
494     !!!emit ({type => 'character', data => '&'});
495     } else {
496     !!!emit ($token);
497     }
498    
499     redo A;
500     } elsif ($self->{state} eq 'tag open') {
501     if ($self->{content_model_flag} eq 'RCDATA' or
502     $self->{content_model_flag} eq 'CDATA') {
503     if ($self->{next_input_character} == 0x002F) { # /
504     !!!next-input-character;
505     $self->{state} = 'close tag open';
506     redo A;
507     } else {
508     ## reconsume
509     $self->{state} = 'data';
510    
511     !!!emit ({type => 'character', data => '<'});
512    
513     redo A;
514     }
515     } elsif ($self->{content_model_flag} eq 'PCDATA') {
516     if ($self->{next_input_character} == 0x0021) { # !
517     $self->{state} = 'markup declaration open';
518     !!!next-input-character;
519     redo A;
520     } elsif ($self->{next_input_character} == 0x002F) { # /
521     $self->{state} = 'close tag open';
522     !!!next-input-character;
523     redo A;
524     } elsif (0x0041 <= $self->{next_input_character} and
525     $self->{next_input_character} <= 0x005A) { # A..Z
526     $self->{current_token}
527     = {type => 'start tag',
528     tag_name => chr ($self->{next_input_character} + 0x0020)};
529     $self->{state} = 'tag name';
530     !!!next-input-character;
531     redo A;
532     } elsif (0x0061 <= $self->{next_input_character} and
533     $self->{next_input_character} <= 0x007A) { # a..z
534     $self->{current_token} = {type => 'start tag',
535     tag_name => chr ($self->{next_input_character})};
536     $self->{state} = 'tag name';
537     !!!next-input-character;
538     redo A;
539     } elsif ($self->{next_input_character} == 0x003E) { # >
540 wakaba 1.3 !!!parse-error (type => 'empty start tag');
541 wakaba 1.1 $self->{state} = 'data';
542     !!!next-input-character;
543    
544     !!!emit ({type => 'character', data => '<>'});
545    
546     redo A;
547     } elsif ($self->{next_input_character} == 0x003F) { # ?
548 wakaba 1.3 !!!parse-error (type => 'pio');
549 wakaba 1.1 $self->{state} = 'bogus comment';
550     ## $self->{next_input_character} is intentionally left as is
551     redo A;
552     } else {
553 wakaba 1.3 !!!parse-error (type => 'bare stago');
554 wakaba 1.1 $self->{state} = 'data';
555     ## reconsume
556    
557     !!!emit ({type => 'character', data => '<'});
558    
559     redo A;
560     }
561     } else {
562     die "$0: $self->{content_model_flag}: Unknown content model flag";
563     }
564     } elsif ($self->{state} eq 'close tag open') {
565     if ($self->{content_model_flag} eq 'RCDATA' or
566     $self->{content_model_flag} eq 'CDATA') {
567     my @next_char;
568     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
569     push @next_char, $self->{next_input_character};
570     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
571     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
572     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
573     !!!next-input-character;
574     next TAGNAME;
575     } else {
576 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
577 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
578     !!!back-next-input-character (@next_char);
579     $self->{state} = 'data';
580    
581     !!!emit ({type => 'character', data => '</'});
582    
583     redo A;
584     }
585     }
586     push @next_char, $self->{next_input_character};
587    
588     unless ($self->{next_input_character} == 0x0009 or # HT
589     $self->{next_input_character} == 0x000A or # LF
590     $self->{next_input_character} == 0x000B or # VT
591     $self->{next_input_character} == 0x000C or # FF
592     $self->{next_input_character} == 0x0020 or # SP
593     $self->{next_input_character} == 0x003E or # >
594     $self->{next_input_character} == 0x002F or # /
595     $self->{next_input_character} == 0x003C or # <
596     $self->{next_input_character} == -1) {
597 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
598 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
599     !!!back-next-input-character (@next_char);
600     $self->{state} = 'data';
601    
602     !!!emit ({type => 'character', data => '</'});
603    
604     redo A;
605     } else {
606     $self->{next_input_character} = shift @next_char;
607     !!!back-next-input-character (@next_char);
608     # and consume...
609     }
610     }
611    
612     if (0x0041 <= $self->{next_input_character} and
613     $self->{next_input_character} <= 0x005A) { # A..Z
614     $self->{current_token} = {type => 'end tag',
615     tag_name => chr ($self->{next_input_character} + 0x0020)};
616     $self->{state} = 'tag name';
617     !!!next-input-character;
618     redo A;
619     } elsif (0x0061 <= $self->{next_input_character} and
620     $self->{next_input_character} <= 0x007A) { # a..z
621     $self->{current_token} = {type => 'end tag',
622     tag_name => chr ($self->{next_input_character})};
623     $self->{state} = 'tag name';
624     !!!next-input-character;
625     redo A;
626     } elsif ($self->{next_input_character} == 0x003E) { # >
627 wakaba 1.3 !!!parse-error (type => 'empty end tag');
628 wakaba 1.1 $self->{state} = 'data';
629     !!!next-input-character;
630     redo A;
631     } elsif ($self->{next_input_character} == -1) {
632 wakaba 1.3 !!!parse-error (type => 'bare etago');
633 wakaba 1.1 $self->{state} = 'data';
634     # reconsume
635    
636     !!!emit ({type => 'character', data => '</'});
637    
638     redo A;
639     } else {
640 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
641 wakaba 1.1 $self->{state} = 'bogus comment';
642     ## $self->{next_input_character} is intentionally left as is
643     redo A;
644     }
645     } elsif ($self->{state} eq 'tag name') {
646     if ($self->{next_input_character} == 0x0009 or # HT
647     $self->{next_input_character} == 0x000A or # LF
648     $self->{next_input_character} == 0x000B or # VT
649     $self->{next_input_character} == 0x000C or # FF
650     $self->{next_input_character} == 0x0020) { # SP
651     $self->{state} = 'before attribute name';
652     !!!next-input-character;
653     redo A;
654     } elsif ($self->{next_input_character} == 0x003E) { # >
655     if ($self->{current_token}->{type} eq 'start tag') {
656     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
657     } elsif ($self->{current_token}->{type} eq 'end tag') {
658     $self->{content_model_flag} = 'PCDATA'; # MUST
659     if ($self->{current_token}->{attributes}) {
660 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
661 wakaba 1.1 }
662     } else {
663     die "$0: $self->{current_token}->{type}: Unknown token type";
664     }
665     $self->{state} = 'data';
666     !!!next-input-character;
667    
668     !!!emit ($self->{current_token}); # start tag or end tag
669     undef $self->{current_token};
670    
671     redo A;
672     } elsif (0x0041 <= $self->{next_input_character} and
673     $self->{next_input_character} <= 0x005A) { # A..Z
674     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
675     # start tag or end tag
676     ## Stay in this state
677     !!!next-input-character;
678     redo A;
679     } elsif ($self->{next_input_character} == 0x003C or # <
680     $self->{next_input_character} == -1) {
681 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
682 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
683     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
684     } elsif ($self->{current_token}->{type} eq 'end tag') {
685     $self->{content_model_flag} = 'PCDATA'; # MUST
686     if ($self->{current_token}->{attributes}) {
687 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
688 wakaba 1.1 }
689     } else {
690     die "$0: $self->{current_token}->{type}: Unknown token type";
691     }
692     $self->{state} = 'data';
693     # reconsume
694    
695     !!!emit ($self->{current_token}); # start tag or end tag
696     undef $self->{current_token};
697    
698     redo A;
699     } elsif ($self->{next_input_character} == 0x002F) { # /
700     !!!next-input-character;
701     if ($self->{next_input_character} == 0x003E and # >
702     $self->{current_token}->{type} eq 'start tag' and
703     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
704     # permitted slash
705     #
706     } else {
707 wakaba 1.3 !!!parse-error (type => 'nestc');
708 wakaba 1.1 }
709     $self->{state} = 'before attribute name';
710     # next-input-character is already done
711     redo A;
712     } else {
713     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
714     # start tag or end tag
715     ## Stay in the state
716     !!!next-input-character;
717     redo A;
718     }
719     } elsif ($self->{state} eq 'before attribute name') {
720     if ($self->{next_input_character} == 0x0009 or # HT
721     $self->{next_input_character} == 0x000A or # LF
722     $self->{next_input_character} == 0x000B or # VT
723     $self->{next_input_character} == 0x000C or # FF
724     $self->{next_input_character} == 0x0020) { # SP
725     ## Stay in the state
726     !!!next-input-character;
727     redo A;
728     } elsif ($self->{next_input_character} == 0x003E) { # >
729     if ($self->{current_token}->{type} eq 'start tag') {
730     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
731     } elsif ($self->{current_token}->{type} eq 'end tag') {
732     $self->{content_model_flag} = 'PCDATA'; # MUST
733     if ($self->{current_token}->{attributes}) {
734 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
735 wakaba 1.1 }
736     } else {
737     die "$0: $self->{current_token}->{type}: Unknown token type";
738     }
739     $self->{state} = 'data';
740     !!!next-input-character;
741    
742     !!!emit ($self->{current_token}); # start tag or end tag
743     undef $self->{current_token};
744    
745     redo A;
746     } elsif (0x0041 <= $self->{next_input_character} and
747     $self->{next_input_character} <= 0x005A) { # A..Z
748     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
749     value => ''};
750     $self->{state} = 'attribute name';
751     !!!next-input-character;
752     redo A;
753     } elsif ($self->{next_input_character} == 0x002F) { # /
754     !!!next-input-character;
755     if ($self->{next_input_character} == 0x003E and # >
756     $self->{current_token}->{type} eq 'start tag' and
757     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
758     # permitted slash
759     #
760     } else {
761 wakaba 1.3 !!!parse-error (type => 'nestc');
762 wakaba 1.1 }
763     ## Stay in the state
764     # next-input-character is already done
765     redo A;
766     } elsif ($self->{next_input_character} == 0x003C or # <
767     $self->{next_input_character} == -1) {
768 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
769 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
770     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
771     } elsif ($self->{current_token}->{type} eq 'end tag') {
772     $self->{content_model_flag} = 'PCDATA'; # MUST
773     if ($self->{current_token}->{attributes}) {
774 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
775 wakaba 1.1 }
776     } else {
777     die "$0: $self->{current_token}->{type}: Unknown token type";
778     }
779     $self->{state} = 'data';
780     # reconsume
781    
782     !!!emit ($self->{current_token}); # start tag or end tag
783     undef $self->{current_token};
784    
785     redo A;
786     } else {
787     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
788     value => ''};
789     $self->{state} = 'attribute name';
790     !!!next-input-character;
791     redo A;
792     }
793     } elsif ($self->{state} eq 'attribute name') {
794     my $before_leave = sub {
795     if (exists $self->{current_token}->{attributes} # start tag or end tag
796     ->{$self->{current_attribute}->{name}}) { # MUST
797 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
798 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
799     } else {
800     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
801     = $self->{current_attribute};
802     }
803     }; # $before_leave
804    
805     if ($self->{next_input_character} == 0x0009 or # HT
806     $self->{next_input_character} == 0x000A or # LF
807     $self->{next_input_character} == 0x000B or # VT
808     $self->{next_input_character} == 0x000C or # FF
809     $self->{next_input_character} == 0x0020) { # SP
810     $before_leave->();
811     $self->{state} = 'after attribute name';
812     !!!next-input-character;
813     redo A;
814     } elsif ($self->{next_input_character} == 0x003D) { # =
815     $before_leave->();
816     $self->{state} = 'before attribute value';
817     !!!next-input-character;
818     redo A;
819     } elsif ($self->{next_input_character} == 0x003E) { # >
820     $before_leave->();
821     if ($self->{current_token}->{type} eq 'start tag') {
822     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
823     } elsif ($self->{current_token}->{type} eq 'end tag') {
824     $self->{content_model_flag} = 'PCDATA'; # MUST
825     if ($self->{current_token}->{attributes}) {
826 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
827 wakaba 1.1 }
828     } else {
829     die "$0: $self->{current_token}->{type}: Unknown token type";
830     }
831     $self->{state} = 'data';
832     !!!next-input-character;
833    
834     !!!emit ($self->{current_token}); # start tag or end tag
835     undef $self->{current_token};
836    
837     redo A;
838     } elsif (0x0041 <= $self->{next_input_character} and
839     $self->{next_input_character} <= 0x005A) { # A..Z
840     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
841     ## Stay in the state
842     !!!next-input-character;
843     redo A;
844     } elsif ($self->{next_input_character} == 0x002F) { # /
845     $before_leave->();
846     !!!next-input-character;
847     if ($self->{next_input_character} == 0x003E and # >
848     $self->{current_token}->{type} eq 'start tag' and
849     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
850     # permitted slash
851     #
852     } else {
853 wakaba 1.3 !!!parse-error (type => 'nestc');
854 wakaba 1.1 }
855     $self->{state} = 'before attribute name';
856     # next-input-character is already done
857     redo A;
858     } elsif ($self->{next_input_character} == 0x003C or # <
859     $self->{next_input_character} == -1) {
860 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
861 wakaba 1.1 $before_leave->();
862     if ($self->{current_token}->{type} eq 'start tag') {
863     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
864     } elsif ($self->{current_token}->{type} eq 'end tag') {
865     $self->{content_model_flag} = 'PCDATA'; # MUST
866     if ($self->{current_token}->{attributes}) {
867 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
868 wakaba 1.1 }
869     } else {
870     die "$0: $self->{current_token}->{type}: Unknown token type";
871     }
872     $self->{state} = 'data';
873     # reconsume
874    
875     !!!emit ($self->{current_token}); # start tag or end tag
876     undef $self->{current_token};
877    
878     redo A;
879     } else {
880     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
881     ## Stay in the state
882     !!!next-input-character;
883     redo A;
884     }
885     } elsif ($self->{state} eq 'after attribute name') {
886     if ($self->{next_input_character} == 0x0009 or # HT
887     $self->{next_input_character} == 0x000A or # LF
888     $self->{next_input_character} == 0x000B or # VT
889     $self->{next_input_character} == 0x000C or # FF
890     $self->{next_input_character} == 0x0020) { # SP
891     ## Stay in the state
892     !!!next-input-character;
893     redo A;
894     } elsif ($self->{next_input_character} == 0x003D) { # =
895     $self->{state} = 'before attribute value';
896     !!!next-input-character;
897     redo A;
898     } elsif ($self->{next_input_character} == 0x003E) { # >
899     if ($self->{current_token}->{type} eq 'start tag') {
900     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
901     } elsif ($self->{current_token}->{type} eq 'end tag') {
902     $self->{content_model_flag} = 'PCDATA'; # MUST
903     if ($self->{current_token}->{attributes}) {
904 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
905 wakaba 1.1 }
906     } else {
907     die "$0: $self->{current_token}->{type}: Unknown token type";
908     }
909     $self->{state} = 'data';
910     !!!next-input-character;
911    
912     !!!emit ($self->{current_token}); # start tag or end tag
913     undef $self->{current_token};
914    
915     redo A;
916     } elsif (0x0041 <= $self->{next_input_character} and
917     $self->{next_input_character} <= 0x005A) { # A..Z
918     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
919     value => ''};
920     $self->{state} = 'attribute name';
921     !!!next-input-character;
922     redo A;
923     } elsif ($self->{next_input_character} == 0x002F) { # /
924     !!!next-input-character;
925     if ($self->{next_input_character} == 0x003E and # >
926     $self->{current_token}->{type} eq 'start tag' and
927     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
928     # permitted slash
929     #
930     } else {
931 wakaba 1.3 !!!parse-error (type => 'nestc');
932 wakaba 1.1 }
933     $self->{state} = 'before attribute name';
934     # next-input-character is already done
935     redo A;
936     } elsif ($self->{next_input_character} == 0x003C or # <
937     $self->{next_input_character} == -1) {
938 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
939 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
940     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
941     } elsif ($self->{current_token}->{type} eq 'end tag') {
942     $self->{content_model_flag} = 'PCDATA'; # MUST
943     if ($self->{current_token}->{attributes}) {
944 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
945 wakaba 1.1 }
946     } else {
947     die "$0: $self->{current_token}->{type}: Unknown token type";
948     }
949     $self->{state} = 'data';
950     # reconsume
951    
952     !!!emit ($self->{current_token}); # start tag or end tag
953     undef $self->{current_token};
954    
955     redo A;
956     } else {
957     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
958     value => ''};
959     $self->{state} = 'attribute name';
960     !!!next-input-character;
961     redo A;
962     }
963     } elsif ($self->{state} eq 'before attribute value') {
964     if ($self->{next_input_character} == 0x0009 or # HT
965     $self->{next_input_character} == 0x000A or # LF
966     $self->{next_input_character} == 0x000B or # VT
967     $self->{next_input_character} == 0x000C or # FF
968     $self->{next_input_character} == 0x0020) { # SP
969     ## Stay in the state
970     !!!next-input-character;
971     redo A;
972     } elsif ($self->{next_input_character} == 0x0022) { # "
973     $self->{state} = 'attribute value (double-quoted)';
974     !!!next-input-character;
975     redo A;
976     } elsif ($self->{next_input_character} == 0x0026) { # &
977     $self->{state} = 'attribute value (unquoted)';
978     ## reconsume
979     redo A;
980     } elsif ($self->{next_input_character} == 0x0027) { # '
981     $self->{state} = 'attribute value (single-quoted)';
982     !!!next-input-character;
983     redo A;
984     } elsif ($self->{next_input_character} == 0x003E) { # >
985     if ($self->{current_token}->{type} eq 'start tag') {
986     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
987     } elsif ($self->{current_token}->{type} eq 'end tag') {
988     $self->{content_model_flag} = 'PCDATA'; # MUST
989     if ($self->{current_token}->{attributes}) {
990 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
991 wakaba 1.1 }
992     } else {
993     die "$0: $self->{current_token}->{type}: Unknown token type";
994     }
995     $self->{state} = 'data';
996     !!!next-input-character;
997    
998     !!!emit ($self->{current_token}); # start tag or end tag
999     undef $self->{current_token};
1000    
1001     redo A;
1002     } elsif ($self->{next_input_character} == 0x003C or # <
1003     $self->{next_input_character} == -1) {
1004 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1005 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1006     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1007     } elsif ($self->{current_token}->{type} eq 'end tag') {
1008     $self->{content_model_flag} = 'PCDATA'; # MUST
1009     if ($self->{current_token}->{attributes}) {
1010 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1011 wakaba 1.1 }
1012     } else {
1013     die "$0: $self->{current_token}->{type}: Unknown token type";
1014     }
1015     $self->{state} = 'data';
1016     ## reconsume
1017    
1018     !!!emit ($self->{current_token}); # start tag or end tag
1019     undef $self->{current_token};
1020    
1021     redo A;
1022     } else {
1023     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1024     $self->{state} = 'attribute value (unquoted)';
1025     !!!next-input-character;
1026     redo A;
1027     }
1028     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1029     if ($self->{next_input_character} == 0x0022) { # "
1030     $self->{state} = 'before attribute name';
1031     !!!next-input-character;
1032     redo A;
1033     } elsif ($self->{next_input_character} == 0x0026) { # &
1034     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1035     $self->{state} = 'entity in attribute value';
1036     !!!next-input-character;
1037     redo A;
1038     } elsif ($self->{next_input_character} == -1) {
1039 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1040 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1041     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1042     } elsif ($self->{current_token}->{type} eq 'end tag') {
1043     $self->{content_model_flag} = 'PCDATA'; # MUST
1044     if ($self->{current_token}->{attributes}) {
1045 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1046 wakaba 1.1 }
1047     } else {
1048     die "$0: $self->{current_token}->{type}: Unknown token type";
1049     }
1050     $self->{state} = 'data';
1051     ## reconsume
1052    
1053     !!!emit ($self->{current_token}); # start tag or end tag
1054     undef $self->{current_token};
1055    
1056     redo A;
1057     } else {
1058     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1059     ## Stay in the state
1060     !!!next-input-character;
1061     redo A;
1062     }
1063     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1064     if ($self->{next_input_character} == 0x0027) { # '
1065     $self->{state} = 'before attribute name';
1066     !!!next-input-character;
1067     redo A;
1068     } elsif ($self->{next_input_character} == 0x0026) { # &
1069     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1070     $self->{state} = 'entity in attribute value';
1071     !!!next-input-character;
1072     redo A;
1073     } elsif ($self->{next_input_character} == -1) {
1074 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1075 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1076     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1077     } elsif ($self->{current_token}->{type} eq 'end tag') {
1078     $self->{content_model_flag} = 'PCDATA'; # MUST
1079     if ($self->{current_token}->{attributes}) {
1080 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1081 wakaba 1.1 }
1082     } else {
1083     die "$0: $self->{current_token}->{type}: Unknown token type";
1084     }
1085     $self->{state} = 'data';
1086     ## reconsume
1087    
1088     !!!emit ($self->{current_token}); # start tag or end tag
1089     undef $self->{current_token};
1090    
1091     redo A;
1092     } else {
1093     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1094     ## Stay in the state
1095     !!!next-input-character;
1096     redo A;
1097     }
1098     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1099     if ($self->{next_input_character} == 0x0009 or # HT
1100     $self->{next_input_character} == 0x000A or # LF
1101     $self->{next_input_character} == 0x000B or # HT
1102     $self->{next_input_character} == 0x000C or # FF
1103     $self->{next_input_character} == 0x0020) { # SP
1104     $self->{state} = 'before attribute name';
1105     !!!next-input-character;
1106     redo A;
1107     } elsif ($self->{next_input_character} == 0x0026) { # &
1108     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1109     $self->{state} = 'entity in attribute value';
1110     !!!next-input-character;
1111     redo A;
1112     } elsif ($self->{next_input_character} == 0x003E) { # >
1113     if ($self->{current_token}->{type} eq 'start tag') {
1114     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1115     } elsif ($self->{current_token}->{type} eq 'end tag') {
1116     $self->{content_model_flag} = 'PCDATA'; # MUST
1117     if ($self->{current_token}->{attributes}) {
1118 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1119 wakaba 1.1 }
1120     } else {
1121     die "$0: $self->{current_token}->{type}: Unknown token type";
1122     }
1123     $self->{state} = 'data';
1124     !!!next-input-character;
1125    
1126     !!!emit ($self->{current_token}); # start tag or end tag
1127     undef $self->{current_token};
1128    
1129     redo A;
1130     } elsif ($self->{next_input_character} == 0x003C or # <
1131     $self->{next_input_character} == -1) {
1132 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1133 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1134     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1135     } elsif ($self->{current_token}->{type} eq 'end tag') {
1136     $self->{content_model_flag} = 'PCDATA'; # MUST
1137     if ($self->{current_token}->{attributes}) {
1138 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1139 wakaba 1.1 }
1140     } else {
1141     die "$0: $self->{current_token}->{type}: Unknown token type";
1142     }
1143     $self->{state} = 'data';
1144     ## reconsume
1145    
1146     !!!emit ($self->{current_token}); # start tag or end tag
1147     undef $self->{current_token};
1148    
1149     redo A;
1150     } else {
1151     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1152     ## Stay in the state
1153     !!!next-input-character;
1154     redo A;
1155     }
1156     } elsif ($self->{state} eq 'entity in attribute value') {
1157     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1158    
1159     unless (defined $token) {
1160     $self->{current_attribute}->{value} .= '&';
1161     } else {
1162     $self->{current_attribute}->{value} .= $token->{data};
1163     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1164     }
1165    
1166     $self->{state} = $self->{last_attribute_value_state};
1167     # next-input-character is already done
1168     redo A;
1169     } elsif ($self->{state} eq 'bogus comment') {
1170     ## (only happen if PCDATA state)
1171    
1172     my $token = {type => 'comment', data => ''};
1173    
1174     BC: {
1175     if ($self->{next_input_character} == 0x003E) { # >
1176     $self->{state} = 'data';
1177     !!!next-input-character;
1178    
1179     !!!emit ($token);
1180    
1181     redo A;
1182     } elsif ($self->{next_input_character} == -1) {
1183     $self->{state} = 'data';
1184     ## reconsume
1185    
1186     !!!emit ($token);
1187    
1188     redo A;
1189     } else {
1190     $token->{data} .= chr ($self->{next_input_character});
1191     !!!next-input-character;
1192     redo BC;
1193     }
1194     } # BC
1195     } elsif ($self->{state} eq 'markup declaration open') {
1196     ## (only happen if PCDATA state)
1197    
1198     my @next_char;
1199     push @next_char, $self->{next_input_character};
1200    
1201     if ($self->{next_input_character} == 0x002D) { # -
1202     !!!next-input-character;
1203     push @next_char, $self->{next_input_character};
1204     if ($self->{next_input_character} == 0x002D) { # -
1205     $self->{current_token} = {type => 'comment', data => ''};
1206     $self->{state} = 'comment';
1207     !!!next-input-character;
1208     redo A;
1209     }
1210     } elsif ($self->{next_input_character} == 0x0044 or # D
1211     $self->{next_input_character} == 0x0064) { # d
1212     !!!next-input-character;
1213     push @next_char, $self->{next_input_character};
1214     if ($self->{next_input_character} == 0x004F or # O
1215     $self->{next_input_character} == 0x006F) { # o
1216     !!!next-input-character;
1217     push @next_char, $self->{next_input_character};
1218     if ($self->{next_input_character} == 0x0043 or # C
1219     $self->{next_input_character} == 0x0063) { # c
1220     !!!next-input-character;
1221     push @next_char, $self->{next_input_character};
1222     if ($self->{next_input_character} == 0x0054 or # T
1223     $self->{next_input_character} == 0x0074) { # t
1224     !!!next-input-character;
1225     push @next_char, $self->{next_input_character};
1226     if ($self->{next_input_character} == 0x0059 or # Y
1227     $self->{next_input_character} == 0x0079) { # y
1228     !!!next-input-character;
1229     push @next_char, $self->{next_input_character};
1230     if ($self->{next_input_character} == 0x0050 or # P
1231     $self->{next_input_character} == 0x0070) { # p
1232     !!!next-input-character;
1233     push @next_char, $self->{next_input_character};
1234     if ($self->{next_input_character} == 0x0045 or # E
1235     $self->{next_input_character} == 0x0065) { # e
1236     ## ISSUE: What a stupid code this is!
1237     $self->{state} = 'DOCTYPE';
1238     !!!next-input-character;
1239     redo A;
1240     }
1241     }
1242     }
1243     }
1244     }
1245     }
1246     }
1247    
1248 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1249 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1250     !!!back-next-input-character (@next_char);
1251     $self->{state} = 'bogus comment';
1252     redo A;
1253    
1254     ## ISSUE: typos in spec: chacacters, is is a parse error
1255     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1256     } elsif ($self->{state} eq 'comment') {
1257     if ($self->{next_input_character} == 0x002D) { # -
1258     $self->{state} = 'comment dash';
1259     !!!next-input-character;
1260     redo A;
1261     } elsif ($self->{next_input_character} == -1) {
1262 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1263 wakaba 1.1 $self->{state} = 'data';
1264     ## reconsume
1265    
1266     !!!emit ($self->{current_token}); # comment
1267     undef $self->{current_token};
1268    
1269     redo A;
1270     } else {
1271     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1272     ## Stay in the state
1273     !!!next-input-character;
1274     redo A;
1275     }
1276     } elsif ($self->{state} eq 'comment dash') {
1277     if ($self->{next_input_character} == 0x002D) { # -
1278     $self->{state} = 'comment end';
1279     !!!next-input-character;
1280     redo A;
1281     } elsif ($self->{next_input_character} == -1) {
1282 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1283 wakaba 1.1 $self->{state} = 'data';
1284     ## reconsume
1285    
1286     !!!emit ($self->{current_token}); # comment
1287     undef $self->{current_token};
1288    
1289     redo A;
1290     } else {
1291     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1292     $self->{state} = 'comment';
1293     !!!next-input-character;
1294     redo A;
1295     }
1296     } elsif ($self->{state} eq 'comment end') {
1297     if ($self->{next_input_character} == 0x003E) { # >
1298     $self->{state} = 'data';
1299     !!!next-input-character;
1300    
1301     !!!emit ($self->{current_token}); # comment
1302     undef $self->{current_token};
1303    
1304     redo A;
1305     } elsif ($self->{next_input_character} == 0x002D) { # -
1306 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1307 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1308     ## Stay in the state
1309     !!!next-input-character;
1310     redo A;
1311     } elsif ($self->{next_input_character} == -1) {
1312 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1313 wakaba 1.1 $self->{state} = 'data';
1314     ## reconsume
1315    
1316     !!!emit ($self->{current_token}); # comment
1317     undef $self->{current_token};
1318    
1319     redo A;
1320     } else {
1321 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1322 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1323     $self->{state} = 'comment';
1324     !!!next-input-character;
1325     redo A;
1326     }
1327     } elsif ($self->{state} eq 'DOCTYPE') {
1328     if ($self->{next_input_character} == 0x0009 or # HT
1329     $self->{next_input_character} == 0x000A or # LF
1330     $self->{next_input_character} == 0x000B or # VT
1331     $self->{next_input_character} == 0x000C or # FF
1332     $self->{next_input_character} == 0x0020) { # SP
1333     $self->{state} = 'before DOCTYPE name';
1334     !!!next-input-character;
1335     redo A;
1336     } else {
1337 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1338 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1339     ## reconsume
1340     redo A;
1341     }
1342     } elsif ($self->{state} eq 'before DOCTYPE name') {
1343     if ($self->{next_input_character} == 0x0009 or # HT
1344     $self->{next_input_character} == 0x000A or # LF
1345     $self->{next_input_character} == 0x000B or # VT
1346     $self->{next_input_character} == 0x000C or # FF
1347     $self->{next_input_character} == 0x0020) { # SP
1348     ## Stay in the state
1349     !!!next-input-character;
1350     redo A;
1351     } elsif (0x0061 <= $self->{next_input_character} and
1352     $self->{next_input_character} <= 0x007A) { # a..z
1353 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1354 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1355     name => chr ($self->{next_input_character} - 0x0020),
1356     error => 1};
1357     $self->{state} = 'DOCTYPE name';
1358     !!!next-input-character;
1359     redo A;
1360     } elsif ($self->{next_input_character} == 0x003E) { # >
1361 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1362 wakaba 1.1 $self->{state} = 'data';
1363     !!!next-input-character;
1364    
1365     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1366    
1367     redo A;
1368     } elsif ($self->{next_input_character} == -1) {
1369 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1370 wakaba 1.1 $self->{state} = 'data';
1371     ## reconsume
1372    
1373     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1374    
1375     redo A;
1376     } else {
1377     $self->{current_token} = {type => 'DOCTYPE',
1378     name => chr ($self->{next_input_character}),
1379     error => 1};
1380 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1381 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1382     !!!next-input-character;
1383     redo A;
1384     }
1385     } elsif ($self->{state} eq 'DOCTYPE name') {
1386     if ($self->{next_input_character} == 0x0009 or # HT
1387     $self->{next_input_character} == 0x000A or # LF
1388     $self->{next_input_character} == 0x000B or # VT
1389     $self->{next_input_character} == 0x000C or # FF
1390     $self->{next_input_character} == 0x0020) { # SP
1391     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1392     $self->{state} = 'after DOCTYPE name';
1393     !!!next-input-character;
1394     redo A;
1395     } elsif ($self->{next_input_character} == 0x003E) { # >
1396     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1397     $self->{state} = 'data';
1398     !!!next-input-character;
1399    
1400     !!!emit ($self->{current_token}); # DOCTYPE
1401     undef $self->{current_token};
1402    
1403     redo A;
1404     } elsif (0x0061 <= $self->{next_input_character} and
1405     $self->{next_input_character} <= 0x007A) { # a..z
1406     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1407     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1408     ## Stay in the state
1409     !!!next-input-character;
1410     redo A;
1411     } elsif ($self->{next_input_character} == -1) {
1412 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1413 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1414     $self->{state} = 'data';
1415     ## reconsume
1416    
1417     !!!emit ($self->{current_token});
1418     undef $self->{current_token};
1419    
1420     redo A;
1421     } else {
1422     $self->{current_token}->{name}
1423     .= chr ($self->{next_input_character}); # DOCTYPE
1424     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1425     ## Stay in the state
1426     !!!next-input-character;
1427     redo A;
1428     }
1429     } elsif ($self->{state} eq 'after DOCTYPE name') {
1430     if ($self->{next_input_character} == 0x0009 or # HT
1431     $self->{next_input_character} == 0x000A or # LF
1432     $self->{next_input_character} == 0x000B or # VT
1433     $self->{next_input_character} == 0x000C or # FF
1434     $self->{next_input_character} == 0x0020) { # SP
1435     ## Stay in the state
1436     !!!next-input-character;
1437     redo A;
1438     } elsif ($self->{next_input_character} == 0x003E) { # >
1439     $self->{state} = 'data';
1440     !!!next-input-character;
1441    
1442     !!!emit ($self->{current_token}); # DOCTYPE
1443     undef $self->{current_token};
1444    
1445     redo A;
1446     } elsif ($self->{next_input_character} == -1) {
1447 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1448 wakaba 1.1 $self->{state} = 'data';
1449     ## reconsume
1450    
1451     !!!emit ($self->{current_token}); # DOCTYPE
1452     undef $self->{current_token};
1453    
1454     redo A;
1455     } else {
1456 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1457 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1458     $self->{state} = 'bogus DOCTYPE';
1459     !!!next-input-character;
1460     redo A;
1461     }
1462     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1463     if ($self->{next_input_character} == 0x003E) { # >
1464     $self->{state} = 'data';
1465     !!!next-input-character;
1466    
1467     !!!emit ($self->{current_token}); # DOCTYPE
1468     undef $self->{current_token};
1469    
1470     redo A;
1471     } elsif ($self->{next_input_character} == -1) {
1472 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1473 wakaba 1.1 $self->{state} = 'data';
1474     ## reconsume
1475    
1476     !!!emit ($self->{current_token}); # DOCTYPE
1477     undef $self->{current_token};
1478    
1479     redo A;
1480     } else {
1481     ## Stay in the state
1482     !!!next-input-character;
1483     redo A;
1484     }
1485     } else {
1486     die "$0: $self->{state}: Unknown state";
1487     }
1488     } # A
1489    
1490     die "$0: _get_next_token: unexpected case";
1491     } # _get_next_token
1492    
## Attempt to consume an entity (character reference).
##
## On entry, |$self->{next_input_character}| is the character that
## follows the |&| (the caller has already consumed the |&| itself).
##
## Returns a character token, i.e. a hash reference of the form
## |{type => 'character', data => $string}|, if a reference has been
## recognized, or |undef| otherwise.  When |undef| is returned, the
## caller is expected to treat the |&| as a literal character.
##
## Three forms are recognized:
##   - |&#x| followed by hexadecimal digits,
##   - |&#| followed by decimal digits, and
##   - |&| followed by a name listed in |$entity_char|.
## In any form, a missing |;| is reported as a parse error but the
## reference is still replaced.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      my $num;
      X: {
        ## NOTE: Only meaningful in the first iteration, where it holds
        ## the |x| or |X|; it is used only when no digit follows.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error (type => 'bare hcro');
          ## Unconsume everything after the |&|: the |x| and the
          ## character following it are pushed back (in this order)
          ## and |#| becomes the next input character again.  The
          ## character following the |x| must be pushed back before
          ## it is overwritten; otherwise it would be lost.
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error (type => 'no refc');
        }

        ## TODO: check the definition for |a valid Unicode character|.
        ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        } elsif (0x80 <= $num and $num <= 0x9F) {
          ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
          ## ISSUE: Not in the spec yet; parse error?
          $num = $c1_entity_char->{$num};
        }

        return {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        ## NOTE: <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
        ## ISSUE: Not in the spec yet; parse error?
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      !!!parse-error (type => 'bare nero');
      ## Unconsume the |#|: the character following it is pushed back
      ## and |#| becomes the next input character again.
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the replacement text collected so far: the value of
    ## the most recently matched entity name followed by any characters
    ## consumed after that match (or all consumed characters if no name
    ## has matched).
    my $value = $entity_name;
    my $match;

    while (length ($entity_name) < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x005A) or
            (0x0061 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x007A) or
            (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039))) {
      $entity_name .= chr $self->{next_input_character};
      if (defined $entity_char->{$entity_name}) {
        $value = $entity_char->{$entity_name};
        $match = 1;
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match) {
      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        ## Same error type as the one used for numeric references
        ## that lack the terminating |;|.
        !!!parse-error (type => 'no refc');
      }

      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      ## The characters consumed here are handed back as a character
      ## token instead of being pushed back one by one.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1632    
## Prepare the document for the tree construction stage by turning
## its |strict_error_checking| off.
## NOTE: $self->{document} MUST be specified before this method is called
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST
  $doc->strict_error_checking (0);
} # _initialize_tree_constructor
1641    
## Finish the tree construction stage by turning the document's
## |strict_error_checking| back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
1647    
1648     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1649    
1650 wakaba 1.3 { # tree construction stage
1651     my $token;
1652    
## Run the tree construction stage: fetch the first token, reset the
## tree construction state kept in |$self|, and then run the initial,
## root element, and main phases in this order.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA makes the
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  ## Appending a character: collect it and all subsequent consecutive
  ## characters, and insert a single Text node whose data is the
  ## concatenation of all those characters. # MUST

  !!!next-token;

  ## Reset the state used by the tree construction phases.
  $self->{insertion_mode} = 'before head';
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  $self->_tree_construction_initial (); # MUST
  $self->_tree_construction_root_element ();
  $self->_tree_construction_main ();
} # _construct_tree
1676    
## The initial phase of the tree construction stage.
##
## Processes tokens until the DOCTYPE, or the first token that shows
## the DOCTYPE is missing, has been seen:
##   - a DOCTYPE token is appended to the document as a document type
##     definition node and the next token is fetched;
##   - leading white space characters are appended to the document as
##     text, and the phase continues if nothing else remains in the
##     character token;
##   - any other known token is reported as |missing DOCTYPE| and left
##     as the current token to be reprocessed.
## Dies if the type of the token is unknown.
sub _tree_construction_initial ($) {
  my ($self) = @_;

  ## Token types that, like non-white-space characters, imply that
  ## the DOCTYPE is missing.
  my %implies_missing_doctype = (
    comment => 1,
    'start tag' => 1,
    'end tag' => 1,
    'end-of-file' => 1,
  );

  B: {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      $self->{document}->append_child
          ($self->{document}->create_document_type_definition
               ($token->{name}));
      !!!next-token;
      ## Returning lets the caller go on to the root element phase.
      return;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          redo B;
        }
      }
    } elsif (not $implies_missing_doctype{$type}) {
      die "$0: $token->{type}: Unknown token";
    }

    ## ISSUE: Spec currently left this case undefined.
    !!!parse-error (type => 'missing DOCTYPE');
    ## The current token is left as is so that it is reprocessed
    ## after this method returns.
    return;
  } # B
} # _tree_construction_initial
1725    
## "Root element" phase of tree construction.
##
## DOCTYPE tokens are reported and dropped, comments and leading white
## space are appended to the document.  The first other token makes the
## method create the |html| root element, append it to the document and
## push it onto the stack of open elements; that token stays current so
## that the main phase reprocesses it.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        unless (length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          next TOKEN;
        }
      }
      last TOKEN;
    }

    ## ISSUE: There is an issue in the spec
    last TOKEN if $type eq 'start tag' or
                  $type eq 'end tag' or
                  $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # TOKEN

  ## Create the root element; the current token is reprocessed by the
  ## main phase.
  my $root_element;
  !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];
  return;
} # _tree_construction_root_element
1772    
## Resets $self->{insertion_mode} from the stack of open elements.
##
## The stack ($self->{open_elements}, a list of [node, tag name] pairs) is
## walked from the current node upwards until an element that determines
## the insertion mode is found.  When the walk reaches the first node of
## the stack and an innerHTML context element ($self->{inner_html_node})
## is set, that context element is examined instead, unless it is a |td|
## or |th| element.  Nothing useful is returned.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      ## The context element may only replace the *first* node of the
      ## stack; substituting it on every iteration would make the walk
      ## over the stack meaningless in the innerHTML case.
      if (defined $self->{inner_html_node}) {
        if ($self->{inner_html_node}->[1] eq 'td' or
            $self->{inner_html_node}->[1] eq 'th') {
          #
        } else {
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..13
    ## NOTE(review): the specification maps |thead| and |tfoot| to
    ## "in table body" as well; 'in table head' / 'in table foot' are kept
    ## here because the code that dispatches on the insertion mode is not
    ## visible from this function -- confirm and unify.
    my $new_mode = {
                    select => 'in select',
                    td => 'in cell',
                    th => 'in cell',
                    tr => 'in row',
                    tbody => 'in table body',
                    thead => 'in table head',
                    tfoot => 'in table foot',
                    caption => 'in caption',
                    colgroup => 'in column group',
                    table => 'in table',
                    head => 'in body', # not in head!
                    body => 'in body',
                    frameset => 'in frameset',
                   }->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $self->{head_element}) {
        $self->{insertion_mode} = 'before head';
      } else {
        $self->{insertion_mode} = 'after head';
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
1834    
1835     sub _tree_construction_main ($) {
1836     my $self = shift;
1837    
1838     my $phase = 'main';
1839 wakaba 1.1
1840     my $active_formatting_elements = [];
1841    
## Reconstructs the active formatting elements.
##
## Entries of the list of active formatting elements (pairs of
## [node, tag name], or a '#marker' pseudo entry) that follow the last
## marker / last entry still on the stack of open elements are cloned.
## Each clone is handed to the $insert callback given as the only
## argument, pushed onto the stack of open elements and stored in the
## list in place of the entry it was cloned from.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert = shift;

  ## True if the node of the given list entry is on the stack of open
  ## elements.
  my $is_open = sub {
    my $candidate = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $candidate->[0] eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$active_formatting_elements;

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry);

  ## Step 4: nothing to rewind once $entry is the first entry of the list
  REWIND: until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6: keep rewinding while the entry is neither a marker nor open
    next REWIND unless $entry->[0] eq '#marker' or $is_open->($entry);

    ## Step 7
    $i++;
    $entry = $active_formatting_elements->[$i];
    last REWIND;
  } # REWIND

  ADVANCE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

    ## Step 11: done once the clone is the last entry of the list
    last ADVANCE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # ADVANCE
}; # $reconstruct_active_formatting_elements
1912    
## Clears the list of active formatting elements up to the last marker:
## the last '#marker' entry and every entry after it are dropped.  The
## list is left untouched when it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      ## Truncate the list just before the marker.
      $#$active_formatting_elements = $idx - 1;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
1921    
## Handles a |style| start tag (the current token).
##
## A |style| element carrying the attributes of the token is created
## and appended to the |head| element when the insertion mode is
## 'in head' and a |head| element is known, otherwise to the current
## node.  The following character tokens are collected with the content
## model flag set to CDATA and become the text of the element.  The
## token that ends the text is consumed in every case; it is reported
## as a parse error unless it is the matching |style| end tag.
my $style_start_tag = sub {
  my $style_el;
  ## The attributes of the token (e.g. |type|, |media|) have to be
  ## copied to the element, as is done for |script| and |title|.
  !!!create-element ($style_el, 'style', $token->{attributes});
  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
   ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
      ->append_child ($style_el);
  $self->{content_model_flag} = 'CDATA';

  my $text = '';
  !!!next-token;
  while ($token->{type} eq 'character') {
    $text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $text) {
    $style_el->manakai_append_text ($text);
  }

  $self->{content_model_flag} = 'PCDATA';

  if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    ## Ignore the token
  } else {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  !!!next-token;
}; # $style_start_tag
1950    
## Handles a |script| start tag (the current token).
##
## A |script| element is created from the token and the following
## character tokens, read with the content model flag set to CDATA,
## become its text.  Unless an innerHTML context element is set, the
## element is then appended to the |head| element (insertion mode
## 'in head' with a known |head| element) or to the current node.  The
## token that ends the text is consumed in every case; it is reported
## as a parse error unless it is the matching |script| end tag.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  ## Gather the script source from the consecutive character tokens.
  my @chunks;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }
  ## Otherwise: the matching end tag, which is simply ignored.

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent;
    if ($self->{insertion_mode} eq 'in head' and
        defined $self->{head_element}) {
      $parent = $self->{head_element};
    } else {
      $parent = $self->{open_elements}->[-1]->[0];
    }
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }
  ## innerHTML case: ## TODO: mark as "already executed"

  !!!next-token;
}; # $script_start_tag
1995    
## Handles an end tag of a formatting element (the steps the
## specification gives for misnested formatting elements).
##
## Argument: the tag name of the end tag being processed.
##
## Works on the list of active formatting elements and on the stack of
## open elements, both lists of [node, tag name] pairs (the former may
## also contain '#marker' pseudo entries).  The series of steps is
## repeated (Step 14) until one of the |return| paths is taken; every
## such path first consumes the current token.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself from the list; it is not
      ## necessarily the last entry, so |pop| would drop the wrong one.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## The furthest block is the topmost qualifying node below the
    ## formatting element: the stack is scanned bottom-up and every hit
    ## overrides the previous one.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      ## Pop the formatting element and everything below it.
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): when the formatting element is the first entry of
    ## the list, index -1 wraps around to the last entry -- confirm that
    ## this is the intended bookmark.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        ## Not an active formatting element: drop it from the stack and
        ## look at the next node up.
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Work on a copy of the child list, since the children are moved.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## The list is scanned backwards; when the bookmark is met before the
    ## formatting element, its index has to be shifted by one after the
    ## formatting element has been removed.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## The clone is *inserted* just below the furthest block (length 0);
    ## replacing the entry found there would silently drop an open
    ## element from the stack.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2178    
## Insertion callback: appends the given node to the current node, i.e.
## to the last entry of the stack of open elements.
my $insert_to_current = sub {
  my $child = shift;
  my $current_node = $self->{open_elements}->[-1]->[0];
  return $current_node->append_child ($child);
}; # $insert_to_current
2182    
## Insertion callback with foster parenting.
##
## If the current node is a |table|, |tbody|, |tfoot|, |thead| or |tr|
## element, the given node is not appended to it.  It is inserted just
## before the last |table| on the stack of open elements when that table
## has an element parent, or otherwise appended to the stack entry
## preceding the table.  The first entry of the stack is used when no
## |table| is open.  In every other case the node is simply appended to
## the current node.
my $insert_to_foster = sub {
  my $child = shift;
  my $stack = $self->{open_elements};
  my $current = $stack->[-1];

  my %needs_foster_parent = map { $_ => 1 } qw(table tbody tfoot thead tr);
  unless ($needs_foster_parent{$current->[1]}) {
    return $current->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent_element, $next_sibling);
  for my $idx (reverse 0..$#$stack) {
    next unless $stack->[$idx]->[1] eq 'table';

    my $table = $stack->[$idx]->[0];
    my $parent = $table->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      ## Insert right before the table, within the parent of the table.
      $foster_parent_element = $parent;
      $next_sibling = $table;
    } else {
      ## Append to the entry just above the table in the stack.
      $foster_parent_element = $stack->[$idx - 1]->[0];
    }
    last;
  }
  $foster_parent_element = $stack->[0]->[0]
      unless defined $foster_parent_element;

  ## An undefined $next_sibling is passed through as is when no
  ## reference node was chosen above.
  return $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2213    
2214     my $in_body = sub {
2215     my $insert = shift;
2216     if ($token->{type} eq 'start tag') {
2217     if ($token->{tag_name} eq 'script') {
2218     $script_start_tag->();
2219     return;
2220     } elsif ($token->{tag_name} eq 'style') {
2221     $style_start_tag->();
2222     return;
2223     } elsif ({
2224     base => 1, link => 1, meta => 1,
2225     }->{$token->{tag_name}}) {
2226 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2227 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2228     my $el;
2229     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2230 wakaba 1.3 if (defined $self->{head_element}) {
2231     $self->{head_element}->append_child ($el);
2232 wakaba 1.1 } else {
2233     $insert->($el);
2234     }
2235    
2236     !!!next-token;
2237     return;
2238     } elsif ($token->{tag_name} eq 'title') {
2239 wakaba 1.3 !!!parse-error (type => 'in body:title');
2240 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2241     my $title_el;
2242     !!!create-element ($title_el, 'title', $token->{attributes});
2243 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2244 wakaba 1.1 ->append_child ($title_el);
2245     $self->{content_model_flag} = 'RCDATA';
2246    
2247     my $text = '';
2248     !!!next-token;
2249     while ($token->{type} eq 'character') {
2250     $text .= $token->{data};
2251     !!!next-token;
2252     }
2253     if (length $text) {
2254     $title_el->manakai_append_text ($text);
2255     }
2256    
2257     $self->{content_model_flag} = 'PCDATA';
2258    
2259     if ($token->{type} eq 'end tag' and
2260     $token->{tag_name} eq 'title') {
2261     ## Ignore the token
2262     } else {
2263 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2264 wakaba 1.1 ## ISSUE: And ignore?
2265     }
2266     !!!next-token;
2267     return;
2268     } elsif ($token->{tag_name} eq 'body') {
2269 wakaba 1.3 !!!parse-error (type => 'in body:body');
2270 wakaba 1.1
2271 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2272     $self->{open_elements}->[1]->[1] ne 'body') {
2273 wakaba 1.1 ## Ignore the token
2274     } else {
2275 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2276 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2277     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2278     $body_el->set_attribute_ns
2279     (undef, [undef, $attr_name],
2280     $token->{attributes}->{$attr_name}->{value});
2281     }
2282     }
2283     }
2284     !!!next-token;
2285     return;
2286     } elsif ({
2287     address => 1, blockquote => 1, center => 1, dir => 1,
2288     div => 1, dl => 1, fieldset => 1, listing => 1,
2289     menu => 1, ol => 1, p => 1, ul => 1,
2290     pre => 1,
2291     }->{$token->{tag_name}}) {
2292     ## has a p element in scope
2293 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2294 wakaba 1.1 if ($_->[1] eq 'p') {
2295     !!!back-token;
2296     $token = {type => 'end tag', tag_name => 'p'};
2297     return;
2298     } elsif ({
2299     table => 1, caption => 1, td => 1, th => 1,
2300     button => 1, marquee => 1, object => 1, html => 1,
2301     }->{$_->[1]}) {
2302     last INSCOPE;
2303     }
2304     } # INSCOPE
2305    
2306     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2307     if ($token->{tag_name} eq 'pre') {
2308     !!!next-token;
2309     if ($token->{type} eq 'character') {
2310     $token->{data} =~ s/^\x0A//;
2311     unless (length $token->{data}) {
2312     !!!next-token;
2313     }
2314     }
2315     } else {
2316     !!!next-token;
2317     }
2318     return;
2319     } elsif ($token->{tag_name} eq 'form') {
2320 wakaba 1.3 if (defined $self->{form_element}) {
2321     !!!parse-error (type => 'in form:form');
2322 wakaba 1.1 ## Ignore the token
2323     } else {
2324     ## has a p element in scope
2325 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2326 wakaba 1.1 if ($_->[1] eq 'p') {
2327     !!!back-token;
2328     $token = {type => 'end tag', tag_name => 'p'};
2329     return;
2330     } elsif ({
2331     table => 1, caption => 1, td => 1, th => 1,
2332     button => 1, marquee => 1, object => 1, html => 1,
2333     }->{$_->[1]}) {
2334     last INSCOPE;
2335     }
2336     } # INSCOPE
2337    
2338     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2339 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2340 wakaba 1.1 !!!next-token;
2341     return;
2342     }
2343     } elsif ($token->{tag_name} eq 'li') {
2344     ## has a p element in scope
2345 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2346 wakaba 1.1 if ($_->[1] eq 'p') {
2347     !!!back-token;
2348     $token = {type => 'end tag', tag_name => 'p'};
2349     return;
2350     } elsif ({
2351     table => 1, caption => 1, td => 1, th => 1,
2352     button => 1, marquee => 1, object => 1, html => 1,
2353     }->{$_->[1]}) {
2354     last INSCOPE;
2355     }
2356     } # INSCOPE
2357    
2358     ## Step 1
2359     my $i = -1;
2360 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2361 wakaba 1.1 LI: {
2362     ## Step 2
2363     if ($node->[1] eq 'li') {
2364 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2365 wakaba 1.1 last LI;
2366     }
2367    
2368     ## Step 3
2369     if (not $formatting_category->{$node->[1]} and
2370     #not $phrasing_category->{$node->[1]} and
2371     ($special_category->{$node->[1]} or
2372     $scoping_category->{$node->[1]}) and
2373     $node->[1] ne 'address' and $node->[1] ne 'div') {
2374     last LI;
2375     }
2376    
2377     ## Step 4
2378     $i--;
2379 wakaba 1.3 $node = $self->{open_elements}->[$i];
2380 wakaba 1.1 redo LI;
2381     } # LI
2382    
2383     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2384     !!!next-token;
2385     return;
2386     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2387     ## has a p element in scope
2388 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2389 wakaba 1.1 if ($_->[1] eq 'p') {
2390     !!!back-token;
2391     $token = {type => 'end tag', tag_name => 'p'};
2392     return;
2393     } elsif ({
2394     table => 1, caption => 1, td => 1, th => 1,
2395     button => 1, marquee => 1, object => 1, html => 1,
2396     }->{$_->[1]}) {
2397     last INSCOPE;
2398     }
2399     } # INSCOPE
2400    
2401     ## Step 1
2402     my $i = -1;
2403 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2404 wakaba 1.1 LI: {
2405     ## Step 2
2406     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2407 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2408 wakaba 1.1 last LI;
2409     }
2410    
2411     ## Step 3
2412     if (not $formatting_category->{$node->[1]} and
2413     #not $phrasing_category->{$node->[1]} and
2414     ($special_category->{$node->[1]} or
2415     $scoping_category->{$node->[1]}) and
2416     $node->[1] ne 'address' and $node->[1] ne 'div') {
2417     last LI;
2418     }
2419    
2420     ## Step 4
2421     $i--;
2422 wakaba 1.3 $node = $self->{open_elements}->[$i];
2423 wakaba 1.1 redo LI;
2424     } # LI
2425    
2426     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2427     !!!next-token;
2428     return;
2429     } elsif ($token->{tag_name} eq 'plaintext') {
2430     ## has a p element in scope
2431 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2432 wakaba 1.1 if ($_->[1] eq 'p') {
2433     !!!back-token;
2434     $token = {type => 'end tag', tag_name => 'p'};
2435     return;
2436     } elsif ({
2437     table => 1, caption => 1, td => 1, th => 1,
2438     button => 1, marquee => 1, object => 1, html => 1,
2439     }->{$_->[1]}) {
2440     last INSCOPE;
2441     }
2442     } # INSCOPE
2443    
2444     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2445    
2446     $self->{content_model_flag} = 'PLAINTEXT';
2447    
2448     !!!next-token;
2449     return;
2450     } elsif ({
2451     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2452     }->{$token->{tag_name}}) {
2453     ## has a p element in scope
2454 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2455     my $node = $self->{open_elements}->[$_];
2456 wakaba 1.1 if ($node->[1] eq 'p') {
2457     !!!back-token;
2458     $token = {type => 'end tag', tag_name => 'p'};
2459     return;
2460     } elsif ({
2461     table => 1, caption => 1, td => 1, th => 1,
2462     button => 1, marquee => 1, object => 1, html => 1,
2463     }->{$node->[1]}) {
2464     last INSCOPE;
2465     }
2466     } # INSCOPE
2467    
2468     ## has an element in scope
2469     my $i;
2470 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2471     my $node = $self->{open_elements}->[$_];
2472 wakaba 1.1 if ({
2473     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2474     }->{$node->[1]}) {
2475     $i = $_;
2476     last INSCOPE;
2477     } elsif ({
2478     table => 1, caption => 1, td => 1, th => 1,
2479     button => 1, marquee => 1, object => 1, html => 1,
2480     }->{$node->[1]}) {
2481     last INSCOPE;
2482     }
2483     } # INSCOPE
2484    
2485     if (defined $i) {
2486 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2487     splice @{$self->{open_elements}}, $i;
2488 wakaba 1.1 }
2489    
2490     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2491    
2492     !!!next-token;
2493     return;
2494     } elsif ($token->{tag_name} eq 'a') {
2495     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2496     my $node = $active_formatting_elements->[$i];
2497     if ($node->[1] eq 'a') {
2498 wakaba 1.3 !!!parse-error (type => 'in a:a');
2499 wakaba 1.1
2500     !!!back-token;
2501     $token = {type => 'end tag', tag_name => 'a'};
2502     $formatting_end_tag->($token->{tag_name});
2503    
2504     AFE2: for (reverse 0..$#$active_formatting_elements) {
2505     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2506     splice @$active_formatting_elements, $_, 1;
2507     last AFE2;
2508     }
2509     } # AFE2
2510 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2511     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2512     splice @{$self->{open_elements}}, $_, 1;
2513 wakaba 1.1 last OE;
2514     }
2515     } # OE
2516     last AFE;
2517     } elsif ($node->[0] eq '#marker') {
2518     last AFE;
2519     }
2520     } # AFE
2521    
2522     $reconstruct_active_formatting_elements->($insert_to_current);
2523    
2524     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2525 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2526 wakaba 1.1
2527     !!!next-token;
2528     return;
2529     } elsif ({
2530     b => 1, big => 1, em => 1, font => 1, i => 1,
2531     nobr => 1, s => 1, small => 1, strile => 1,
2532     strong => 1, tt => 1, u => 1,
2533     }->{$token->{tag_name}}) {
2534     $reconstruct_active_formatting_elements->($insert_to_current);
2535    
2536     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2537 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2538 wakaba 1.1
2539     !!!next-token;
2540     return;
2541     } elsif ($token->{tag_name} eq 'button') {
2542     ## has a button element in scope
2543 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2544     my $node = $self->{open_elements}->[$_];
2545 wakaba 1.1 if ($node->[1] eq 'button') {
2546 wakaba 1.3 !!!parse-error (type => 'in button:button');
2547 wakaba 1.1 !!!back-token;
2548     $token = {type => 'end tag', tag_name => 'button'};
2549     return;
2550     } elsif ({
2551     table => 1, caption => 1, td => 1, th => 1,
2552     button => 1, marquee => 1, object => 1, html => 1,
2553     }->{$node->[1]}) {
2554     last INSCOPE;
2555     }
2556     } # INSCOPE
2557    
2558     $reconstruct_active_formatting_elements->($insert_to_current);
2559    
2560     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2561     push @$active_formatting_elements, ['#marker', ''];
2562    
2563     !!!next-token;
2564     return;
2565     } elsif ($token->{tag_name} eq 'marquee' or
2566     $token->{tag_name} eq 'object') {
2567     $reconstruct_active_formatting_elements->($insert_to_current);
2568    
2569     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2570     push @$active_formatting_elements, ['#marker', ''];
2571    
2572     !!!next-token;
2573     return;
2574     } elsif ($token->{tag_name} eq 'xmp') {
2575     $reconstruct_active_formatting_elements->($insert_to_current);
2576    
2577     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2578    
2579     $self->{content_model_flag} = 'CDATA';
2580    
2581     !!!next-token;
2582     return;
2583     } elsif ($token->{tag_name} eq 'table') {
2584     ## has a p element in scope
2585 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2586 wakaba 1.1 if ($_->[1] eq 'p') {
2587     !!!back-token;
2588     $token = {type => 'end tag', tag_name => 'p'};
2589     return;
2590     } elsif ({
2591     table => 1, caption => 1, td => 1, th => 1,
2592     button => 1, marquee => 1, object => 1, html => 1,
2593     }->{$_->[1]}) {
2594     last INSCOPE;
2595     }
2596     } # INSCOPE
2597    
2598     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2599    
2600 wakaba 1.3 $self->{insertion_mode} = 'in table';
2601 wakaba 1.1
2602     !!!next-token;
2603     return;
2604     } elsif ({
2605     area => 1, basefont => 1, bgsound => 1, br => 1,
2606     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2607     image => 1,
2608     }->{$token->{tag_name}}) {
2609     if ($token->{tag_name} eq 'image') {
2610 wakaba 1.3 !!!parse-error (type => 'image');
2611 wakaba 1.1 $token->{tag_name} = 'img';
2612     }
2613    
2614     $reconstruct_active_formatting_elements->($insert_to_current);
2615    
2616     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2617 wakaba 1.3 pop @{$self->{open_elements}};
2618 wakaba 1.1
2619     !!!next-token;
2620     return;
2621     } elsif ($token->{tag_name} eq 'hr') {
2622     ## has a p element in scope
2623 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2624 wakaba 1.1 if ($_->[1] eq 'p') {
2625     !!!back-token;
2626     $token = {type => 'end tag', tag_name => 'p'};
2627     return;
2628     } elsif ({
2629     table => 1, caption => 1, td => 1, th => 1,
2630     button => 1, marquee => 1, object => 1, html => 1,
2631     }->{$_->[1]}) {
2632     last INSCOPE;
2633     }
2634     } # INSCOPE
2635    
2636     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2637 wakaba 1.3 pop @{$self->{open_elements}};
2638 wakaba 1.1
2639     !!!next-token;
2640     return;
2641     } elsif ($token->{tag_name} eq 'input') {
2642     $reconstruct_active_formatting_elements->($insert_to_current);
2643    
2644     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2645 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2646     pop @{$self->{open_elements}};
2647 wakaba 1.1
2648     !!!next-token;
2649     return;
2650     } elsif ($token->{tag_name} eq 'isindex') {
2651 wakaba 1.3 !!!parse-error (type => 'isindex');
2652 wakaba 1.1
2653 wakaba 1.3 if (defined $self->{form_element}) {
2654 wakaba 1.1 ## Ignore the token
2655     !!!next-token;
2656     return;
2657     } else {
2658     my $at = $token->{attributes};
2659     $at->{name} = {name => 'name', value => 'isindex'};
2660     my @tokens = (
2661     {type => 'start tag', tag_name => 'form'},
2662     {type => 'start tag', tag_name => 'hr'},
2663     {type => 'start tag', tag_name => 'p'},
2664     {type => 'start tag', tag_name => 'label'},
2665     {type => 'character',
2666     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2667     ## TODO: make this configurable
2668     {type => 'start tag', tag_name => 'input', attributes => $at},
2669     #{type => 'character', data => ''}, # SHOULD
2670     {type => 'end tag', tag_name => 'label'},
2671     {type => 'end tag', tag_name => 'p'},
2672     {type => 'start tag', tag_name => 'hr'},
2673     {type => 'end tag', tag_name => 'form'},
2674     );
2675     $token = shift @tokens;
2676     !!!back-token (@tokens);
2677     return;
2678     }
2679     } elsif ({
2680     textarea => 1,
2681 wakaba 1.5 iframe => 1,
2682 wakaba 1.1 noembed => 1,
2683     noframes => 1,
2684     noscript => 0, ## TODO: 1 if scripting is enabled
2685     }->{$token->{tag_name}}) {
2686     my $tag_name = $token->{tag_name};
2687     my $el;
2688     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2689    
2690     if ($token->{tag_name} eq 'textarea') {
2691 wakaba 1.3 ## TODO: $self->{form_element} if defined
2692 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2693     } else {
2694     $self->{content_model_flag} = 'CDATA';
2695     }
2696    
2697     $insert->($el);
2698    
2699     my $text = '';
2700     !!!next-token;
2701     while ($token->{type} eq 'character') {
2702     $text .= $token->{data};
2703     !!!next-token;
2704     }
2705     if (length $text) {
2706     $el->manakai_append_text ($text);
2707     }
2708    
2709     $self->{content_model_flag} = 'PCDATA';
2710    
2711     if ($token->{type} eq 'end tag' and
2712     $token->{tag_name} eq $tag_name) {
2713     ## Ignore the token
2714     } else {
2715 wakaba 1.3 if ($tag_name eq 'textarea') { ## test the saved element name; $token is now the unexpected token. |textarea| content is RCDATA
2716     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2717     } else { ## every other element handled by this branch has its content model flag set to CDATA above
2718     !!!parse-error (type => 'in CDATA:#'.$token->{type});
2719     }
2720 wakaba 1.1 ## ISSUE: And ignore?
2721     }
2722     !!!next-token;
2723     return;
2724     } elsif ($token->{tag_name} eq 'select') {
2725     $reconstruct_active_formatting_elements->($insert_to_current);
2726    
2727     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2728    
2729 wakaba 1.3 $self->{insertion_mode} = 'in select';
2730 wakaba 1.1 !!!next-token;
2731     return;
2732     } elsif ({
2733     caption => 1, col => 1, colgroup => 1, frame => 1,
2734     frameset => 1, head => 1, option => 1, optgroup => 1,
2735     tbody => 1, td => 1, tfoot => 1, th => 1,
2736     thead => 1, tr => 1,
2737     }->{$token->{tag_name}}) {
2738 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2739 wakaba 1.1 ## Ignore the token
2740     !!!next-token;
2741     return;
2742    
2743     ## ISSUE: An issue on HTML5 new elements in the spec.
2744     } else {
2745     $reconstruct_active_formatting_elements->($insert_to_current);
2746    
2747     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2748    
2749     !!!next-token;
2750     return;
2751     }
2752     } elsif ($token->{type} eq 'end tag') {
2753     if ($token->{tag_name} eq 'body') {
2754 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2755 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2756 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2757     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2758 wakaba 1.1 }
2759 wakaba 1.3 $self->{insertion_mode} = 'after body';
2760 wakaba 1.1 !!!next-token;
2761     return;
2762     } else {
2763 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2764 wakaba 1.1 ## Ignore the token
2765     !!!next-token;
2766     return;
2767     }
2768     } elsif ($token->{tag_name} eq 'html') {
2769 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2770 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2771 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2772     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]); ## name the current (unclosed) node, as the </body> branch does; ->[1] is always |body| here
2773 wakaba 1.1 }
2774 wakaba 1.3 $self->{insertion_mode} = 'after body';
2775 wakaba 1.1 ## reprocess
2776     return;
2777     } else {
2778 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2779 wakaba 1.1 ## Ignore the token
2780     !!!next-token;
2781     return;
2782     }
2783     } elsif ({
2784     address => 1, blockquote => 1, center => 1, dir => 1,
2785     div => 1, dl => 1, fieldset => 1, listing => 1,
2786     menu => 1, ol => 1, pre => 1, ul => 1,
2787     form => 1,
2788     p => 1,
2789     dd => 1, dt => 1, li => 1,
2790     button => 1, marquee => 1, object => 1,
2791     }->{$token->{tag_name}}) {
2792     ## has an element in scope
2793     my $i;
2794 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2795     my $node = $self->{open_elements}->[$_];
2796 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2797     ## generate implied end tags
2798     if ({
2799     dd => ($token->{tag_name} ne 'dd'),
2800     dt => ($token->{tag_name} ne 'dt'),
2801     li => ($token->{tag_name} ne 'li'),
2802     p => ($token->{tag_name} ne 'p'),
2803     td => 1, th => 1, tr => 1,
2804 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2805 wakaba 1.1 !!!back-token;
2806     $token = {type => 'end tag',
2807 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2808 wakaba 1.1 return;
2809     }
2810     $i = $_;
2811     last INSCOPE unless $token->{tag_name} eq 'p';
2812     } elsif ({
2813     table => 1, caption => 1, td => 1, th => 1,
2814     button => 1, marquee => 1, object => 1, html => 1,
2815     }->{$node->[1]}) {
2816     last INSCOPE;
2817     }
2818     } # INSCOPE
2819    
2820 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2821     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2822 wakaba 1.1 }
2823    
2824 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2825     undef $self->{form_element} if $token->{tag_name} eq 'form';
2826 wakaba 1.1 $clear_up_to_marker->()
2827     if {
2828     button => 1, marquee => 1, object => 1,
2829     }->{$token->{tag_name}};
2830     !!!next-token;
2831     return;
2832     } elsif ({
2833     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2834     }->{$token->{tag_name}}) {
2835     ## has an element in scope
2836     my $i;
2837 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2838     my $node = $self->{open_elements}->[$_];
2839 wakaba 1.1 if ({
2840     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2841     }->{$node->[1]}) {
2842     ## generate implied end tags
2843     if ({
2844     dd => 1, dt => 1, li => 1, p => 1,
2845     td => 1, th => 1, tr => 1,
2846 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2847 wakaba 1.1 !!!back-token;
2848     $token = {type => 'end tag',
2849 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2850 wakaba 1.1 return;
2851     }
2852     $i = $_;
2853     last INSCOPE;
2854     } elsif ({
2855     table => 1, caption => 1, td => 1, th => 1,
2856     button => 1, marquee => 1, object => 1, html => 1,
2857     }->{$node->[1]}) {
2858     last INSCOPE;
2859     }
2860     } # INSCOPE
2861    
2862 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2863     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2864 wakaba 1.1 }
2865    
2866 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2867 wakaba 1.1 !!!next-token;
2868     return;
2869     } elsif ({ ## end tag of a formatting element: handled by $formatting_end_tag
2870     a => 1,
2871     b => 1, big => 1, em => 1, font => 1, i => 1,
2872     nobr => 1, s => 1, small => 1, strike => 1, ## |strike| (not |strile|), so </strike> takes this branch
2873     strong => 1, tt => 1, u => 1,
2874     }->{$token->{tag_name}}) {
2875     $formatting_end_tag->($token->{tag_name});
2876     return;
2877     } elsif ({
2878     caption => 1, col => 1, colgroup => 1, frame => 1,
2879     frameset => 1, head => 1, option => 1, optgroup => 1,
2880     tbody => 1, td => 1, tfoot => 1, th => 1,
2881     thead => 1, tr => 1,
2882     area => 1, basefont => 1, bgsound => 1, br => 1,
2883     embed => 1, hr => 1, iframe => 1, image => 1,
2884 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2885 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2886     table => 1, textarea => 1, wbr => 1,
2887     noscript => 0, ## TODO: if scripting is enabled
2888     }->{$token->{tag_name}}) {
2889 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2890 wakaba 1.1 ## Ignore the token
2891     !!!next-token;
2892     return;
2893    
2894     ## ISSUE: Issue on HTML5 new elements in spec
2895    
2896     } else {
2897     ## Step 1
2898     my $node_i = -1;
2899 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2900 wakaba 1.1
2901     ## Step 2
2902     S2: {
2903     if ($node->[1] eq $token->{tag_name}) {
2904     ## Step 1
2905     ## generate implied end tags
2906     if ({
2907     dd => 1, dt => 1, li => 1, p => 1,
2908     td => 1, th => 1, tr => 1,
2909 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2910 wakaba 1.1 !!!back-token;
2911     $token = {type => 'end tag',
2912 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2913 wakaba 1.1 return;
2914     }
2915    
2916     ## Step 2
2917 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2918     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2919 wakaba 1.1 }
2920    
2921     ## Step 3
2922 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2923    
2924     !!!next-token;
2925 wakaba 1.1 last S2;
2926     } else {
2927     ## Step 3
2928     if (not $formatting_category->{$node->[1]} and
2929     #not $phrasing_category->{$node->[1]} and
2930     ($special_category->{$node->[1]} or
2931     $scoping_category->{$node->[1]})) {
2932 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2933 wakaba 1.1 ## Ignore the token
2934     !!!next-token;
2935     last S2;
2936     }
2937     }
2938    
2939     ## Step 4
2940     $node_i--;
2941 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2942 wakaba 1.1
2943     ## Step 5;
2944     redo S2;
2945     } # S2
2946 wakaba 1.3 return;
2947 wakaba 1.1 }
2948     }
2949     }; # $in_body
2950    
2951     B: {
2952 wakaba 1.3 if ($phase eq 'main') {
2953 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2954 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2955 wakaba 1.1 ## Ignore the token
2956     ## Stay in the phase
2957     !!!next-token;
2958     redo B;
2959     } elsif ($token->{type} eq 'start tag' and
2960     $token->{tag_name} eq 'html') {
2961     ## TODO: unless it is the first start tag token, parse-error
2962 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2963 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2964     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2965     $top_el->set_attribute_ns
2966     (undef, [undef, $attr_name],
2967     $token->{attributes}->{$attr_name}->{value});
2968     }
2969     }
2970     !!!next-token;
2971     redo B;
2972     } elsif ($token->{type} eq 'end-of-file') {
2973     ## Generate implied end tags
2974     if ({
2975     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2976 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2977 wakaba 1.1 !!!back-token;
2978 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
2979 wakaba 1.1 redo B;
2980     }
2981    
2982 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
2983     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
2984     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2985     } elsif (defined $self->{inner_html_node} and
2986     @{$self->{open_elements}} > 1 and
2987     $self->{open_elements}->[1]->[1] ne 'body') {
2988     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2989 wakaba 1.1 }
2990    
2991     ## Stop parsing
2992     last B;
2993    
2994     ## ISSUE: There is an issue in the spec.
2995     } else {
2996 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
2997 wakaba 1.1 if ($token->{type} eq 'character') {
2998     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2999 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3000 wakaba 1.1 unless (length $token->{data}) {
3001     !!!next-token;
3002     redo B;
3003     }
3004     }
3005     ## As if <head>
3006 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3007     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3008     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3009     $self->{insertion_mode} = 'in head';
3010 wakaba 1.1 ## reprocess
3011     redo B;
3012     } elsif ($token->{type} eq 'comment') {
3013     my $comment = $self->{document}->create_comment ($token->{data});
3014 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3015 wakaba 1.1 !!!next-token;
3016     redo B;
3017     } elsif ($token->{type} eq 'start tag') {
3018     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3019 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3020     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3021     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3022     $self->{insertion_mode} = 'in head';
3023 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3024     !!!next-token;
3025     #} elsif ({
3026     # base => 1, link => 1, meta => 1,
3027     # script => 1, style => 1, title => 1,
3028     # }->{$token->{tag_name}}) {
3029     # ## reprocess
3030     } else {
3031     ## reprocess
3032     }
3033     redo B;
3034     } elsif ($token->{type} eq 'end tag') {
3035     if ($token->{tag_name} eq 'html') {
3036     ## As if <head>
3037 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3038     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3039     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3040     $self->{insertion_mode} = 'in head';
3041 wakaba 1.1 ## reprocess
3042     redo B;
3043     } else {
3044 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3045 wakaba 1.1 ## Ignore the token
3046     !!!next-token;
3047     redo B;
3048     }
3049     } else {
3050     die "$0: $token->{type}: Unknown type";
3051     }
3052 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3053 wakaba 1.1 if ($token->{type} eq 'character') {
3054     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3055 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3056 wakaba 1.1 unless (length $token->{data}) {
3057     !!!next-token;
3058     redo B;
3059     }
3060     }
3061    
3062     #
3063     } elsif ($token->{type} eq 'comment') {
3064     my $comment = $self->{document}->create_comment ($token->{data});
3065 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3066 wakaba 1.1 !!!next-token;
3067     redo B;
3068     } elsif ($token->{type} eq 'start tag') {
3069     if ($token->{tag_name} eq 'title') {
3070     ## NOTE: There is an "as if in head" code clone
3071     my $title_el;
3072     !!!create-element ($title_el, 'title', $token->{attributes});
3073 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3074 wakaba 1.1 ->append_child ($title_el);
3075     $self->{content_model_flag} = 'RCDATA';
3076    
3077     my $text = '';
3078     !!!next-token;
3079     while ($token->{type} eq 'character') {
3080     $text .= $token->{data};
3081     !!!next-token;
3082     }
3083     if (length $text) {
3084     $title_el->manakai_append_text ($text);
3085     }
3086    
3087     $self->{content_model_flag} = 'PCDATA';
3088    
3089     if ($token->{type} eq 'end tag' and
3090     $token->{tag_name} eq 'title') {
3091     ## Ignore the token
3092     } else {
3093 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3094 wakaba 1.1 ## ISSUE: And ignore?
3095     }
3096     !!!next-token;
3097     redo B;
3098     } elsif ($token->{tag_name} eq 'style') {
3099     $style_start_tag->();
3100     redo B;
3101     } elsif ($token->{tag_name} eq 'script') {
3102     $script_start_tag->();
3103     redo B;
3104     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3105     ## NOTE: There are "as if in head" code clones
3106     my $el;
3107     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3108 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3109 wakaba 1.1 ->append_child ($el);
3110    
3111     !!!next-token;
3112     redo B;
3113     } elsif ($token->{tag_name} eq 'head') {
3114 wakaba 1.3 !!!parse-error (type => 'in head:head');
3115 wakaba 1.1 ## Ignore the token
3116     !!!next-token;
3117     redo B;
3118     } else {
3119     #
3120     }
3121     } elsif ($token->{type} eq 'end tag') {
3122     if ($token->{tag_name} eq 'head') {
3123 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3124     pop @{$self->{open_elements}};
3125 wakaba 1.1 } else {
3126 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3127 wakaba 1.1 }
3128 wakaba 1.3 $self->{insertion_mode} = 'after head';
3129 wakaba 1.1 !!!next-token;
3130     redo B;
3131     } elsif ($token->{tag_name} eq 'html') {
3132     #
3133     } else {
3134 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3135 wakaba 1.1 ## Ignore the token
3136     !!!next-token;
3137     redo B;
3138     }
3139     } else {
3140     #
3141     }
3142    
3143 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3144 wakaba 1.1 ## As if </head>
3145 wakaba 1.3 pop @{$self->{open_elements}};
3146 wakaba 1.1 }
3147 wakaba 1.3 $self->{insertion_mode} = 'after head';
3148 wakaba 1.1 ## reprocess
3149     redo B;
3150    
3151     ## ISSUE: An issue in the spec.
3152 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3153 wakaba 1.1 if ($token->{type} eq 'character') {
3154     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3155 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3156 wakaba 1.1 unless (length $token->{data}) {
3157     !!!next-token;
3158     redo B;
3159     }
3160     }
3161    
3162     #
3163     } elsif ($token->{type} eq 'comment') {
3164     my $comment = $self->{document}->create_comment ($token->{data});
3165 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3166 wakaba 1.1 !!!next-token;
3167     redo B;
3168     } elsif ($token->{type} eq 'start tag') {
3169     if ($token->{tag_name} eq 'body') {
3170     !!!insert-element ('body', $token->{attributes});
3171 wakaba 1.3 $self->{insertion_mode} = 'in body';
3172 wakaba 1.1 !!!next-token;
3173     redo B;
3174     } elsif ($token->{tag_name} eq 'frameset') {
3175     !!!insert-element ('frameset', $token->{attributes});
3176 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3177 wakaba 1.1 !!!next-token;
3178     redo B;
3179     } elsif ({
3180     base => 1, link => 1, meta => 1,
3181 wakaba 1.3 script => 1, style => 1, title => 1,
3182 wakaba 1.1 }->{$token->{tag_name}}) {
3183 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3184     $self->{insertion_mode} = 'in head';
3185 wakaba 1.1 ## reprocess
3186     redo B;
3187     } else {
3188     #
3189     }
3190     } else {
3191     #
3192     }
3193    
3194     ## As if <body>
3195     !!!insert-element ('body');
3196 wakaba 1.3 $self->{insertion_mode} = 'in body';
3197 wakaba 1.1 ## reprocess
3198     redo B;
3199 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3200 wakaba 1.1 if ($token->{type} eq 'character') {
3201     ## NOTE: There is a code clone of "character in body".
3202     $reconstruct_active_formatting_elements->($insert_to_current);
3203    
3204 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3205 wakaba 1.1
3206     !!!next-token;
3207     redo B;
3208     } elsif ($token->{type} eq 'comment') {
3209     ## NOTE: There is a code clone of "comment in body".
3210     my $comment = $self->{document}->create_comment ($token->{data});
3211 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3212 wakaba 1.1 !!!next-token;
3213     redo B;
3214     } else {
3215     $in_body->($insert_to_current);
3216     redo B;
3217     }
3218 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3219 wakaba 1.1 if ($token->{type} eq 'character') {
3220     ## NOTE: There are "character in table" code clones.
3221     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3222 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3223 wakaba 1.1
3224     unless (length $token->{data}) {
3225     !!!next-token;
3226     redo B;
3227     }
3228     }
3229    
3230 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3231    
3232 wakaba 1.1 ## As if in body, but insert into foster parent element
3233     ## ISSUE: Spec says that "whenever a node would be inserted
3234     ## into the current node" while characters might not be
3235     ## result in a new Text node.
3236     $reconstruct_active_formatting_elements->($insert_to_foster);
3237    
3238     if ({
3239     table => 1, tbody => 1, tfoot => 1,
3240     thead => 1, tr => 1,
3241 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3242 wakaba 1.1 # MUST
3243     my $foster_parent_element;
3244     my $next_sibling;
3245     my $prev_sibling;
3246 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3247     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3248     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3249 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3250     $foster_parent_element = $parent;
3251 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3252 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3253     } else {
3254 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3255 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3256     }
3257     last OE;
3258     }
3259     } # OE
3260 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3261 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3262     unless defined $foster_parent_element;
3263     if (defined $prev_sibling and
3264     $prev_sibling->node_type == 3) {
3265     $prev_sibling->manakai_append_text ($token->{data});
3266     } else {
3267     $foster_parent_element->insert_before
3268     ($self->{document}->create_text_node ($token->{data}),
3269     $next_sibling);
3270     }
3271     } else {
3272 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3273 wakaba 1.1 }
3274    
3275     !!!next-token;
3276     redo B;
3277     } elsif ($token->{type} eq 'comment') {
3278     my $comment = $self->{document}->create_comment ($token->{data});
3279 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3280 wakaba 1.1 !!!next-token;
3281     redo B;
3282     } elsif ($token->{type} eq 'start tag') {
3283     if ({
3284     caption => 1,
3285     colgroup => 1,
3286     tbody => 1, tfoot => 1, thead => 1,
3287     }->{$token->{tag_name}}) {
3288     ## Clear back to table context
3289 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3290     $self->{open_elements}->[-1]->[1] ne 'html') {
3291     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3292     pop @{$self->{open_elements}};
3293 wakaba 1.1 }
3294    
3295     push @$active_formatting_elements, ['#marker', '']
3296     if $token->{tag_name} eq 'caption';
3297    
3298     !!!insert-element ($token->{tag_name}, $token->{attributes});
3299 wakaba 1.3 $self->{insertion_mode} = {
3300 wakaba 1.1 caption => 'in caption',
3301     colgroup => 'in column group',
3302     tbody => 'in table body',
3303     tfoot => 'in table body',
3304     thead => 'in table body',
3305     }->{$token->{tag_name}};
3306     !!!next-token;
3307     redo B;
3308     } elsif ({
3309     col => 1,
3310     td => 1, th => 1, tr => 1,
3311     }->{$token->{tag_name}}) {
3312     ## Clear back to table context
3313 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3314     $self->{open_elements}->[-1]->[1] ne 'html') {
3315     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3316     pop @{$self->{open_elements}};
3317 wakaba 1.1 }
3318    
3319     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3320 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3321 wakaba 1.1 ? 'in column group' : 'in table body';
3322     ## reprocess
3323     redo B;
3324     } elsif ($token->{tag_name} eq 'table') {
3325     ## NOTE: There are code clones for this "table in table"
3326 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3327 wakaba 1.1
3328     ## As if </table>
3329     ## have a table element in table scope
3330     my $i;
3331 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3332     my $node = $self->{open_elements}->[$_];
3333 wakaba 1.1 if ($node->[1] eq 'table') {
3334     $i = $_;
3335     last INSCOPE;
3336     } elsif ({
3337     table => 1, html => 1,
3338     }->{$node->[1]}) {
3339     last INSCOPE;
3340     }
3341     } # INSCOPE
3342     unless (defined $i) {
3343 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3344 wakaba 1.1 ## Ignore tokens </table><table>
3345     !!!next-token;
3346     redo B;
3347     }
3348    
3349     ## generate implied end tags
3350     if ({
3351     dd => 1, dt => 1, li => 1, p => 1,
3352     td => 1, th => 1, tr => 1,
3353 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3354 wakaba 1.1 !!!back-token; # <table>
3355     $token = {type => 'end tag', tag_name => 'table'};
3356     !!!back-token;
3357     $token = {type => 'end tag',
3358 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3359 wakaba 1.1 redo B;
3360     }
3361    
3362 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3363     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3364 wakaba 1.1 }
3365    
3366 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3367 wakaba 1.1
3368 wakaba 1.3 $self->_reset_insertion_mode;
3369 wakaba 1.1
3370     ## reprocess
3371     redo B;
3372     } else {
3373     #
3374     }
3375     } elsif ($token->{type} eq 'end tag') {
3376     if ($token->{tag_name} eq 'table') {
3377     ## have a table element in table scope
3378     my $i;
3379 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3380     my $node = $self->{open_elements}->[$_];
3381 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3382     $i = $_;
3383     last INSCOPE;
3384     } elsif ({
3385     table => 1, html => 1,
3386     }->{$node->[1]}) {
3387     last INSCOPE;
3388     }
3389     } # INSCOPE
3390     unless (defined $i) {
3391 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3392 wakaba 1.1 ## Ignore the token
3393     !!!next-token;
3394     redo B;
3395     }
3396    
3397     ## generate implied end tags
3398     if ({
3399     dd => 1, dt => 1, li => 1, p => 1,
3400     td => 1, th => 1, tr => 1,
3401 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3402 wakaba 1.1 !!!back-token;
3403     $token = {type => 'end tag',
3404 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3405 wakaba 1.1 redo B;
3406     }
3407    
3408 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3409     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3410 wakaba 1.1 }
3411    
3412 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3413 wakaba 1.1
3414 wakaba 1.3 $self->_reset_insertion_mode;
3415 wakaba 1.1
3416     !!!next-token;
3417     redo B;
3418     } elsif ({
3419     body => 1, caption => 1, col => 1, colgroup => 1,
3420     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3421     thead => 1, tr => 1,
3422     }->{$token->{tag_name}}) {
3423 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3424 wakaba 1.1 ## Ignore the token
3425     !!!next-token;
3426     redo B;
3427     } else {
3428     #
3429     }
3430     } else {
3431     #
3432     }
3433    
3434 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3435 wakaba 1.1 $in_body->($insert_to_foster);
3436     redo B;
3437 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3438 wakaba 1.1 if ($token->{type} eq 'character') {
3439     ## NOTE: This is a code clone of "character in body".
3440     $reconstruct_active_formatting_elements->($insert_to_current);
3441    
3442 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3443 wakaba 1.1
3444     !!!next-token;
3445     redo B;
3446     } elsif ($token->{type} eq 'comment') {
3447     ## NOTE: This is a code clone of "comment in body".
3448     my $comment = $self->{document}->create_comment ($token->{data});
3449 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3450 wakaba 1.1 !!!next-token;
3451     redo B;
3452     } elsif ($token->{type} eq 'start tag') {
3453     if ({
3454     caption => 1, col => 1, colgroup => 1, tbody => 1,
3455     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3456     }->{$token->{tag_name}}) {
3457 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3458 wakaba 1.1
3459     ## As if </caption>
3460     ## have a table element in table scope
3461     my $i;
3462 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3463     my $node = $self->{open_elements}->[$_];
3464 wakaba 1.1 if ($node->[1] eq 'caption') {
3465     $i = $_;
3466     last INSCOPE;
3467     } elsif ({
3468     table => 1, html => 1,
3469     }->{$node->[1]}) {
3470     last INSCOPE;
3471     }
3472     } # INSCOPE
3473     unless (defined $i) {
3474 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3475 wakaba 1.1 ## Ignore the token
3476     !!!next-token;
3477     redo B;
3478     }
3479    
3480     ## generate implied end tags
3481     if ({
3482     dd => 1, dt => 1, li => 1, p => 1,
3483     td => 1, th => 1, tr => 1,
3484 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3485 wakaba 1.1 !!!back-token; # <?>
3486     $token = {type => 'end tag', tag_name => 'caption'};
3487     !!!back-token;
3488     $token = {type => 'end tag',
3489 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3490 wakaba 1.1 redo B;
3491     }
3492    
3493 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3494     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3495 wakaba 1.1 }
3496    
3497 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3498 wakaba 1.1
3499     $clear_up_to_marker->();
3500    
3501 wakaba 1.3 $self->{insertion_mode} = 'in table';
3502 wakaba 1.1
3503     ## reprocess
3504     redo B;
3505     } else {
3506     #
3507     }
3508     } elsif ($token->{type} eq 'end tag') {
3509     if ($token->{tag_name} eq 'caption') {
3510     ## have a table element in table scope
3511     my $i;
3512 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3513     my $node = $self->{open_elements}->[$_];
3514 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3515     $i = $_;
3516     last INSCOPE;
3517     } elsif ({
3518     table => 1, html => 1,
3519     }->{$node->[1]}) {
3520     last INSCOPE;
3521     }
3522     } # INSCOPE
3523     unless (defined $i) {
3524 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3525 wakaba 1.1 ## Ignore the token
3526     !!!next-token;
3527     redo B;
3528     }
3529    
3530     ## generate implied end tags
3531     if ({
3532     dd => 1, dt => 1, li => 1, p => 1,
3533     td => 1, th => 1, tr => 1,
3534 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3535 wakaba 1.1 !!!back-token;
3536     $token = {type => 'end tag',
3537 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3538 wakaba 1.1 redo B;
3539     }
3540    
3541 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3542     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3543 wakaba 1.1 }
3544    
3545 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3546 wakaba 1.1
3547     $clear_up_to_marker->();
3548    
3549 wakaba 1.3 $self->{insertion_mode} = 'in table';
3550 wakaba 1.1
3551     !!!next-token;
3552     redo B;
3553     } elsif ($token->{tag_name} eq 'table') {
3554 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3555 wakaba 1.1
3556     ## As if </caption>
3557     ## have a table element in table scope
3558     my $i;
3559 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3560     my $node = $self->{open_elements}->[$_];
3561 wakaba 1.1 if ($node->[1] eq 'caption') {
3562     $i = $_;
3563     last INSCOPE;
3564     } elsif ({
3565     table => 1, html => 1,
3566     }->{$node->[1]}) {
3567     last INSCOPE;
3568     }
3569     } # INSCOPE
3570     unless (defined $i) {
3571 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3572 wakaba 1.1 ## Ignore the token
3573     !!!next-token;
3574     redo B;
3575     }
3576    
3577     ## generate implied end tags
3578     if ({
3579     dd => 1, dt => 1, li => 1, p => 1,
3580     td => 1, th => 1, tr => 1,
3581 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3582 wakaba 1.1 !!!back-token; # </table>
3583     $token = {type => 'end tag', tag_name => 'caption'};
3584     !!!back-token;
3585     $token = {type => 'end tag',
3586 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3587 wakaba 1.1 redo B;
3588     }
3589    
3590 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3591     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3592 wakaba 1.1 }
3593    
3594 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3595 wakaba 1.1
3596     $clear_up_to_marker->();
3597    
3598 wakaba 1.3 $self->{insertion_mode} = 'in table';
3599 wakaba 1.1
3600     ## reprocess
3601     redo B;
3602     } elsif ({
3603     body => 1, col => 1, colgroup => 1,
3604     html => 1, tbody => 1, td => 1, tfoot => 1,
3605     th => 1, thead => 1, tr => 1,
3606     }->{$token->{tag_name}}) {
3607 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3608 wakaba 1.1 ## Ignore the token: it must be consumed before |redo B|, otherwise the same end tag is reprocessed in this mode forever.
3609     !!!next-token; redo B;
3610     } else {
3611     #
3612     }
3613     } else {
3614     #
3615     }
3616    
3617     $in_body->($insert_to_current);
3618     redo B;
3619 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3620 wakaba 1.1 if ($token->{type} eq 'character') {
3621     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3622 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3623 wakaba 1.1 unless (length $token->{data}) {
3624     !!!next-token;
3625     redo B;
3626     }
3627     }
3628    
3629     #
3630     } elsif ($token->{type} eq 'comment') {
3631     my $comment = $self->{document}->create_comment ($token->{data});
3632 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3633 wakaba 1.1 !!!next-token;
3634     redo B;
3635     } elsif ($token->{type} eq 'start tag') {
3636     if ($token->{tag_name} eq 'col') {
3637     !!!insert-element ($token->{tag_name}, $token->{attributes});
3638 wakaba 1.3 pop @{$self->{open_elements}};
3639 wakaba 1.1 !!!next-token;
3640     redo B;
3641     } else {
3642     #
3643     }
3644     } elsif ($token->{type} eq 'end tag') {
3645     if ($token->{tag_name} eq 'colgroup') {
3646 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3647     !!!parse-error (type => 'unmatched end tag:colgroup');
3648 wakaba 1.1 ## Ignore the token
3649     !!!next-token;
3650     redo B;
3651     } else {
3652 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3653     $self->{insertion_mode} = 'in table';
3654 wakaba 1.1 !!!next-token;
3655     redo B;
3656     }
3657     } elsif ($token->{tag_name} eq 'col') {
3658 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3659 wakaba 1.1 ## Ignore the token
3660     !!!next-token;
3661     redo B;
3662     } else {
3663     #
3664     }
3665     } else {
3666     #
3667     }
3668    
3669     ## As if </colgroup>
3670 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3671     !!!parse-error (type => 'unmatched end tag:colgroup');
3672 wakaba 1.1 ## Ignore the token
3673     !!!next-token;
3674     redo B;
3675     } else {
3676 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3677     $self->{insertion_mode} = 'in table';
3678 wakaba 1.1 ## reprocess
3679     redo B;
3680     }
3681 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3682 wakaba 1.1 if ($token->{type} eq 'character') {
3683     ## NOTE: This is a "character in table" code clone.
3684     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3685 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3686 wakaba 1.1
3687     unless (length $token->{data}) {
3688     !!!next-token;
3689     redo B;
3690     }
3691     }
3692    
3693 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3694    
3695 wakaba 1.1 ## As if in body, but insert into foster parent element
3696     ## ISSUE: Spec says that "whenever a node would be inserted
3697     ## into the current node" while characters might not
3698     ## result in a new Text node.
3699     $reconstruct_active_formatting_elements->($insert_to_foster);
3700    
3701     if ({
3702     table => 1, tbody => 1, tfoot => 1,
3703     thead => 1, tr => 1,
3704 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3705 wakaba 1.1 # MUST
3706     my $foster_parent_element;
3707     my $next_sibling;
3708     my $prev_sibling;
3709 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3710     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3711     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3712 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3713     $foster_parent_element = $parent;
3714 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3715 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3716     } else {
3717 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3718 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3719     }
3720     last OE;
3721     }
3722     } # OE
3723 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3724 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3725     unless defined $foster_parent_element;
3726     if (defined $prev_sibling and
3727     $prev_sibling->node_type == 3) {
3728     $prev_sibling->manakai_append_text ($token->{data});
3729     } else {
3730     $foster_parent_element->insert_before
3731     ($self->{document}->create_text_node ($token->{data}),
3732     $next_sibling);
3733     }
3734     } else {
3735 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3736 wakaba 1.1 }
3737    
3738     !!!next-token;
3739     redo B;
3740     } elsif ($token->{type} eq 'comment') {
3741     ## Copied from 'in table'
3742     my $comment = $self->{document}->create_comment ($token->{data});
3743 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3744 wakaba 1.1 !!!next-token;
3745     redo B;
3746     } elsif ($token->{type} eq 'start tag') {
3747     if ({
3748     tr => 1,
3749     th => 1, td => 1,
3750     }->{$token->{tag_name}}) {
3751 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3752     !!!parse-error (type => 'missing start tag:tr');
3753     }
3754    
3755 wakaba 1.1 ## Clear back to table body context
3756     while (not {
3757     tbody => 1, tfoot => 1, thead => 1, html => 1,
3758 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3759     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3760     pop @{$self->{open_elements}};
3761 wakaba 1.1 }
3762    
3763 wakaba 1.3 $self->{insertion_mode} = 'in row';
3764 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3765     !!!insert-element ($token->{tag_name}, $token->{attributes});
3766     !!!next-token;
3767     } else {
3768     !!!insert-element ('tr');
3769     ## reprocess
3770     }
3771     redo B;
3772     } elsif ({
3773     caption => 1, col => 1, colgroup => 1,
3774     tbody => 1, tfoot => 1, thead => 1,
3775     }->{$token->{tag_name}}) {
3776     ## have an element in table scope
3777     my $i;
3778 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3779     my $node = $self->{open_elements}->[$_];
3780 wakaba 1.1 if ({
3781     tbody => 1, thead => 1, tfoot => 1,
3782     }->{$node->[1]}) {
3783     $i = $_;
3784     last INSCOPE;
3785     } elsif ({
3786     table => 1, html => 1,
3787     }->{$node->[1]}) {
3788     last INSCOPE;
3789     }
3790     } # INSCOPE
3791     unless (defined $i) {
3792 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3793 wakaba 1.1 ## Ignore the token
3794     !!!next-token;
3795     redo B;
3796     }
3797    
3798     ## Clear back to table body context
3799     while (not {
3800     tbody => 1, tfoot => 1, thead => 1, html => 1,
3801 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3802     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3803     pop @{$self->{open_elements}};
3804 wakaba 1.1 }
3805    
3806     ## As if <{current node}>
3807     ## have an element in table scope
3808     ## true by definition
3809    
3810     ## Clear back to table body context
3811     ## nop by definition
3812    
3813 wakaba 1.3 pop @{$self->{open_elements}};
3814     $self->{insertion_mode} = 'in table';
3815 wakaba 1.1 ## reprocess
3816     redo B;
3817     } elsif ($token->{tag_name} eq 'table') {
3818     ## NOTE: This is a code clone of "table in table"
3819 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3820 wakaba 1.1
3821     ## As if </table>
3822     ## have a table element in table scope
3823     my $i;
3824 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3825     my $node = $self->{open_elements}->[$_];
3826 wakaba 1.1 if ($node->[1] eq 'table') {
3827     $i = $_;
3828     last INSCOPE;
3829     } elsif ({
3830     table => 1, html => 1,
3831     }->{$node->[1]}) {
3832     last INSCOPE;
3833     }
3834     } # INSCOPE
3835     unless (defined $i) {
3836 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3837 wakaba 1.1 ## Ignore tokens </table><table>
3838     !!!next-token;
3839     redo B;
3840     }
3841    
3842     ## generate implied end tags
3843     if ({
3844     dd => 1, dt => 1, li => 1, p => 1,
3845     td => 1, th => 1, tr => 1,
3846 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3847 wakaba 1.1 !!!back-token; # <table>
3848     $token = {type => 'end tag', tag_name => 'table'};
3849     !!!back-token;
3850     $token = {type => 'end tag',
3851 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3852 wakaba 1.1 redo B;
3853     }
3854    
3855 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3856     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3857 wakaba 1.1 }
3858    
3859 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3860 wakaba 1.1
3861 wakaba 1.3 $self->_reset_insertion_mode;
3862 wakaba 1.1
3863     ## reprocess
3864     redo B;
3865     } else {
3866     #
3867     }
3868     } elsif ($token->{type} eq 'end tag') {
3869     if ({
3870     tbody => 1, tfoot => 1, thead => 1,
3871     }->{$token->{tag_name}}) {
3872     ## have an element in table scope
3873     my $i;
3874 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3875     my $node = $self->{open_elements}->[$_];
3876 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3877     $i = $_;
3878     last INSCOPE;
3879     } elsif ({
3880     table => 1, html => 1,
3881     }->{$node->[1]}) {
3882     last INSCOPE;
3883     }
3884     } # INSCOPE
3885     unless (defined $i) {
3886 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3887 wakaba 1.1 ## Ignore the token
3888     !!!next-token;
3889     redo B;
3890     }
3891    
3892     ## Clear back to table body context
3893     while (not {
3894     tbody => 1, tfoot => 1, thead => 1, html => 1,
3895 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3896     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3897     pop @{$self->{open_elements}};
3898 wakaba 1.1 }
3899    
3900 wakaba 1.3 pop @{$self->{open_elements}};
3901     $self->{insertion_mode} = 'in table';
3902 wakaba 1.1 !!!next-token;
3903     redo B;
3904     } elsif ($token->{tag_name} eq 'table') {
3905     ## have an element in table scope
3906     my $i;
3907 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3908     my $node = $self->{open_elements}->[$_];
3909 wakaba 1.1 if ({
3910     tbody => 1, thead => 1, tfoot => 1,
3911     }->{$node->[1]}) {
3912     $i = $_;
3913     last INSCOPE;
3914     } elsif ({
3915     table => 1, html => 1,
3916     }->{$node->[1]}) {
3917     last INSCOPE;
3918     }
3919     } # INSCOPE
3920     unless (defined $i) {
3921 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3922 wakaba 1.1 ## Ignore the token
3923     !!!next-token;
3924     redo B;
3925     }
3926    
3927     ## Clear back to table body context
3928     while (not {
3929     tbody => 1, tfoot => 1, thead => 1, html => 1,
3930 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3931     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3932     pop @{$self->{open_elements}};
3933 wakaba 1.1 }
3934    
3935     ## As if <{current node}>
3936     ## have an element in table scope
3937     ## true by definition
3938    
3939     ## Clear back to table body context
3940     ## nop by definition
3941    
3942 wakaba 1.3 pop @{$self->{open_elements}};
3943     $self->{insertion_mode} = 'in table';
3944 wakaba 1.1 ## reprocess
3945     redo B;
3946     } elsif ({
3947     body => 1, caption => 1, col => 1, colgroup => 1,
3948     html => 1, td => 1, th => 1, tr => 1,
3949     }->{$token->{tag_name}}) {
3950 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3951 wakaba 1.1 ## Ignore the token
3952     !!!next-token;
3953     redo B;
3954     } else {
3955     #
3956     }
3957     } else {
3958     #
3959     }
3960    
3961     ## As if in table
3962 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3963 wakaba 1.1 $in_body->($insert_to_foster);
3964     redo B;
3965 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
3966 wakaba 1.1 if ($token->{type} eq 'character') {
3967     ## NOTE: This is a "character in table" code clone.
3968     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3969 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3970 wakaba 1.1
3971     unless (length $token->{data}) {
3972     !!!next-token;
3973     redo B;
3974     }
3975     }
3976    
3977 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3978    
3979 wakaba 1.1 ## As if in body, but insert into foster parent element
3980     ## ISSUE: Spec says that "whenever a node would be inserted
3981     ## into the current node" while characters might not
3982     ## result in a new Text node.
3983     $reconstruct_active_formatting_elements->($insert_to_foster);
3984    
3985     if ({
3986     table => 1, tbody => 1, tfoot => 1,
3987     thead => 1, tr => 1,
3988 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3989 wakaba 1.1 # MUST
3990     my $foster_parent_element;
3991     my $next_sibling;
3992     my $prev_sibling;
3993 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3994     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3995     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3996 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3997     $foster_parent_element = $parent;
3998 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3999 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4000     } else {
4001 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4002 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4003     }
4004     last OE;
4005     }
4006     } # OE
4007 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4008 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4009     unless defined $foster_parent_element;
4010     if (defined $prev_sibling and
4011     $prev_sibling->node_type == 3) {
4012     $prev_sibling->manakai_append_text ($token->{data});
4013     } else {
4014     $foster_parent_element->insert_before
4015     ($self->{document}->create_text_node ($token->{data}),
4016     $next_sibling);
4017     }
4018     } else {
4019 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4020 wakaba 1.1 }
4021    
4022     !!!next-token;
4023     redo B;
4024     } elsif ($token->{type} eq 'comment') {
4025     ## Copied from 'in table'
4026     my $comment = $self->{document}->create_comment ($token->{data});
4027 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4028 wakaba 1.1 !!!next-token;
4029     redo B;
4030     } elsif ($token->{type} eq 'start tag') {
4031     if ($token->{tag_name} eq 'th' or
4032     $token->{tag_name} eq 'td') {
4033     ## Clear back to table row context
4034     while (not {
4035     tr => 1, html => 1,
4036 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4037     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4038     pop @{$self->{open_elements}};
4039 wakaba 1.1 }
4040    
4041     !!!insert-element ($token->{tag_name}, $token->{attributes});
4042 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4043 wakaba 1.1
4044     push @$active_formatting_elements, ['#marker', ''];
4045    
4046     !!!next-token;
4047     redo B;
4048     } elsif ({
4049     caption => 1, col => 1, colgroup => 1,
4050     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4051     }->{$token->{tag_name}}) {
4052     ## As if </tr>
4053     ## have an element in table scope
4054     my $i;
4055 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4056     my $node = $self->{open_elements}->[$_];
4057 wakaba 1.1 if ($node->[1] eq 'tr') {
4058     $i = $_;
4059     last INSCOPE;
4060     } elsif ({
4061     table => 1, html => 1,
4062     }->{$node->[1]}) {
4063     last INSCOPE;
4064     }
4065     } # INSCOPE
4066     unless (defined $i) {
4067 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4068 wakaba 1.1 ## Ignore the token (no |tr| element was found in table scope)
4069     !!!next-token;
4070     redo B;
4071     }
4072    
4073     ## Clear back to table row context
4074     while (not {
4075     tr => 1, html => 1,
4076 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4077     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4078     pop @{$self->{open_elements}};
4079 wakaba 1.1 }
4080    
4081 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4082     $self->{insertion_mode} = 'in table body';
4083 wakaba 1.1 ## reprocess
4084     redo B;
4085     } elsif ($token->{tag_name} eq 'table') {
4086     ## NOTE: This is a code clone of "table in table"
4087 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4088 wakaba 1.1
4089     ## As if </table>
4090     ## have a table element in table scope
4091     my $i;
4092 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4093     my $node = $self->{open_elements}->[$_];
4094 wakaba 1.1 if ($node->[1] eq 'table') {
4095     $i = $_;
4096     last INSCOPE;
4097     } elsif ({
4098     table => 1, html => 1,
4099     }->{$node->[1]}) {
4100     last INSCOPE;
4101     }
4102     } # INSCOPE
4103     unless (defined $i) {
4104 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4105 wakaba 1.1 ## Ignore tokens </table><table>
4106     !!!next-token;
4107     redo B;
4108     }
4109    
4110     ## generate implied end tags
4111     if ({
4112     dd => 1, dt => 1, li => 1, p => 1,
4113     td => 1, th => 1, tr => 1,
4114 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4115 wakaba 1.1 !!!back-token; # <table>
4116     $token = {type => 'end tag', tag_name => 'table'};
4117     !!!back-token;
4118     $token = {type => 'end tag',
4119 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4120 wakaba 1.1 redo B;
4121     }
4122    
4123 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4124     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4125 wakaba 1.1 }
4126    
4127 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4128 wakaba 1.1
4129 wakaba 1.3 $self->_reset_insertion_mode;
4130 wakaba 1.1
4131     ## reprocess
4132     redo B;
4133     } else {
4134     #
4135     }
4136     } elsif ($token->{type} eq 'end tag') {
4137     if ($token->{tag_name} eq 'tr') {
4138     ## have an element in table scope
4139     my $i;
4140 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4141     my $node = $self->{open_elements}->[$_];
4142 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4143     $i = $_;
4144     last INSCOPE;
4145     } elsif ({
4146     table => 1, html => 1,
4147     }->{$node->[1]}) {
4148     last INSCOPE;
4149     }
4150     } # INSCOPE
4151     unless (defined $i) {
4152 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4153 wakaba 1.1 ## Ignore the token
4154     !!!next-token;
4155     redo B;
4156     }
4157    
4158     ## Clear back to table row context
4159     while (not {
4160     tr => 1, html => 1,
4161 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4162     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4163     pop @{$self->{open_elements}};
4164 wakaba 1.1 }
4165    
4166 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4167     $self->{insertion_mode} = 'in table body';
4168 wakaba 1.1 !!!next-token;
4169     redo B;
4170     } elsif ($token->{tag_name} eq 'table') {
4171     ## As if </tr>
4172     ## have an element in table scope
4173     my $i;
4174 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4175     my $node = $self->{open_elements}->[$_];
4176 wakaba 1.1 if ($node->[1] eq 'tr') {
4177     $i = $_;
4178     last INSCOPE;
4179     } elsif ({
4180     table => 1, html => 1,
4181     }->{$node->[1]}) {
4182     last INSCOPE;
4183     }
4184     } # INSCOPE
4185     unless (defined $i) {
4186 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4187 wakaba 1.1 ## Ignore the token (no |tr| element was found in table scope)
4188     !!!next-token;
4189     redo B;
4190     }
4191    
4192     ## Clear back to table row context
4193     while (not {
4194     tr => 1, html => 1,
4195 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4196     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4197     pop @{$self->{open_elements}};
4198 wakaba 1.1 }
4199    
4200 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4201     $self->{insertion_mode} = 'in table body';
4202 wakaba 1.1 ## reprocess
4203     redo B;
4204     } elsif ({
4205     tbody => 1, tfoot => 1, thead => 1,
4206     }->{$token->{tag_name}}) {
4207     ## have an element in table scope
4208     my $i;
4209 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4210     my $node = $self->{open_elements}->[$_];
4211 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4212     $i = $_;
4213     last INSCOPE;
4214     } elsif ({
4215     table => 1, html => 1,
4216     }->{$node->[1]}) {
4217     last INSCOPE;
4218     }
4219     } # INSCOPE
4220     unless (defined $i) {
4221 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4222 wakaba 1.1 ## Ignore the token
4223     !!!next-token;
4224     redo B;
4225     }
4226    
4227     ## As if </tr>
4228     ## have an element in table scope
4229     $i = undef; # reuse the |$i| declared above instead of redeclaring it in the same scope
4230 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4231     my $node = $self->{open_elements}->[$_];
4232 wakaba 1.1 if ($node->[1] eq 'tr') {
4233     $i = $_;
4234     last INSCOPE;
4235     } elsif ({
4236     table => 1, html => 1,
4237     }->{$node->[1]}) {
4238     last INSCOPE;
4239     }
4240     } # INSCOPE
4241     unless (defined $i) {
4242 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4243 wakaba 1.1 ## Ignore the token
4244     !!!next-token;
4245     redo B;
4246     }
4247    
4248     ## Clear back to table row context
4249     while (not {
4250     tr => 1, html => 1,
4251 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4252     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4253     pop @{$self->{open_elements}};
4254 wakaba 1.1 }
4255    
4256 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4257     $self->{insertion_mode} = 'in table body';
4258 wakaba 1.1 ## reprocess
4259     redo B;
4260     } elsif ({
4261     body => 1, caption => 1, col => 1,
4262     colgroup => 1, html => 1, td => 1, th => 1,
4263     }->{$token->{tag_name}}) {
4264 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4265 wakaba 1.1 ## Ignore the token
4266     !!!next-token;
4267     redo B;
4268     } else {
4269     #
4270     }
4271     } else {
4272     #
4273     }
4274    
4275     ## As if in table
4276 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4277 wakaba 1.1 $in_body->($insert_to_foster);
4278     redo B;
4279 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4280 wakaba 1.1 if ($token->{type} eq 'character') {
4281     ## NOTE: This is a code clone of "character in body".
4282     $reconstruct_active_formatting_elements->($insert_to_current);
4283    
4284 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4285 wakaba 1.1
4286     !!!next-token;
4287     redo B;
4288     } elsif ($token->{type} eq 'comment') {
4289     ## NOTE: This is a code clone of "comment in body".
4290     my $comment = $self->{document}->create_comment ($token->{data});
4291 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4292 wakaba 1.1 !!!next-token;
4293     redo B;
4294     } elsif ($token->{type} eq 'start tag') {
4295     if ({
4296     caption => 1, col => 1, colgroup => 1,
4297     tbody => 1, td => 1, tfoot => 1, th => 1,
4298     thead => 1, tr => 1,
4299     }->{$token->{tag_name}}) {
4300     ## have an element in table scope
4301     my $tn;
4302 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4303     my $node = $self->{open_elements}->[$_];
4304 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4305     $tn = $node->[1];
4306     last INSCOPE;
4307     } elsif ({
4308     table => 1, html => 1,
4309     }->{$node->[1]}) {
4310     last INSCOPE;
4311     }
4312     } # INSCOPE
4313     unless (defined $tn) {
4314 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4315 wakaba 1.1 ## Ignore the token
4316     !!!next-token;
4317     redo B;
4318     }
4319    
4320     ## Close the cell
4321     !!!back-token; # <?>
4322     $token = {type => 'end tag', tag_name => $tn};
4323     redo B;
4324     } else {
4325     #
4326     }
4327     } elsif ($token->{type} eq 'end tag') {
4328     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4329     ## have an element in table scope
4330     my $i;
4331 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4332     my $node = $self->{open_elements}->[$_];
4333 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4334     $i = $_;
4335     last INSCOPE;
4336     } elsif ({
4337     table => 1, html => 1,
4338     }->{$node->[1]}) {
4339     last INSCOPE;
4340     }
4341     } # INSCOPE
4342     unless (defined $i) {
4343 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4344 wakaba 1.1 ## Ignore the token
4345     !!!next-token;
4346     redo B;
4347     }
4348    
4349     ## generate implied end tags
4350     if ({
4351     dd => 1, dt => 1, li => 1, p => 1,
4352     td => ($token->{tag_name} eq 'th'),
4353     th => ($token->{tag_name} eq 'td'),
4354     tr => 1,
4355 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4356 wakaba 1.1 !!!back-token;
4357     $token = {type => 'end tag',
4358 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4359 wakaba 1.1 redo B;
4360     }
4361    
4362 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4363     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4364 wakaba 1.1 }
4365    
4366 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4367 wakaba 1.1
4368     $clear_up_to_marker->();
4369    
4370 wakaba 1.3 $self->{insertion_mode} = 'in row';
4371 wakaba 1.1
4372     !!!next-token;
4373     redo B;
4374     } elsif ({
4375     body => 1, caption => 1, col => 1,
4376     colgroup => 1, html => 1,
4377     }->{$token->{tag_name}}) {
4378 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4379 wakaba 1.1 ## Ignore the token
4380     !!!next-token;
4381     redo B;
4382     } elsif ({
4383     table => 1, tbody => 1, tfoot => 1,
4384     thead => 1, tr => 1,
4385     }->{$token->{tag_name}}) {
4386     ## have an element in table scope
4387     my $i;
4388     my $tn;
4389 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4390     my $node = $self->{open_elements}->[$_];
4391 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4392     $i = $_;
4393     last INSCOPE;
4394     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4395     $tn = $node->[1];
4396     ## NOTE: There is exactly one |td| or |th| element
4397     ## in scope in the stack of open elements by definition.
4398     } elsif ({
4399     table => 1, html => 1,
4400     }->{$node->[1]}) {
4401     last INSCOPE;
4402     }
4403     } # INSCOPE
4404     unless (defined $i) {
4405 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4406 wakaba 1.1 ## Ignore the token
4407     !!!next-token;
4408     redo B;
4409     }
4410    
4411     ## Close the cell
4412     !!!back-token; # </?>
4413     $token = {type => 'end tag', tag_name => $tn};
4414     redo B;
4415     } else {
4416     #
4417     }
4418     } else {
4419     #
4420     }
4421    
4422     $in_body->($insert_to_current);
4423     redo B;
4424 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4425 wakaba 1.1 if ($token->{type} eq 'character') {
4426 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4427 wakaba 1.1 !!!next-token;
4428     redo B;
4429     } elsif ($token->{type} eq 'comment') {
4430     my $comment = $self->{document}->create_comment ($token->{data});
4431 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4432 wakaba 1.1 !!!next-token;
4433     redo B;
4434     } elsif ($token->{type} eq 'start tag') {
4435     if ($token->{tag_name} eq 'option') {
4436 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4437 wakaba 1.1 ## As if </option>
4438 wakaba 1.3 pop @{$self->{open_elements}};
4439 wakaba 1.1 }
4440    
4441     !!!insert-element ($token->{tag_name}, $token->{attributes});
4442     !!!next-token;
4443     redo B;
4444     } elsif ($token->{tag_name} eq 'optgroup') {
4445 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4446 wakaba 1.1 ## As if </option>
4447 wakaba 1.3 pop @{$self->{open_elements}};
4448 wakaba 1.1 }
4449    
4450 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4451 wakaba 1.1 ## As if </optgroup>
4452 wakaba 1.3 pop @{$self->{open_elements}};
4453 wakaba 1.1 }
4454    
4455     !!!insert-element ($token->{tag_name}, $token->{attributes});
4456     !!!next-token;
4457     redo B;
4458     } elsif ($token->{tag_name} eq 'select') {
4459 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4460 wakaba 1.1 ## As if </select> instead
4461     ## have an element in table scope
4462     my $i;
4463 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4464     my $node = $self->{open_elements}->[$_];
4465 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4466     $i = $_;
4467     last INSCOPE;
4468     } elsif ({
4469     table => 1, html => 1,
4470     }->{$node->[1]}) {
4471     last INSCOPE;
4472     }
4473     } # INSCOPE
4474     unless (defined $i) {
4475 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4476 wakaba 1.1 ## Ignore the token
4477     !!!next-token;
4478     redo B;
4479     }
4480    
4481 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4482 wakaba 1.1
4483 wakaba 1.3 $self->_reset_insertion_mode;
4484 wakaba 1.1
4485     !!!next-token;
4486     redo B;
4487     } else {
4488     #
4489     }
4490     } elsif ($token->{type} eq 'end tag') {
4491     if ($token->{tag_name} eq 'optgroup') {
4492 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4493     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4494 wakaba 1.1 ## As if </option>
4495 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4496     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4497     pop @{$self->{open_elements}};
4498 wakaba 1.1 } else {
4499 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4500 wakaba 1.1 ## Ignore the token
4501     }
4502     !!!next-token;
4503     redo B;
4504     } elsif ($token->{tag_name} eq 'option') {
4505 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4506     pop @{$self->{open_elements}};
4507 wakaba 1.1 } else {
4508 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4509 wakaba 1.1 ## Ignore the token
4510     }
4511     !!!next-token;
4512     redo B;
4513     } elsif ($token->{tag_name} eq 'select') {
4514     ## have an element in table scope
4515     my $i;
4516 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4517     my $node = $self->{open_elements}->[$_];
4518 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4519     $i = $_;
4520     last INSCOPE;
4521     } elsif ({
4522     table => 1, html => 1,
4523     }->{$node->[1]}) {
4524     last INSCOPE;
4525     }
4526     } # INSCOPE
4527     unless (defined $i) {
4528 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4529 wakaba 1.1 ## Ignore the token
4530     !!!next-token;
4531     redo B;
4532     }
4533    
4534 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4535 wakaba 1.1
4536 wakaba 1.3 $self->_reset_insertion_mode;
4537 wakaba 1.1
4538     !!!next-token;
4539     redo B;
4540     } elsif ({
4541     caption => 1, table => 1, tbody => 1,
4542     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4543     }->{$token->{tag_name}}) {
4544 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4545 wakaba 1.1
4546     ## have an element in table scope
4547     my $i;
4548 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4549     my $node = $self->{open_elements}->[$_];
4550 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4551     $i = $_;
4552     last INSCOPE;
4553     } elsif ({
4554     table => 1, html => 1,
4555     }->{$node->[1]}) {
4556     last INSCOPE;
4557     }
4558     } # INSCOPE
4559     unless (defined $i) {
4560     ## Ignore the token
4561     !!!next-token;
4562     redo B;
4563     }
4564    
4565     ## As if </select>
4566     ## have an element in table scope
4567     undef $i;
4568 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4569     my $node = $self->{open_elements}->[$_];
4570 wakaba 1.1 if ($node->[1] eq 'select') {
4571     $i = $_;
4572     last INSCOPE;
4573     } elsif ({
4574     table => 1, html => 1,
4575     }->{$node->[1]}) {
4576     last INSCOPE;
4577     }
4578     } # INSCOPE
4579     unless (defined $i) {
4580 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4581 wakaba 1.1 ## Ignore the </select> token
4582     !!!next-token; ## TODO: ok?
4583     redo B;
4584     }
4585    
4586 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4587 wakaba 1.1
4588 wakaba 1.3 $self->_reset_insertion_mode;
4589 wakaba 1.1
4590     ## reprocess
4591     redo B;
4592     } else {
4593     #
4594     }
4595     } else {
4596     #
4597     }
4598    
4599 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4600 wakaba 1.1 ## Ignore the token
4601     !!!next-token;
4602     redo B;
4603 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4604 wakaba 1.1 if ($token->{type} eq 'character') {
4605     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4606     ## As if in body
4607     $reconstruct_active_formatting_elements->($insert_to_current);
4608    
4609 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4610 wakaba 1.1
4611     unless (length $token->{data}) {
4612     !!!next-token;
4613     redo B;
4614     }
4615     }
4616    
4617     #
4618 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4619 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4620     my $comment = $self->{document}->create_comment ($token->{data});
4621 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4622 wakaba 1.1 !!!next-token;
4623     redo B;
4624 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4625     !!!parse-error (type => 'after body:'.$token->{tag_name});
4626     #
4627 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4628     if ($token->{tag_name} eq 'html') {
4629 wakaba 1.3 if (defined $self->{inner_html_node}) {
4630     !!!parse-error (type => 'unmatched end tag:html');
4631     ## Ignore the token
4632     !!!next-token;
4633     redo B;
4634     } else {
4635     $phase = 'trailing end';
4636     !!!next-token;
4637     redo B;
4638     }
4639 wakaba 1.1 } else {
4640 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4641 wakaba 1.1 }
4642     } else {
4643 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4644 wakaba 1.1 }
4645    
4646 wakaba 1.3 $self->{insertion_mode} = 'in body';
4647 wakaba 1.1 ## reprocess
4648     redo B;
4649 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4650 wakaba 1.1 if ($token->{type} eq 'character') {
4651     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4652 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4653 wakaba 1.1
4654     unless (length $token->{data}) {
4655     !!!next-token;
4656     redo B;
4657     }
4658     }
4659    
4660     #
4661     } elsif ($token->{type} eq 'comment') {
4662     my $comment = $self->{document}->create_comment ($token->{data});
4663 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4664 wakaba 1.1 !!!next-token;
4665     redo B;
4666     } elsif ($token->{type} eq 'start tag') {
4667     if ($token->{tag_name} eq 'frameset') {
4668     !!!insert-element ($token->{tag_name}, $token->{attributes});
4669     !!!next-token;
4670     redo B;
4671     } elsif ($token->{tag_name} eq 'frame') {
4672     !!!insert-element ($token->{tag_name}, $token->{attributes});
4673 wakaba 1.3 pop @{$self->{open_elements}};
4674 wakaba 1.1 !!!next-token;
4675     redo B;
4676     } elsif ($token->{tag_name} eq 'noframes') {
4677     $in_body->($insert_to_current);
4678     redo B;
4679     } else {
4680     #
4681     }
4682     } elsif ($token->{type} eq 'end tag') {
4683     if ($token->{tag_name} eq 'frameset') {
4684 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4685     @{$self->{open_elements}} == 1) {
4686     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4687 wakaba 1.1 ## Ignore the token
4688     !!!next-token;
4689     } else {
4690 wakaba 1.3 pop @{$self->{open_elements}};
4691 wakaba 1.1 !!!next-token;
4692     }
4693    
4694     ## if not inner_html and
4695 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4696     $self->{insertion_mode} = 'after frameset';
4697 wakaba 1.1 }
4698     redo B;
4699     } else {
4700     #
4701     }
4702     } else {
4703     #
4704     }
4705    
4706 wakaba 1.3 if (defined $token->{tag_name}) {
4707     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4708     } else {
4709     !!!parse-error (type => 'in frameset:#'.$token->{type});
4710     }
4711 wakaba 1.1 ## Ignore the token
4712     !!!next-token;
4713     redo B;
4714 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4715 wakaba 1.1 if ($token->{type} eq 'character') {
4716     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4717 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4718 wakaba 1.1
4719     unless (length $token->{data}) {
4720     !!!next-token;
4721     redo B;
4722     }
4723     }
4724    
4725     #
4726     } elsif ($token->{type} eq 'comment') {
4727     my $comment = $self->{document}->create_comment ($token->{data});
4728 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4729 wakaba 1.1 !!!next-token;
4730     redo B;
4731     } elsif ($token->{type} eq 'start tag') {
4732     if ($token->{tag_name} eq 'noframes') {
4733     $in_body->($insert_to_current);
4734     redo B;
4735     } else {
4736     #
4737     }
4738     } elsif ($token->{type} eq 'end tag') {
4739     if ($token->{tag_name} eq 'html') {
4740     $phase = 'trailing end';
4741     !!!next-token;
4742     redo B;
4743     } else {
4744     #
4745     }
4746     } else {
4747     #
4748     }
4749    
4750 wakaba 1.3 if (defined $token->{tag_name}) {
4751     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4752     } else {
4753     !!!parse-error (type => 'after frameset:#'.$token->{type});
4754     }
4755 wakaba 1.1 ## Ignore the token
4756     !!!next-token;
4757     redo B;
4758    
4759     ## ISSUE: An issue in spec there
4760     } else {
4761 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4762 wakaba 1.1 }
4763     }
4764     } elsif ($phase eq 'trailing end') {
4765     ## states in the main stage is preserved yet # MUST
4766    
4767     if ($token->{type} eq 'DOCTYPE') {
4768 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4769 wakaba 1.1 ## Ignore the token
4770     !!!next-token;
4771     redo B;
4772     } elsif ($token->{type} eq 'comment') {
4773     my $comment = $self->{document}->create_comment ($token->{data});
4774     $self->{document}->append_child ($comment);
4775     !!!next-token;
4776     redo B;
4777     } elsif ($token->{type} eq 'character') {
4778     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4779     my $data = $1;
4780     ## As if in the main phase.
4781     ## NOTE: The insertion mode in the main phase
4782     ## just before the phase has been changed to the trailing
4783     ## end phase is either "after body" or "after frameset".
4784     $reconstruct_active_formatting_elements->($insert_to_current)
4785     if $phase eq 'main';
4786    
4787 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4788 wakaba 1.1
4789     unless (length $token->{data}) {
4790     !!!next-token;
4791     redo B;
4792     }
4793     }
4794    
4795 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4796 wakaba 1.1 $phase = 'main';
4797     ## reprocess
4798     redo B;
4799     } elsif ($token->{type} eq 'start tag' or
4800     $token->{type} eq 'end tag') {
4801 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4802 wakaba 1.1 $phase = 'main';
4803     ## reprocess
4804     redo B;
4805     } elsif ($token->{type} eq 'end-of-file') {
4806     ## Stop parsing
4807     last B;
4808     } else {
4809     die "$0: $token->{type}: Unknown token";
4810     }
4811     }
4812     } # B
4813    
4814     ## Stop parsing # MUST
4815    
4816     ## TODO: script stuffs
4817 wakaba 1.3 } # _tree_construct_main
4818    
## Replace the content of a node with the result of parsing a string as
## HTML (the |innerHTML| setter).
##
##   $class->set_inner_html ($node, $string, $onerror);
##
## Arguments:
##   $node    - The node whose children are replaced.  Node type 9
##              (document) and node type 1 (element) are supported.
##   $string  - The string to parse.  It is accessed through a reference
##              to the caller's argument.
##   $onerror - Optional code reference invoked for each parse error.  In
##              the element case it receives the error options followed
##              by |line| and |column|; when omitted, a warning is printed.
##
## Dies if |$node| is of any other node type.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Iterate over a copy of the child list, since children are removed
    ## while iterating.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## The fragment is parsed into a fresh, temporary document.
    my $doc = $node->owner_document->implementation->create_document;
    ## TODO: Mark as HTML document
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 9 # MUST
    my $i = 0;       # offset of the next unread character in |$$s|
    my $line = 1;
    my $column = 0;
    $p->{set_next_input_character} = sub {
      my $self = shift;

      if ($i >= length ($$s)) {
        $self->{next_input_character} = -1; # no more input
        return;
      }

      $self->{next_input_character} = ord substr $$s, $i++, 1;
      $column++;

      if ($self->{next_input_character} == 0x000A) { # LF
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} == 0x000D) { # CR
        ## A CR LF pair is reported as a single LF.  The character after
        ## the CR is only peeked at and is consumed only if it is an LF.
        ## Any other character stays in the input, so that the next
        ## invocation of this closure applies the CR / NULL / range
        ## handling and the line and column accounting to it as well
        ## (queueing it on |$self->{char}| would bypass this closure).
        if ($i < length ($$s) and substr ($$s, $i, 1) eq "\x0A") {
          $i++;
        }
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_input_character} == 0x0000) { # NULL
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Default error handler: report the error as a warning.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    ## Errors are reported with the current input position appended.
    $p->{parse_error} = sub {
      $ponerror->(@_, line => $line, column => $column);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model flag depends on the context element.
    my $node_ln = $node->local_name;
    $p->{content_model_flag} = {
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    }->{$node_ln} || 'PCDATA';
    ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 7 # MUST
    $p->_reset_insertion_mode;

    ## Step 8 # MUST
    ## Use the nearest XHTML |form| element among |$node| and its
    ## ancestors, if any, as the form element.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->local_name eq 'form') { ## TODO: case?
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 3 # MUST
    ## Step 10 # MUST
    {
      ## The |!!!next-token| macro is expanded in a scope where |$self|
      ## is the parser object.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 11 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    ## Move the parsed nodes from the temporary root element to |$node|.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $node->append_child ($_);
    }
    ## ISSUE: adopt_node? mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
4972    
4973     } # tree construction stage
4974 wakaba 1.1
## Serialize the children of a node as an HTML fragment (the |innerHTML|
## getter).
##
##   $string_ref = $class->get_inner_html ($node, $on_error);
##
## Arguments:
##   $node     - The node whose children are serialized.
##   $on_error - Optional code reference.  It is invoked with the node as
##               its argument for every node that cannot be serialized.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is serialized without escaping.
  my $cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Step 1
  my $s = '';

  ## Text is emitted unescaped if |$node| itself or any of its ancestors
  ## is one of the XHTML elements listed above.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI might be undefined.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
        last;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue processed from the front.  Besides nodes it
  ## holds plain strings: an end tag to be emitted, or the marker
  ## |cdata-out|, which ends the unescaped mode started by an element.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape (|&| first, so that the other references are not
        ## escaped twice)
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      ## These elements have neither content nor an end tag.
      next C if {
        area => 1, base => 1, basefont => 1, bgsound => 1,
        br => 1, col => 1, embed => 1, frame => 1, hr => 1,
        img => 1, input => 1, link => 1, meta => 1, param => 1,
        spacer => 1, wbr => 1,
      }->{$tag_name};

      if (not $in_cdata and $cdata_element->{$tag_name}) {
        ## Processed after the end tag of this element (see below).
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      ## The children and then the end tag are processed before the
      ## rest of the queue.
      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) {
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) {
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) {
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children take the place of the entity reference.  They have
      ## to be put at the front of the queue; at the end they would be
      ## serialized after the following siblings and after the end tags
      ## of the ancestors.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5071    
5072     1;
5073 wakaba 1.5 # $Date: 2007/05/04 09:16:04 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24