/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download) (as text)
Sat Jun 23 02:26:51 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +23 -7 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	23 Jun 2007 02:21:24 -0000
2007-06-23  Wakaba  <wakaba@suika.fam.cx>

	* Makefile, HTML-tokenizer.t, HTML-tree.t: New test
	files are added.

	* tokenize/, tree-construction/: Sync with latest html5lib
	trunk.

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.8 our $VERSION=do{my @r=(q$Revision: 1.7 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Start tag names for which a "/" immediately before ">" is accepted;
## for any other tag the tokenizer reports a |nestc| parse error.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Named character reference table.
## Key:   entity name, case-sensitive, without the leading "&" and the
##        trailing ";" (both |amp| and |AMP|, |Alpha| and |alpha| exist).
## Value: a one-character string holding the character the entity
##        stands for.
## NOTE(review): the code that looks names up in this table is not
## visible in this part of the file.
21     my $entity_char = {
22     AElig => "\x{00C6}",
23     Aacute => "\x{00C1}",
24     Acirc => "\x{00C2}",
25     Agrave => "\x{00C0}",
26     Alpha => "\x{0391}",
27     Aring => "\x{00C5}",
28     Atilde => "\x{00C3}",
29     Auml => "\x{00C4}",
30     Beta => "\x{0392}",
31     Ccedil => "\x{00C7}",
32     Chi => "\x{03A7}",
33     Dagger => "\x{2021}",
34     Delta => "\x{0394}",
35     ETH => "\x{00D0}",
36     Eacute => "\x{00C9}",
37     Ecirc => "\x{00CA}",
38     Egrave => "\x{00C8}",
39     Epsilon => "\x{0395}",
40     Eta => "\x{0397}",
41     Euml => "\x{00CB}",
42     Gamma => "\x{0393}",
43     Iacute => "\x{00CD}",
44     Icirc => "\x{00CE}",
45     Igrave => "\x{00CC}",
46     Iota => "\x{0399}",
47     Iuml => "\x{00CF}",
48     Kappa => "\x{039A}",
49     Lambda => "\x{039B}",
50     Mu => "\x{039C}",
51     Ntilde => "\x{00D1}",
52     Nu => "\x{039D}",
53     OElig => "\x{0152}",
54     Oacute => "\x{00D3}",
55     Ocirc => "\x{00D4}",
56     Ograve => "\x{00D2}",
57     Omega => "\x{03A9}",
58     Omicron => "\x{039F}",
59     Oslash => "\x{00D8}",
60     Otilde => "\x{00D5}",
61     Ouml => "\x{00D6}",
62     Phi => "\x{03A6}",
63     Pi => "\x{03A0}",
64     Prime => "\x{2033}",
65     Psi => "\x{03A8}",
66     Rho => "\x{03A1}",
67     Scaron => "\x{0160}",
68     Sigma => "\x{03A3}",
69     THORN => "\x{00DE}",
70     Tau => "\x{03A4}",
71     Theta => "\x{0398}",
72     Uacute => "\x{00DA}",
73     Ucirc => "\x{00DB}",
74     Ugrave => "\x{00D9}",
75     Upsilon => "\x{03A5}",
76     Uuml => "\x{00DC}",
77     Xi => "\x{039E}",
78     Yacute => "\x{00DD}",
79     Yuml => "\x{0178}",
80     Zeta => "\x{0396}",
81     aacute => "\x{00E1}",
82     acirc => "\x{00E2}",
83     acute => "\x{00B4}",
84     aelig => "\x{00E6}",
85     agrave => "\x{00E0}",
86     alefsym => "\x{2135}",
87     alpha => "\x{03B1}",
88     amp => "\x{0026}",
89     AMP => "\x{0026}",
90     and => "\x{2227}",
91     ang => "\x{2220}",
92     apos => "\x{0027}",
93     aring => "\x{00E5}",
94     asymp => "\x{2248}",
95     atilde => "\x{00E3}",
96     auml => "\x{00E4}",
97     bdquo => "\x{201E}",
98     beta => "\x{03B2}",
99     brvbar => "\x{00A6}",
100     bull => "\x{2022}",
101     cap => "\x{2229}",
102     ccedil => "\x{00E7}",
103     cedil => "\x{00B8}",
104     cent => "\x{00A2}",
105     chi => "\x{03C7}",
106     circ => "\x{02C6}",
107     clubs => "\x{2663}",
108     cong => "\x{2245}",
109     copy => "\x{00A9}",
110     COPY => "\x{00A9}",
111     crarr => "\x{21B5}",
112     cup => "\x{222A}",
113     curren => "\x{00A4}",
114     dArr => "\x{21D3}",
115     dagger => "\x{2020}",
116     darr => "\x{2193}",
117     deg => "\x{00B0}",
118     delta => "\x{03B4}",
119     diams => "\x{2666}",
120     divide => "\x{00F7}",
121     eacute => "\x{00E9}",
122     ecirc => "\x{00EA}",
123     egrave => "\x{00E8}",
124     empty => "\x{2205}",
125     emsp => "\x{2003}",
126     ensp => "\x{2002}",
127     epsilon => "\x{03B5}",
128     equiv => "\x{2261}",
129     eta => "\x{03B7}",
130     eth => "\x{00F0}",
131     euml => "\x{00EB}",
132     euro => "\x{20AC}",
133     exist => "\x{2203}",
134     fnof => "\x{0192}",
135     forall => "\x{2200}",
136     frac12 => "\x{00BD}",
137     frac14 => "\x{00BC}",
138     frac34 => "\x{00BE}",
139     frasl => "\x{2044}",
140     gamma => "\x{03B3}",
141     ge => "\x{2265}",
142     gt => "\x{003E}",
143     GT => "\x{003E}",
144     hArr => "\x{21D4}",
145     harr => "\x{2194}",
146     hearts => "\x{2665}",
147     hellip => "\x{2026}",
148     iacute => "\x{00ED}",
149     icirc => "\x{00EE}",
150     iexcl => "\x{00A1}",
151     igrave => "\x{00EC}",
152     image => "\x{2111}",
153     infin => "\x{221E}",
154     int => "\x{222B}",
155     iota => "\x{03B9}",
156     iquest => "\x{00BF}",
157     isin => "\x{2208}",
158     iuml => "\x{00EF}",
159     kappa => "\x{03BA}",
160     lArr => "\x{21D0}",
161     lambda => "\x{03BB}",
162     lang => "\x{2329}",
163     laquo => "\x{00AB}",
164     larr => "\x{2190}",
165     lceil => "\x{2308}",
166     ldquo => "\x{201C}",
167     le => "\x{2264}",
168     lfloor => "\x{230A}",
169     lowast => "\x{2217}",
170     loz => "\x{25CA}",
171     lrm => "\x{200E}",
172     lsaquo => "\x{2039}",
173     lsquo => "\x{2018}",
174     lt => "\x{003C}",
175     LT => "\x{003C}",
176     macr => "\x{00AF}",
177     mdash => "\x{2014}",
178     micro => "\x{00B5}",
179     middot => "\x{00B7}",
180     minus => "\x{2212}",
181     mu => "\x{03BC}",
182     nabla => "\x{2207}",
183     nbsp => "\x{00A0}",
184     ndash => "\x{2013}",
185     ne => "\x{2260}",
186     ni => "\x{220B}",
187     not => "\x{00AC}",
188     notin => "\x{2209}",
189     nsub => "\x{2284}",
190     ntilde => "\x{00F1}",
191     nu => "\x{03BD}",
192     oacute => "\x{00F3}",
193     ocirc => "\x{00F4}",
194     oelig => "\x{0153}",
195     ograve => "\x{00F2}",
196     oline => "\x{203E}",
197     omega => "\x{03C9}",
198     omicron => "\x{03BF}",
199     oplus => "\x{2295}",
200     or => "\x{2228}",
201     ordf => "\x{00AA}",
202     ordm => "\x{00BA}",
203     oslash => "\x{00F8}",
204     otilde => "\x{00F5}",
205     otimes => "\x{2297}",
206     ouml => "\x{00F6}",
207     para => "\x{00B6}",
208     part => "\x{2202}",
209     permil => "\x{2030}",
210     perp => "\x{22A5}",
211     phi => "\x{03C6}",
212     pi => "\x{03C0}",
213     piv => "\x{03D6}",
214     plusmn => "\x{00B1}",
215     pound => "\x{00A3}",
216     prime => "\x{2032}",
217     prod => "\x{220F}",
218     prop => "\x{221D}",
219     psi => "\x{03C8}",
220     quot => "\x{0022}",
221     QUOT => "\x{0022}",
222     rArr => "\x{21D2}",
223     radic => "\x{221A}",
224     rang => "\x{232A}",
225     raquo => "\x{00BB}",
226     rarr => "\x{2192}",
227     rceil => "\x{2309}",
228     rdquo => "\x{201D}",
229     real => "\x{211C}",
230     reg => "\x{00AE}",
231     REG => "\x{00AE}",
232     rfloor => "\x{230B}",
233     rho => "\x{03C1}",
234     rlm => "\x{200F}",
235     rsaquo => "\x{203A}",
236     rsquo => "\x{2019}",
237     sbquo => "\x{201A}",
238     scaron => "\x{0161}",
239     sdot => "\x{22C5}",
240     sect => "\x{00A7}",
241     shy => "\x{00AD}",
242     sigma => "\x{03C3}",
243     sigmaf => "\x{03C2}",
244     sim => "\x{223C}",
245     spades => "\x{2660}",
246     sub => "\x{2282}",
247     sube => "\x{2286}",
248     sum => "\x{2211}",
249     sup => "\x{2283}",
250     sup1 => "\x{00B9}",
251     sup2 => "\x{00B2}",
252     sup3 => "\x{00B3}",
253     supe => "\x{2287}",
254     szlig => "\x{00DF}",
255     tau => "\x{03C4}",
256     there4 => "\x{2234}",
257     theta => "\x{03B8}",
258     thetasym => "\x{03D1}",
259     thinsp => "\x{2009}",
260     thorn => "\x{00FE}",
261     tilde => "\x{02DC}",
262     times => "\x{00D7}",
263     trade => "\x{2122}",
264     uArr => "\x{21D1}",
265     uacute => "\x{00FA}",
266     uarr => "\x{2191}",
267     ucirc => "\x{00FB}",
268     ugrave => "\x{00F9}",
269     uml => "\x{00A8}",
270     upsih => "\x{03D2}",
271     upsilon => "\x{03C5}",
272     uuml => "\x{00FC}",
273     weierp => "\x{2118}",
274     xi => "\x{03BE}",
275     yacute => "\x{00FD}",
276     yen => "\x{00A5}",
277     yuml => "\x{00FF}",
278     zeta => "\x{03B6}",
279     zwj => "\x{200D}",
280     zwnj => "\x{200C}",
281 wakaba 1.4 }; # $entity_char
282    
283 wakaba 1.8 ## TODO: Ensure that this table matches <http://html5.org/tools/web-apps-tracker?from=868&to=869>.
284 wakaba 1.4 ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8562>
## Replacement table for code points in the C1 range (128..159).
## Key:   the code point as a decimal number.
## Value: the code point to use instead; 0xFFFD (REPLACEMENT CHARACTER)
##        is used where the table has no real character to offer.
my $c1_entity_char = {
  128 => 0x20AC,
  129 => 0xFFFD,
  130 => 0x201A,
  131 => 0x0192,
  132 => 0x201E,
  133 => 0x2026,
  134 => 0x2020,
  135 => 0x2021,
  136 => 0x02C6,
  137 => 0x2030,
  138 => 0x0160,
  139 => 0x2039,
  140 => 0x0152,
  141 => 0xFFFD,
  142 => 0x017D,
  143 => 0xFFFD,
  144 => 0xFFFD,
  145 => 0x2018,
  146 => 0x2019,
  147 => 0x201C,
  148 => 0x201D,
  149 => 0x2022,
  150 => 0x2013,
  151 => 0x2014,
  152 => 0x02DC,
  153 => 0x2122,
  154 => 0x0161,
  155 => 0x203A,
  156 => 0x0153,
  157 => 0xFFFD,
  158 => 0x017E,
  159 => 0x0178,
}; # $c1_entity_char
319 wakaba 1.1
## Tag names that belong to the "special" element category.
## Every listed name maps to a true value; other names are absent.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Tag names that belong to the "scoping" element category.
## Every listed name maps to a true value; other names are absent.
my $scoping_category = {
  map { $_ => 1 } qw(button caption html marquee object table td th)
};
## Tag names that belong to the "formatting" element category.
## Every listed name maps to a true value; other names are absent.
## The list contains |strike| (the element name); a misspelt key would
## silently exclude <strike> from the category.
my $formatting_category = {
  map { $_ => 1 } qw(
    a b big em font i nobr s small strike strong tt u
  )
};
340     # $phrasing_category: all other elements
341    
## parse_string ($class_or_object, $string, $document[, $onerror])
##
## Creates a fresh parser by calling |new| on the invocant, runs the
## tokenizer and the tree constructor over $string, and returns
## $document.
##
##   $string   - the input text; it is accessed through a reference to
##               the caller's argument, so it is not copied.
##   $document - stored in |$self->{document}| and returned at the end.
##   $onerror  - optional code reference; called with the arguments
##               given to the |parse_error| hook followed by
##               |line => $line, column => $column|.  If omitted, each
##               error is reported with |warn|.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $i = 0;        # offset of the next unread character of $$s
  my $line = 1;     # position of the most recently read character,
  my $column = 0;   # as reported to the error handler
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## End of input is signalled by -1.
    if ($i >= length ($$s)) {
      $self->{next_input_character} = -1;
      return;
    }

    $self->{next_input_character} = ord substr $$s, $i++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## CR and CR LF both become a single LF.  Only *peek* at the
      ## following character: an LF is skipped, anything else stays in
      ## the input so that it is read by the next call of this closure
      ## and gets the same normalization (CR, NULL, out-of-range) and
      ## line/column accounting as every other character.
      $i++ if $i < length ($$s) and substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      ## TODO: test
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  ## Default error handler: print a warning with the error position.
  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
399    
## new ($class)
##
## Returns a new parser object with placeholder hooks installed:
##   set_next_input_character - always reports end of input (-1);
##   parse_error              - ignores the error.
## |parse_string| replaces both hooks with real implementations.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;

  ## The hook is stored inside the object it refers to.  Capturing
  ## $self directly would create a reference cycle and the parser
  ## would never be freed, so the closure holds a weak reference.
  require Scalar::Util;
  Scalar::Util::weaken (my $weak_self = $self);
  $self->{set_next_input_character} = sub {
    $weak_self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
411    
412     ## Implementations MUST act as if they used the state machine in the spec
413    
## _initialize_tokenizer ($self)
##
## Puts the tokenizer into its start configuration: |data| state,
## |PCDATA| content model, no token or attribute under construction,
## empty |char| and |token| lists, and the first input character
## loaded into |$self->{next_input_character}|.
sub _initialize_tokenizer ($) {
  my $self = shift;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # MUST be

  ## Nothing is being built yet and nothing has been emitted.
  ## |current_token| will hold a start tag, end tag, comment, or DOCTYPE.
  $self->{$_} = undef for qw(
    current_token
    current_attribute
    last_emitted_start_tag_name
    last_attribute_value_state
  );

  ## |char| must exist before the first character is fetched.
  $self->{char} = [];
  # $self->{next_input_character}
  !!!next-input-character;

  ## Tokens waiting to be returned by |_get_next_token|.
  $self->{token} = [];
} # _initialize_tokenizer
427    
428     ## A token has:
429     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
430     ## 'character', or 'end-of-file'
431     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
432     ## ISSUE: the spec need s/tagname/tag name/
433     ## ->{error} == 1 or 0 (DOCTYPE)
434     ## ->{attributes} isa HASH (start tag, end tag)
435     ## ->{data} (comment, character)
436    
437     ## Macros
438     ## Macros MUST be preceded by three EXCLAMATION MARKs.
439     ## emit ($token)
440     ## Emits the specified token.
441    
442     ## Emitted token MUST immediately be handled by the tree construction state.
443    
444     ## Before each step, UA MAY check to see if either one of the scripts in
445     ## "list of scripts that will execute as soon as possible" or the first
446     ## script in the "list of scripts that will execute asynchronously",
447     ## has completed loading. If one has, then it MUST be executed
448     ## and removed from the list.
449    
450 wakaba 1.8 ## ISSUE: <http://html5.org/tools/web-apps-tracker?from=874&to=876>
451    
452 wakaba 1.1 sub _get_next_token ($) {
453     my $self = shift;
454     if (@{$self->{token}}) {
455     return shift @{$self->{token}};
456     }
457    
458     A: {
459     if ($self->{state} eq 'data') {
460     if ($self->{next_input_character} == 0x0026) { # &
461     if ($self->{content_model_flag} eq 'PCDATA' or
462     $self->{content_model_flag} eq 'RCDATA') {
463     $self->{state} = 'entity data';
464     !!!next-input-character;
465     redo A;
466     } else {
467     #
468     }
469     } elsif ($self->{next_input_character} == 0x003C) { # <
470     if ($self->{content_model_flag} ne 'PLAINTEXT') {
471     $self->{state} = 'tag open';
472     !!!next-input-character;
473     redo A;
474     } else {
475     #
476     }
477     } elsif ($self->{next_input_character} == -1) {
478     !!!emit ({type => 'end-of-file'});
479     last A; ## TODO: ok?
480     }
481     # Anything else
482     my $token = {type => 'character',
483     data => chr $self->{next_input_character}};
484     ## Stay in the data state
485     !!!next-input-character;
486    
487     !!!emit ($token);
488    
489     redo A;
490     } elsif ($self->{state} eq 'entity data') {
491     ## (cannot happen in CDATA state)
492    
493     my $token = $self->_tokenize_attempt_to_consume_an_entity;
494    
495     $self->{state} = 'data';
496     # next-input-character is already done
497    
498     unless (defined $token) {
499     !!!emit ({type => 'character', data => '&'});
500     } else {
501     !!!emit ($token);
502     }
503    
504     redo A;
505     } elsif ($self->{state} eq 'tag open') {
506     if ($self->{content_model_flag} eq 'RCDATA' or
507     $self->{content_model_flag} eq 'CDATA') {
508     if ($self->{next_input_character} == 0x002F) { # /
509     !!!next-input-character;
510     $self->{state} = 'close tag open';
511     redo A;
512     } else {
513     ## reconsume
514     $self->{state} = 'data';
515    
516     !!!emit ({type => 'character', data => '<'});
517    
518     redo A;
519     }
520     } elsif ($self->{content_model_flag} eq 'PCDATA') {
521     if ($self->{next_input_character} == 0x0021) { # !
522     $self->{state} = 'markup declaration open';
523     !!!next-input-character;
524     redo A;
525     } elsif ($self->{next_input_character} == 0x002F) { # /
526     $self->{state} = 'close tag open';
527     !!!next-input-character;
528     redo A;
529     } elsif (0x0041 <= $self->{next_input_character} and
530     $self->{next_input_character} <= 0x005A) { # A..Z
531     $self->{current_token}
532     = {type => 'start tag',
533     tag_name => chr ($self->{next_input_character} + 0x0020)};
534     $self->{state} = 'tag name';
535     !!!next-input-character;
536     redo A;
537     } elsif (0x0061 <= $self->{next_input_character} and
538     $self->{next_input_character} <= 0x007A) { # a..z
539     $self->{current_token} = {type => 'start tag',
540     tag_name => chr ($self->{next_input_character})};
541     $self->{state} = 'tag name';
542     !!!next-input-character;
543     redo A;
544     } elsif ($self->{next_input_character} == 0x003E) { # >
545 wakaba 1.3 !!!parse-error (type => 'empty start tag');
546 wakaba 1.1 $self->{state} = 'data';
547     !!!next-input-character;
548    
549     !!!emit ({type => 'character', data => '<>'});
550    
551     redo A;
552     } elsif ($self->{next_input_character} == 0x003F) { # ?
553 wakaba 1.3 !!!parse-error (type => 'pio');
554 wakaba 1.1 $self->{state} = 'bogus comment';
555     ## $self->{next_input_character} is intentionally left as is
556     redo A;
557     } else {
558 wakaba 1.3 !!!parse-error (type => 'bare stago');
559 wakaba 1.1 $self->{state} = 'data';
560     ## reconsume
561    
562     !!!emit ({type => 'character', data => '<'});
563    
564     redo A;
565     }
566     } else {
567     die "$0: $self->{content_model_flag}: Unknown content model flag";
568     }
569     } elsif ($self->{state} eq 'close tag open') {
570     if ($self->{content_model_flag} eq 'RCDATA' or
571     $self->{content_model_flag} eq 'CDATA') {
572     my @next_char;
573     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
574     push @next_char, $self->{next_input_character};
575     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
576     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
577     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
578     !!!next-input-character;
579     next TAGNAME;
580     } else {
581 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
582 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
583     !!!back-next-input-character (@next_char);
584     $self->{state} = 'data';
585    
586     !!!emit ({type => 'character', data => '</'});
587    
588     redo A;
589     }
590     }
591     push @next_char, $self->{next_input_character};
592    
593     unless ($self->{next_input_character} == 0x0009 or # HT
594     $self->{next_input_character} == 0x000A or # LF
595     $self->{next_input_character} == 0x000B or # VT
596     $self->{next_input_character} == 0x000C or # FF
597     $self->{next_input_character} == 0x0020 or # SP
598     $self->{next_input_character} == 0x003E or # >
599     $self->{next_input_character} == 0x002F or # /
600     $self->{next_input_character} == 0x003C or # <
601     $self->{next_input_character} == -1) {
602 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
603 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
604     !!!back-next-input-character (@next_char);
605     $self->{state} = 'data';
606    
607     !!!emit ({type => 'character', data => '</'});
608    
609     redo A;
610     } else {
611     $self->{next_input_character} = shift @next_char;
612     !!!back-next-input-character (@next_char);
613     # and consume...
614     }
615     }
616    
617     if (0x0041 <= $self->{next_input_character} and
618     $self->{next_input_character} <= 0x005A) { # A..Z
619     $self->{current_token} = {type => 'end tag',
620     tag_name => chr ($self->{next_input_character} + 0x0020)};
621     $self->{state} = 'tag name';
622     !!!next-input-character;
623     redo A;
624     } elsif (0x0061 <= $self->{next_input_character} and
625     $self->{next_input_character} <= 0x007A) { # a..z
626     $self->{current_token} = {type => 'end tag',
627     tag_name => chr ($self->{next_input_character})};
628     $self->{state} = 'tag name';
629     !!!next-input-character;
630     redo A;
631     } elsif ($self->{next_input_character} == 0x003E) { # >
632 wakaba 1.3 !!!parse-error (type => 'empty end tag');
633 wakaba 1.1 $self->{state} = 'data';
634     !!!next-input-character;
635     redo A;
636     } elsif ($self->{next_input_character} == -1) {
637 wakaba 1.3 !!!parse-error (type => 'bare etago');
638 wakaba 1.1 $self->{state} = 'data';
639     # reconsume
640    
641     !!!emit ({type => 'character', data => '</'});
642    
643     redo A;
644     } else {
645 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
646 wakaba 1.1 $self->{state} = 'bogus comment';
647     ## $self->{next_input_character} is intentionally left as is
648     redo A;
649     }
650     } elsif ($self->{state} eq 'tag name') {
651     if ($self->{next_input_character} == 0x0009 or # HT
652     $self->{next_input_character} == 0x000A or # LF
653     $self->{next_input_character} == 0x000B or # VT
654     $self->{next_input_character} == 0x000C or # FF
655     $self->{next_input_character} == 0x0020) { # SP
656     $self->{state} = 'before attribute name';
657     !!!next-input-character;
658     redo A;
659     } elsif ($self->{next_input_character} == 0x003E) { # >
660     if ($self->{current_token}->{type} eq 'start tag') {
661     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
662     } elsif ($self->{current_token}->{type} eq 'end tag') {
663     $self->{content_model_flag} = 'PCDATA'; # MUST
664     if ($self->{current_token}->{attributes}) {
665 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
666 wakaba 1.1 }
667     } else {
668     die "$0: $self->{current_token}->{type}: Unknown token type";
669     }
670     $self->{state} = 'data';
671     !!!next-input-character;
672    
673     !!!emit ($self->{current_token}); # start tag or end tag
674     undef $self->{current_token};
675    
676     redo A;
677     } elsif (0x0041 <= $self->{next_input_character} and
678     $self->{next_input_character} <= 0x005A) { # A..Z
679     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
680     # start tag or end tag
681     ## Stay in this state
682     !!!next-input-character;
683     redo A;
684     } elsif ($self->{next_input_character} == 0x003C or # <
685     $self->{next_input_character} == -1) {
686 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
687 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
688     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
689     } elsif ($self->{current_token}->{type} eq 'end tag') {
690     $self->{content_model_flag} = 'PCDATA'; # MUST
691     if ($self->{current_token}->{attributes}) {
692 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
693 wakaba 1.1 }
694     } else {
695     die "$0: $self->{current_token}->{type}: Unknown token type";
696     }
697     $self->{state} = 'data';
698     # reconsume
699    
700     !!!emit ($self->{current_token}); # start tag or end tag
701     undef $self->{current_token};
702    
703     redo A;
704     } elsif ($self->{next_input_character} == 0x002F) { # /
705     !!!next-input-character;
706     if ($self->{next_input_character} == 0x003E and # >
707     $self->{current_token}->{type} eq 'start tag' and
708     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
709     # permitted slash
710     #
711     } else {
712 wakaba 1.3 !!!parse-error (type => 'nestc');
713 wakaba 1.1 }
714     $self->{state} = 'before attribute name';
715     # next-input-character is already done
716     redo A;
717     } else {
718     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
719     # start tag or end tag
720     ## Stay in the state
721     !!!next-input-character;
722     redo A;
723     }
724     } elsif ($self->{state} eq 'before attribute name') {
725     if ($self->{next_input_character} == 0x0009 or # HT
726     $self->{next_input_character} == 0x000A or # LF
727     $self->{next_input_character} == 0x000B or # VT
728     $self->{next_input_character} == 0x000C or # FF
729     $self->{next_input_character} == 0x0020) { # SP
730     ## Stay in the state
731     !!!next-input-character;
732     redo A;
733     } elsif ($self->{next_input_character} == 0x003E) { # >
734     if ($self->{current_token}->{type} eq 'start tag') {
735     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
736     } elsif ($self->{current_token}->{type} eq 'end tag') {
737     $self->{content_model_flag} = 'PCDATA'; # MUST
738     if ($self->{current_token}->{attributes}) {
739 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
740 wakaba 1.1 }
741     } else {
742     die "$0: $self->{current_token}->{type}: Unknown token type";
743     }
744     $self->{state} = 'data';
745     !!!next-input-character;
746    
747     !!!emit ($self->{current_token}); # start tag or end tag
748     undef $self->{current_token};
749    
750     redo A;
751     } elsif (0x0041 <= $self->{next_input_character} and
752     $self->{next_input_character} <= 0x005A) { # A..Z
753     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
754     value => ''};
755     $self->{state} = 'attribute name';
756     !!!next-input-character;
757     redo A;
758     } elsif ($self->{next_input_character} == 0x002F) { # /
759     !!!next-input-character;
760     if ($self->{next_input_character} == 0x003E and # >
761     $self->{current_token}->{type} eq 'start tag' and
762     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
763     # permitted slash
764     #
765     } else {
766 wakaba 1.3 !!!parse-error (type => 'nestc');
767 wakaba 1.1 }
768     ## Stay in the state
769     # next-input-character is already done
770     redo A;
771     } elsif ($self->{next_input_character} == 0x003C or # <
772     $self->{next_input_character} == -1) {
773 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
774 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
775     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
776     } elsif ($self->{current_token}->{type} eq 'end tag') {
777     $self->{content_model_flag} = 'PCDATA'; # MUST
778     if ($self->{current_token}->{attributes}) {
779 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
780 wakaba 1.1 }
781     } else {
782     die "$0: $self->{current_token}->{type}: Unknown token type";
783     }
784     $self->{state} = 'data';
785     # reconsume
786    
787     !!!emit ($self->{current_token}); # start tag or end tag
788     undef $self->{current_token};
789    
790     redo A;
791     } else {
792     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
793     value => ''};
794     $self->{state} = 'attribute name';
795     !!!next-input-character;
796     redo A;
797     }
798     } elsif ($self->{state} eq 'attribute name') {
799     my $before_leave = sub {
800     if (exists $self->{current_token}->{attributes} # start tag or end tag
801     ->{$self->{current_attribute}->{name}}) { # MUST
802 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
803 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
804     } else {
805     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
806     = $self->{current_attribute};
807     }
808     }; # $before_leave
809    
810     if ($self->{next_input_character} == 0x0009 or # HT
811     $self->{next_input_character} == 0x000A or # LF
812     $self->{next_input_character} == 0x000B or # VT
813     $self->{next_input_character} == 0x000C or # FF
814     $self->{next_input_character} == 0x0020) { # SP
815     $before_leave->();
816     $self->{state} = 'after attribute name';
817     !!!next-input-character;
818     redo A;
819     } elsif ($self->{next_input_character} == 0x003D) { # =
820     $before_leave->();
821     $self->{state} = 'before attribute value';
822     !!!next-input-character;
823     redo A;
824     } elsif ($self->{next_input_character} == 0x003E) { # >
825     $before_leave->();
826     if ($self->{current_token}->{type} eq 'start tag') {
827     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
828     } elsif ($self->{current_token}->{type} eq 'end tag') {
829     $self->{content_model_flag} = 'PCDATA'; # MUST
830     if ($self->{current_token}->{attributes}) {
831 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
832 wakaba 1.1 }
833     } else {
834     die "$0: $self->{current_token}->{type}: Unknown token type";
835     }
836     $self->{state} = 'data';
837     !!!next-input-character;
838    
839     !!!emit ($self->{current_token}); # start tag or end tag
840     undef $self->{current_token};
841    
842     redo A;
843     } elsif (0x0041 <= $self->{next_input_character} and
844     $self->{next_input_character} <= 0x005A) { # A..Z
845     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
846     ## Stay in the state
847     !!!next-input-character;
848     redo A;
849     } elsif ($self->{next_input_character} == 0x002F) { # /
850     $before_leave->();
851     !!!next-input-character;
852     if ($self->{next_input_character} == 0x003E and # >
853     $self->{current_token}->{type} eq 'start tag' and
854     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
855     # permitted slash
856     #
857     } else {
858 wakaba 1.3 !!!parse-error (type => 'nestc');
859 wakaba 1.1 }
860     $self->{state} = 'before attribute name';
861     # next-input-character is already done
862     redo A;
863     } elsif ($self->{next_input_character} == 0x003C or # <
864     $self->{next_input_character} == -1) {
865 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
866 wakaba 1.1 $before_leave->();
867     if ($self->{current_token}->{type} eq 'start tag') {
868     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
869     } elsif ($self->{current_token}->{type} eq 'end tag') {
870     $self->{content_model_flag} = 'PCDATA'; # MUST
871     if ($self->{current_token}->{attributes}) {
872 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
873 wakaba 1.1 }
874     } else {
875     die "$0: $self->{current_token}->{type}: Unknown token type";
876     }
877     $self->{state} = 'data';
878     # reconsume
879    
880     !!!emit ($self->{current_token}); # start tag or end tag
881     undef $self->{current_token};
882    
883     redo A;
884     } else {
885     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
886     ## Stay in the state
887     !!!next-input-character;
888     redo A;
889     }
890     } elsif ($self->{state} eq 'after attribute name') {
891     if ($self->{next_input_character} == 0x0009 or # HT
892     $self->{next_input_character} == 0x000A or # LF
893     $self->{next_input_character} == 0x000B or # VT
894     $self->{next_input_character} == 0x000C or # FF
895     $self->{next_input_character} == 0x0020) { # SP
896     ## Stay in the state
897     !!!next-input-character;
898     redo A;
899     } elsif ($self->{next_input_character} == 0x003D) { # =
900     $self->{state} = 'before attribute value';
901     !!!next-input-character;
902     redo A;
903     } elsif ($self->{next_input_character} == 0x003E) { # >
904     if ($self->{current_token}->{type} eq 'start tag') {
905     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
906     } elsif ($self->{current_token}->{type} eq 'end tag') {
907     $self->{content_model_flag} = 'PCDATA'; # MUST
908     if ($self->{current_token}->{attributes}) {
909 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
910 wakaba 1.1 }
911     } else {
912     die "$0: $self->{current_token}->{type}: Unknown token type";
913     }
914     $self->{state} = 'data';
915     !!!next-input-character;
916    
917     !!!emit ($self->{current_token}); # start tag or end tag
918     undef $self->{current_token};
919    
920     redo A;
921     } elsif (0x0041 <= $self->{next_input_character} and
922     $self->{next_input_character} <= 0x005A) { # A..Z
923     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
924     value => ''};
925     $self->{state} = 'attribute name';
926     !!!next-input-character;
927     redo A;
928     } elsif ($self->{next_input_character} == 0x002F) { # /
929     !!!next-input-character;
930     if ($self->{next_input_character} == 0x003E and # >
931     $self->{current_token}->{type} eq 'start tag' and
932     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
933     # permitted slash
934     #
935     } else {
936 wakaba 1.3 !!!parse-error (type => 'nestc');
937 wakaba 1.1 }
938     $self->{state} = 'before attribute name';
939     # next-input-character is already done
940     redo A;
941     } elsif ($self->{next_input_character} == 0x003C or # <
942     $self->{next_input_character} == -1) {
943 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
944 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
945     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
946     } elsif ($self->{current_token}->{type} eq 'end tag') {
947     $self->{content_model_flag} = 'PCDATA'; # MUST
948     if ($self->{current_token}->{attributes}) {
949 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
950 wakaba 1.1 }
951     } else {
952     die "$0: $self->{current_token}->{type}: Unknown token type";
953     }
954     $self->{state} = 'data';
955     # reconsume
956    
957     !!!emit ($self->{current_token}); # start tag or end tag
958     undef $self->{current_token};
959    
960     redo A;
961     } else {
962     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
963     value => ''};
964     $self->{state} = 'attribute name';
965     !!!next-input-character;
966     redo A;
967     }
968     } elsif ($self->{state} eq 'before attribute value') {
969     if ($self->{next_input_character} == 0x0009 or # HT
970     $self->{next_input_character} == 0x000A or # LF
971     $self->{next_input_character} == 0x000B or # VT
972     $self->{next_input_character} == 0x000C or # FF
973     $self->{next_input_character} == 0x0020) { # SP
974     ## Stay in the state
975     !!!next-input-character;
976     redo A;
977     } elsif ($self->{next_input_character} == 0x0022) { # "
978     $self->{state} = 'attribute value (double-quoted)';
979     !!!next-input-character;
980     redo A;
981     } elsif ($self->{next_input_character} == 0x0026) { # &
982     $self->{state} = 'attribute value (unquoted)';
983     ## reconsume
984     redo A;
985     } elsif ($self->{next_input_character} == 0x0027) { # '
986     $self->{state} = 'attribute value (single-quoted)';
987     !!!next-input-character;
988     redo A;
989     } elsif ($self->{next_input_character} == 0x003E) { # >
990     if ($self->{current_token}->{type} eq 'start tag') {
991     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
992     } elsif ($self->{current_token}->{type} eq 'end tag') {
993     $self->{content_model_flag} = 'PCDATA'; # MUST
994     if ($self->{current_token}->{attributes}) {
995 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
996 wakaba 1.1 }
997     } else {
998     die "$0: $self->{current_token}->{type}: Unknown token type";
999     }
1000     $self->{state} = 'data';
1001     !!!next-input-character;
1002    
1003     !!!emit ($self->{current_token}); # start tag or end tag
1004     undef $self->{current_token};
1005    
1006     redo A;
1007     } elsif ($self->{next_input_character} == 0x003C or # <
1008     $self->{next_input_character} == -1) {
1009 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1010 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1011     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1012     } elsif ($self->{current_token}->{type} eq 'end tag') {
1013     $self->{content_model_flag} = 'PCDATA'; # MUST
1014     if ($self->{current_token}->{attributes}) {
1015 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1016 wakaba 1.1 }
1017     } else {
1018     die "$0: $self->{current_token}->{type}: Unknown token type";
1019     }
1020     $self->{state} = 'data';
1021     ## reconsume
1022    
1023     !!!emit ($self->{current_token}); # start tag or end tag
1024     undef $self->{current_token};
1025    
1026     redo A;
1027     } else {
1028     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1029     $self->{state} = 'attribute value (unquoted)';
1030     !!!next-input-character;
1031     redo A;
1032     }
1033     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1034     if ($self->{next_input_character} == 0x0022) { # "
1035     $self->{state} = 'before attribute name';
1036     !!!next-input-character;
1037     redo A;
1038     } elsif ($self->{next_input_character} == 0x0026) { # &
1039     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1040     $self->{state} = 'entity in attribute value';
1041     !!!next-input-character;
1042     redo A;
1043     } elsif ($self->{next_input_character} == -1) {
1044 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1045 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1046     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1047     } elsif ($self->{current_token}->{type} eq 'end tag') {
1048     $self->{content_model_flag} = 'PCDATA'; # MUST
1049     if ($self->{current_token}->{attributes}) {
1050 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1051 wakaba 1.1 }
1052     } else {
1053     die "$0: $self->{current_token}->{type}: Unknown token type";
1054     }
1055     $self->{state} = 'data';
1056     ## reconsume
1057    
1058     !!!emit ($self->{current_token}); # start tag or end tag
1059     undef $self->{current_token};
1060    
1061     redo A;
1062     } else {
1063     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1064     ## Stay in the state
1065     !!!next-input-character;
1066     redo A;
1067     }
1068     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1069     if ($self->{next_input_character} == 0x0027) { # '
1070     $self->{state} = 'before attribute name';
1071     !!!next-input-character;
1072     redo A;
1073     } elsif ($self->{next_input_character} == 0x0026) { # &
1074     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1075     $self->{state} = 'entity in attribute value';
1076     !!!next-input-character;
1077     redo A;
1078     } elsif ($self->{next_input_character} == -1) {
1079 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1080 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1081     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1082     } elsif ($self->{current_token}->{type} eq 'end tag') {
1083     $self->{content_model_flag} = 'PCDATA'; # MUST
1084     if ($self->{current_token}->{attributes}) {
1085 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1086 wakaba 1.1 }
1087     } else {
1088     die "$0: $self->{current_token}->{type}: Unknown token type";
1089     }
1090     $self->{state} = 'data';
1091     ## reconsume
1092    
1093     !!!emit ($self->{current_token}); # start tag or end tag
1094     undef $self->{current_token};
1095    
1096     redo A;
1097     } else {
1098     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1099     ## Stay in the state
1100     !!!next-input-character;
1101     redo A;
1102     }
1103     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1104     if ($self->{next_input_character} == 0x0009 or # HT
1105     $self->{next_input_character} == 0x000A or # LF
1106     $self->{next_input_character} == 0x000B or # HT
1107     $self->{next_input_character} == 0x000C or # FF
1108     $self->{next_input_character} == 0x0020) { # SP
1109     $self->{state} = 'before attribute name';
1110     !!!next-input-character;
1111     redo A;
1112     } elsif ($self->{next_input_character} == 0x0026) { # &
1113     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1114     $self->{state} = 'entity in attribute value';
1115     !!!next-input-character;
1116     redo A;
1117     } elsif ($self->{next_input_character} == 0x003E) { # >
1118     if ($self->{current_token}->{type} eq 'start tag') {
1119     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1120     } elsif ($self->{current_token}->{type} eq 'end tag') {
1121     $self->{content_model_flag} = 'PCDATA'; # MUST
1122     if ($self->{current_token}->{attributes}) {
1123 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1124 wakaba 1.1 }
1125     } else {
1126     die "$0: $self->{current_token}->{type}: Unknown token type";
1127     }
1128     $self->{state} = 'data';
1129     !!!next-input-character;
1130    
1131     !!!emit ($self->{current_token}); # start tag or end tag
1132     undef $self->{current_token};
1133    
1134     redo A;
1135     } elsif ($self->{next_input_character} == 0x003C or # <
1136     $self->{next_input_character} == -1) {
1137 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1138 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1139     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1140     } elsif ($self->{current_token}->{type} eq 'end tag') {
1141     $self->{content_model_flag} = 'PCDATA'; # MUST
1142     if ($self->{current_token}->{attributes}) {
1143 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1144 wakaba 1.1 }
1145     } else {
1146     die "$0: $self->{current_token}->{type}: Unknown token type";
1147     }
1148     $self->{state} = 'data';
1149     ## reconsume
1150    
1151     !!!emit ($self->{current_token}); # start tag or end tag
1152     undef $self->{current_token};
1153    
1154     redo A;
1155     } else {
1156     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1157     ## Stay in the state
1158     !!!next-input-character;
1159     redo A;
1160     }
1161     } elsif ($self->{state} eq 'entity in attribute value') {
1162     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1163    
1164     unless (defined $token) {
1165     $self->{current_attribute}->{value} .= '&';
1166     } else {
1167     $self->{current_attribute}->{value} .= $token->{data};
1168     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1169     }
1170    
1171     $self->{state} = $self->{last_attribute_value_state};
1172     # next-input-character is already done
1173     redo A;
1174     } elsif ($self->{state} eq 'bogus comment') {
1175     ## (only happen if PCDATA state)
1176    
1177     my $token = {type => 'comment', data => ''};
1178    
1179     BC: {
1180     if ($self->{next_input_character} == 0x003E) { # >
1181     $self->{state} = 'data';
1182     !!!next-input-character;
1183    
1184     !!!emit ($token);
1185    
1186     redo A;
1187     } elsif ($self->{next_input_character} == -1) {
1188     $self->{state} = 'data';
1189     ## reconsume
1190    
1191     !!!emit ($token);
1192    
1193     redo A;
1194     } else {
1195     $token->{data} .= chr ($self->{next_input_character});
1196     !!!next-input-character;
1197     redo BC;
1198     }
1199     } # BC
1200     } elsif ($self->{state} eq 'markup declaration open') {
1201     ## (only happen if PCDATA state)
1202    
1203     my @next_char;
1204     push @next_char, $self->{next_input_character};
1205    
1206     if ($self->{next_input_character} == 0x002D) { # -
1207     !!!next-input-character;
1208     push @next_char, $self->{next_input_character};
1209     if ($self->{next_input_character} == 0x002D) { # -
1210     $self->{current_token} = {type => 'comment', data => ''};
1211     $self->{state} = 'comment';
1212     !!!next-input-character;
1213     redo A;
1214     }
1215     } elsif ($self->{next_input_character} == 0x0044 or # D
1216     $self->{next_input_character} == 0x0064) { # d
1217     !!!next-input-character;
1218     push @next_char, $self->{next_input_character};
1219     if ($self->{next_input_character} == 0x004F or # O
1220     $self->{next_input_character} == 0x006F) { # o
1221     !!!next-input-character;
1222     push @next_char, $self->{next_input_character};
1223     if ($self->{next_input_character} == 0x0043 or # C
1224     $self->{next_input_character} == 0x0063) { # c
1225     !!!next-input-character;
1226     push @next_char, $self->{next_input_character};
1227     if ($self->{next_input_character} == 0x0054 or # T
1228     $self->{next_input_character} == 0x0074) { # t
1229     !!!next-input-character;
1230     push @next_char, $self->{next_input_character};
1231     if ($self->{next_input_character} == 0x0059 or # Y
1232     $self->{next_input_character} == 0x0079) { # y
1233     !!!next-input-character;
1234     push @next_char, $self->{next_input_character};
1235     if ($self->{next_input_character} == 0x0050 or # P
1236     $self->{next_input_character} == 0x0070) { # p
1237     !!!next-input-character;
1238     push @next_char, $self->{next_input_character};
1239     if ($self->{next_input_character} == 0x0045 or # E
1240     $self->{next_input_character} == 0x0065) { # e
1241     ## ISSUE: What a stupid code this is!
1242     $self->{state} = 'DOCTYPE';
1243     !!!next-input-character;
1244     redo A;
1245     }
1246     }
1247     }
1248     }
1249     }
1250     }
1251     }
1252    
1253 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1254 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1255     !!!back-next-input-character (@next_char);
1256     $self->{state} = 'bogus comment';
1257     redo A;
1258    
1259     ## ISSUE: typos in spec: chacacters, is is a parse error
1260     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1261     } elsif ($self->{state} eq 'comment') {
1262     if ($self->{next_input_character} == 0x002D) { # -
1263     $self->{state} = 'comment dash';
1264     !!!next-input-character;
1265     redo A;
1266     } elsif ($self->{next_input_character} == -1) {
1267 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1268 wakaba 1.1 $self->{state} = 'data';
1269     ## reconsume
1270    
1271     !!!emit ($self->{current_token}); # comment
1272     undef $self->{current_token};
1273    
1274     redo A;
1275     } else {
1276     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1277     ## Stay in the state
1278     !!!next-input-character;
1279     redo A;
1280     }
1281     } elsif ($self->{state} eq 'comment dash') {
1282     if ($self->{next_input_character} == 0x002D) { # -
1283     $self->{state} = 'comment end';
1284     !!!next-input-character;
1285     redo A;
1286     } elsif ($self->{next_input_character} == -1) {
1287 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1288 wakaba 1.1 $self->{state} = 'data';
1289     ## reconsume
1290    
1291     !!!emit ($self->{current_token}); # comment
1292     undef $self->{current_token};
1293    
1294     redo A;
1295     } else {
1296     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1297     $self->{state} = 'comment';
1298     !!!next-input-character;
1299     redo A;
1300     }
1301     } elsif ($self->{state} eq 'comment end') {
1302     if ($self->{next_input_character} == 0x003E) { # >
1303     $self->{state} = 'data';
1304     !!!next-input-character;
1305    
1306     !!!emit ($self->{current_token}); # comment
1307     undef $self->{current_token};
1308    
1309     redo A;
1310     } elsif ($self->{next_input_character} == 0x002D) { # -
1311 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1312 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1313     ## Stay in the state
1314     !!!next-input-character;
1315     redo A;
1316     } elsif ($self->{next_input_character} == -1) {
1317 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1318 wakaba 1.1 $self->{state} = 'data';
1319     ## reconsume
1320    
1321     !!!emit ($self->{current_token}); # comment
1322     undef $self->{current_token};
1323    
1324     redo A;
1325     } else {
1326 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1327 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1328     $self->{state} = 'comment';
1329     !!!next-input-character;
1330     redo A;
1331     }
1332     } elsif ($self->{state} eq 'DOCTYPE') {
1333     if ($self->{next_input_character} == 0x0009 or # HT
1334     $self->{next_input_character} == 0x000A or # LF
1335     $self->{next_input_character} == 0x000B or # VT
1336     $self->{next_input_character} == 0x000C or # FF
1337     $self->{next_input_character} == 0x0020) { # SP
1338     $self->{state} = 'before DOCTYPE name';
1339     !!!next-input-character;
1340     redo A;
1341     } else {
1342 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1343 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1344     ## reconsume
1345     redo A;
1346     }
1347     } elsif ($self->{state} eq 'before DOCTYPE name') {
1348     if ($self->{next_input_character} == 0x0009 or # HT
1349     $self->{next_input_character} == 0x000A or # LF
1350     $self->{next_input_character} == 0x000B or # VT
1351     $self->{next_input_character} == 0x000C or # FF
1352     $self->{next_input_character} == 0x0020) { # SP
1353     ## Stay in the state
1354     !!!next-input-character;
1355     redo A;
1356     } elsif (0x0061 <= $self->{next_input_character} and
1357     $self->{next_input_character} <= 0x007A) { # a..z
1358 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1359 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1360     name => chr ($self->{next_input_character} - 0x0020),
1361     error => 1};
1362     $self->{state} = 'DOCTYPE name';
1363     !!!next-input-character;
1364     redo A;
1365     } elsif ($self->{next_input_character} == 0x003E) { # >
1366 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1367 wakaba 1.1 $self->{state} = 'data';
1368     !!!next-input-character;
1369    
1370     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1371    
1372     redo A;
1373     } elsif ($self->{next_input_character} == -1) {
1374 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1375 wakaba 1.1 $self->{state} = 'data';
1376     ## reconsume
1377    
1378     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1379    
1380     redo A;
1381     } else {
1382     $self->{current_token} = {type => 'DOCTYPE',
1383     name => chr ($self->{next_input_character}),
1384     error => 1};
1385 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1386 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1387     !!!next-input-character;
1388     redo A;
1389     }
1390     } elsif ($self->{state} eq 'DOCTYPE name') {
1391     if ($self->{next_input_character} == 0x0009 or # HT
1392     $self->{next_input_character} == 0x000A or # LF
1393     $self->{next_input_character} == 0x000B or # VT
1394     $self->{next_input_character} == 0x000C or # FF
1395     $self->{next_input_character} == 0x0020) { # SP
1396     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1397     $self->{state} = 'after DOCTYPE name';
1398     !!!next-input-character;
1399     redo A;
1400     } elsif ($self->{next_input_character} == 0x003E) { # >
1401     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1402     $self->{state} = 'data';
1403     !!!next-input-character;
1404    
1405     !!!emit ($self->{current_token}); # DOCTYPE
1406     undef $self->{current_token};
1407    
1408     redo A;
1409     } elsif (0x0061 <= $self->{next_input_character} and
1410     $self->{next_input_character} <= 0x007A) { # a..z
1411     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1412     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1413     ## Stay in the state
1414     !!!next-input-character;
1415     redo A;
1416     } elsif ($self->{next_input_character} == -1) {
1417 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1418 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1419     $self->{state} = 'data';
1420     ## reconsume
1421    
1422     !!!emit ($self->{current_token});
1423     undef $self->{current_token};
1424    
1425     redo A;
1426     } else {
1427     $self->{current_token}->{name}
1428     .= chr ($self->{next_input_character}); # DOCTYPE
1429     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1430     ## Stay in the state
1431     !!!next-input-character;
1432     redo A;
1433     }
1434     } elsif ($self->{state} eq 'after DOCTYPE name') {
1435     if ($self->{next_input_character} == 0x0009 or # HT
1436     $self->{next_input_character} == 0x000A or # LF
1437     $self->{next_input_character} == 0x000B or # VT
1438     $self->{next_input_character} == 0x000C or # FF
1439     $self->{next_input_character} == 0x0020) { # SP
1440     ## Stay in the state
1441     !!!next-input-character;
1442     redo A;
1443     } elsif ($self->{next_input_character} == 0x003E) { # >
1444     $self->{state} = 'data';
1445     !!!next-input-character;
1446    
1447     !!!emit ($self->{current_token}); # DOCTYPE
1448     undef $self->{current_token};
1449    
1450     redo A;
1451     } elsif ($self->{next_input_character} == -1) {
1452 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1453 wakaba 1.1 $self->{state} = 'data';
1454     ## reconsume
1455    
1456     !!!emit ($self->{current_token}); # DOCTYPE
1457     undef $self->{current_token};
1458    
1459     redo A;
1460     } else {
1461 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1462 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1463     $self->{state} = 'bogus DOCTYPE';
1464     !!!next-input-character;
1465     redo A;
1466     }
1467     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1468     if ($self->{next_input_character} == 0x003E) { # >
1469     $self->{state} = 'data';
1470     !!!next-input-character;
1471    
1472     !!!emit ($self->{current_token}); # DOCTYPE
1473     undef $self->{current_token};
1474    
1475     redo A;
1476     } elsif ($self->{next_input_character} == -1) {
1477 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1478 wakaba 1.1 $self->{state} = 'data';
1479     ## reconsume
1480    
1481     !!!emit ($self->{current_token}); # DOCTYPE
1482     undef $self->{current_token};
1483    
1484     redo A;
1485     } else {
1486     ## Stay in the state
1487     !!!next-input-character;
1488     redo A;
1489     }
1490     } else {
1491     die "$0: $self->{state}: Unknown state";
1492     }
1493     } # A
1494    
1495     die "$0: _get_next_token: unexpected case";
1496     } # _get_next_token
1497    
## Attempt to consume an entity (character reference).
##
## On entry, |$self->{next_input_character}| is the character that
## follows the "&".  Three forms are recognized:
##
##   - "#x" / "#X" followed by hexadecimal digits,
##   - "#" followed by decimal digits,
##   - a name made of ASCII letters and digits, looked up in
##     |$entity_char|.
##
## Returns a hash reference |{type => 'character', data => $string}|
## when a reference is recognized.  Returns |undef| otherwise; in
## that case a parse error has been reported and:
##
##   - for "&#" and "&#x" without digits, the characters read so far
##     are pushed back and |next_input_character| is reset to "#";
##   - for an unknown name, the characters read are pushed back as a
##     character token;
##   - for anything else, no character has been consumed.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      my $num;
      X: {
        ## On the first pass this is the "x" or "X"; after |redo X|
        ## it is the digit that has just been accumulated.  It is only
        ## used while |$num| is still undefined, i.e. on the first pass.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error (type => 'bare hcro');
          ## Two characters have been read past the "#": the "x"/"X"
          ## and the character after it.  Both have to be returned to
          ## the input (in reading order) before "#" is made current
          ## again; otherwise the character following "x" is lost.
          my @unread = ($x_char, $self->{next_input_character});
          !!!back-next-input-character (@unread);
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error (type => 'no refc');
        }

        ## TODO: check the definition for |a valid Unicode character|.
        ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        } elsif (0x80 <= $num and $num <= 0x9F) {
          ## C1 code points are reported and then remapped through
          ## the |$c1_entity_char| table.
          !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
          $num = $c1_entity_char->{$num};
        }

        return {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      ## "&#" not followed by a digit or "x": put the character just
      ## read back and make "#" the current character again.
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| holds the text to return: the replacement string of
    ## the most recent name that matched, followed by any characters
    ## read after it (or just the raw characters if nothing matched).
    my $value = $entity_name;
    my $match;

    ## NOTE(review): characters read after the last matching name are
    ## appended to |$value|, and a ";" following them is consumed as
    ## if it terminated the reference.  Confirm against the spec's
    ## longest-match rule.
    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x005A) or
            (0x0061 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x007A) or
            (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039))) {
      $entity_name .= chr $self->{next_input_character};
      if (defined $entity_char->{$entity_name}) {
        $value = $entity_char->{$entity_name};
        $match = 1;
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match) {
      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        ## NOTE(review): the numeric branches report this condition
        ## as 'no refc'; confirm whether 'refc' is intentional here.
        !!!parse-error (type => 'refc');
      }

      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1635    
## Prepare the document for the tree construction stage by switching
## off strict error checking on it.
##
## NOTE: $self->{document} MUST be specified before this method is called
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST
  return $document->strict_error_checking (0);
} # _initialize_tree_constructor
1644    
## Counterpart of |_initialize_tree_constructor|: switch strict error
## checking on the document back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};
  ## TODO: Turn mutation events on
  return $document->strict_error_checking (1);
} # _terminate_tree_constructor
1650    
1651     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1652    
1653 wakaba 1.3 { # tree construction stage
1654     my $token;
1655    
## Entry point of the tree construction stage: fetch the first token,
## reset the per-parse state kept on |$self| and run the three
## construction steps in order.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA renders the
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Reset the tree constructor state.
  $self->{insertion_mode} = 'before head';
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  return $self->_tree_construction_main;
} # _construct_tree
1679    
## Initial step of tree construction: handle everything up to and
## including the DOCTYPE (or the point where its absence is detected).
##
##   - A DOCTYPE token is turned into a document type definition node
##     appended to the document; the next token is then fetched.
##   - Leading white space in character tokens is appended to the
##     document as text; a token consisting only of white space is
##     skipped and the step continues with the next token.
##   - Any other known token is reported as 'missing DOCTYPE' and is
##     left in |$token| for reprocessing by the following step.
##
## Dies on a token of unknown type.
sub _tree_construction_initial ($) {
  my $self = shift;

  ## Token types (other than character tokens) that imply a missing
  ## DOCTYPE.
  my %implies_missing_doctype = (
    comment => 1,
    'start tag' => 1,
    'end tag' => 1,
    'end-of-file' => 1,
  );

  TOKEN: while (1) {
    if ($token->{type} eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name});
      $self->{document}->append_child ($doctype);
      !!!next-token;
      return;
    }

    if ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## Nothing but white space: stay in the phase
          !!!next-token;
          next TOKEN;
        }
      }
    } elsif (not $implies_missing_doctype{$token->{type}}) {
      die "$0: $token->{type}: Unknown token";
    }

    ## ISSUE: Spec currently left this case undefined.
    !!!parse-error (type => 'missing DOCTYPE');
    ## The current token is reprocessed by the caller's next step.
    return;
  } # TOKEN
} # _tree_construction_initial
1728    
## Root element phase of tree construction.
##
##   - DOCTYPE token: parse error, token ignored, phase continues.
##   - comment token: a comment node is appended to the document, phase
##     continues.
##   - character token: leading whitespace is appended to the document as
##     text; a whitespace-only token keeps the phase running.
##   - start tag, end tag, end-of-file, or a character token with
##     non-whitespace data left: an |html| element is created, appended to
##     the document and pushed onto $self->{open_elements} as
##     [node, 'html']; the phase then ends with $token left untouched.
##
## Dies on any other token type.
1729     sub _tree_construction_root_element ($) {
1730     my $self = shift;
1731    
         ## Bare block used as a loop: |redo B| re-runs the dispatch on the
         ## current $token.
1732     B: {
1733     if ($token->{type} eq 'DOCTYPE') {
1734     !!!parse-error (type => 'in html:#DOCTYPE');
1735     ## Ignore the token
1736     ## Stay in the phase
1737     !!!next-token;
1738     redo B;
1739     } elsif ($token->{type} eq 'comment') {
1740     my $comment = $self->{document}->create_comment ($token->{data});
1741     $self->{document}->append_child ($comment);
1742     ## Stay in the phase
1743     !!!next-token;
1744     redo B;
1745     } elsif ($token->{type} eq 'character') {
1746     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
1747     $self->{document}->manakai_append_text ($1);
1748     ## ISSUE: DOM3 Core does not allow Document > Text
1749     unless (length $token->{data}) {
1750     ## Stay in the phase
1751     !!!next-token;
1752     redo B;
1753     }
1754     }
         ## Fall through to the root element creation below.
1755     #
1756     } elsif ({
1757     'start tag' => 1,
1758     'end tag' => 1,
1759     'end-of-file' => 1,
1760     }->{$token->{type}}) {
1761     ## ISSUE: There is an issue in the spec
         ## Fall through to the root element creation below.
1762     #
1763     } else {
1764     die "$0: $token->{type}: Unknown token";
1765     }
         ## The stack of open elements holds [node, local name] pairs.
1766     my $root_element; !!!create-element ($root_element, 'html');
1767     $self->{document}->append_child ($root_element);
1768     push @{$self->{open_elements}}, [$root_element, 'html'];
1769     #$phase = 'main';
1770     ## reprocess
1771     #redo B;
1772     return;
1773     } # B
1774     } # _tree_construction_root_element
1775    
## Recomputes $self->{insertion_mode} from the stack of open elements.
##
## The stack is walked from its last entry towards its first one.  When
## $self->{inner_html_node} is defined and is neither |td| nor |th|, that
## node is examined in place of the stack entry.  The first node whose
## local name has an entry in the mode table decides the mode; |html|
## yields 'before head' or 'after head' depending on whether
## $self->{head_element} is defined; reaching the first stack entry
## without a decision yields 'in body'.
sub _reset_insertion_mode ($) {
  my ($self) = @_;

  ## Steps 4..13: insertion mode selected by the local name of the node.
  my %mode_of = (
    select   => 'in select',
    td       => 'in cell',
    th       => 'in cell',
    tr       => 'in row',
    tbody    => 'in table body',
    thead    => 'in table head',
    tfoot    => 'in table foot',
    caption  => 'in caption',
    colgroup => 'in column group',
    table    => 'in table',
    head     => 'in body', # not in head!
    body     => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $reached_first;

  ## Step 2
  my $depth = -1;
  my $candidate = $self->{open_elements}->[$depth];

  while (1) {
    ## Step 3
    if ($self->{open_elements}->[0]->[0] eq $candidate->[0]) {
      $reached_first = 1;
    }
    my $context = $self->{inner_html_node};
    if (defined $context) {
      my $context_name = $context->[1];
      $candidate = $context
          unless $context_name eq 'td' or $context_name eq 'th';
    }

    ## Steps 4..13
    my $mapped = $mode_of{$candidate->[1]};
    if (defined $mapped) {
      $self->{insertion_mode} = $mapped;
      return;
    }

    ## Step 14
    if ($candidate->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($reached_first) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Steps 16 and 17: move one entry towards the first one and retry.
    $depth--;
    $candidate = $self->{open_elements}->[$depth];
  }
} # _reset_insertion_mode
1837    
1838     sub _tree_construction_main ($) {
1839     my $self = shift;
1840    
1841     my $phase = 'main';
1842 wakaba 1.1
1843     my $active_formatting_elements = [];
1844    
## Re-opens the entries of the list of active formatting elements that are
## no longer on the stack of open elements.
##
## $insert is a code reference called with each freshly cloned node; it is
## expected to place that node in the tree.
##
## Does nothing when the list is empty, or when its last entry is a marker
## or is still on the stack of open elements.  Otherwise the list is walked
## backwards to find where re-opening has to start, and then forwards: each
## entry is cloned, handed to $insert, pushed onto the stack of open
## elements, and replaced in the list by the [clone, name] pair.
1845     my $reconstruct_active_formatting_elements = sub { # MUST
1846     my $insert = shift;
1847    
1848     ## Step 1
1849     return unless @$active_formatting_elements;
1850    
1851     ## Step 3
         ## $i stays a negative (from-the-end) index throughout this closure.
1852     my $i = -1;
1853     my $entry = $active_formatting_elements->[$i];
1854    
1855     ## Step 2
1856     return if $entry->[0] eq '#marker';
1857 wakaba 1.3 for (@{$self->{open_elements}}) {
1858 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1859     return;
1860     }
1861     }
1862    
         ## Rewind: move $i back until the entry before it is a marker or is
         ## on the stack of open elements, or until the first entry is reached.
1863     S4: {
1864     ## Step 4
1865     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
1866    
1867     ## Step 5
1868     $i--;
1869     $entry = $active_formatting_elements->[$i];
1870    
1871     ## Step 6
1872     if ($entry->[0] eq '#marker') {
1873     #
1874     } else {
1875     my $in_open_elements;
1876 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
1877 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
1878     $in_open_elements = 1;
1879     last OE;
1880     }
1881     }
1882     if ($in_open_elements) {
1883     #
1884     } else {
1885     redo S4;
1886     }
1887     }
1888    
1889     ## Step 7
         ## Went one entry too far back: step forward again.
1890     $i++;
1891     $entry = $active_formatting_elements->[$i];
1892     } # S4
1893    
         ## Advance: re-open every entry from $i up to the end of the list.
1894     S7: {
1895     ## Step 8
         ## clone_node (0): presumably a shallow clone (no children) -- TODO
         ## confirm against the DOM implementation in use.
1896     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
1897    
1898     ## Step 9
1899     $insert->($clone->[0]);
1900 wakaba 1.3 push @{$self->{open_elements}}, $clone;
1901 wakaba 1.1
1902     ## Step 10
1903 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
1904 wakaba 1.1
1905     ## Step 11
         ## Stop once the entry just replaced is the last one in the list.
1906     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
1907     ## Step 7'
1908     $i++;
1909     $entry = $active_formatting_elements->[$i];
1910    
1911     redo S7;
1912     }
1913     } # S7
1914     }; # $reconstruct_active_formatting_elements
1915    
## Truncates the list of active formatting elements at its last marker:
## the marker and every entry after it are removed.  The list is left
## untouched when it holds no marker.
my $clear_up_to_marker = sub {
  my $position = $#$active_formatting_elements;
  while ($position >= 0) {
    if ($active_formatting_elements->[$position]->[0] eq '#marker') {
      splice @$active_formatting_elements, $position;
      return;
    }
    $position--;
  }
}; # $clear_up_to_marker
1924    
## Handles a |style| start tag held in $token.
##
## A |style| element carrying the token's attributes is created and
## appended to $self->{head_element} when the insertion mode is 'in head'
## and a head element is known, otherwise to the current node (last entry
## of $self->{open_elements}).  The content model flag is switched to
## 'CDATA', all following character tokens are concatenated into a single
## text node of the element, and the flag is reset to 'PCDATA'.  A token
## other than the |style| end tag at that point raises a parse error.  In
## both cases the closure finishes by fetching the next token.
1925     my $style_start_tag = sub {
1926 wakaba 1.6 my $style_el; !!!create-element ($style_el, 'style', $token->{attributes});
1927 wakaba 1.3 ## $self->{insertion_mode} eq 'in head' and ... (always true)
1928     (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
1929     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
1930 wakaba 1.1 ->append_child ($style_el);
         ## The flag is presumably read by the tokenizer when producing the
         ## next tokens -- TODO confirm.
1931     $self->{content_model_flag} = 'CDATA';
1932    
1933     my $text = '';
1934     !!!next-token;
1935     while ($token->{type} eq 'character') {
1936     $text .= $token->{data};
1937     !!!next-token;
1938     } # stop if non-character token or tokenizer stops tokenising
1939     if (length $text) {
1940     $style_el->manakai_append_text ($text);
1941     }
1942    
1943     $self->{content_model_flag} = 'PCDATA';
1944    
1945     if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
1946     ## Ignore the token
1947     } else {
1948 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
1949 wakaba 1.1 ## ISSUE: And ignore?
1950     }
         ## The current token is consumed whether or not it was the end tag.
1951     !!!next-token;
1952     }; # $style_start_tag
1953    
## Handles a |script| start tag held in $token.
##
## A |script| element carrying the token's attributes is created, the
## content model flag is switched to 'CDATA', all following character
## tokens are concatenated into a single text node of the element, and the
## flag is reset to 'PCDATA'.  A token other than the |script| end tag at
## that point raises a parse error.
##
## Unlike the |style| handling, the element is appended to the tree only
## after its text has been collected, and only when
## $self->{inner_html_node} is not defined: it then goes to
## $self->{head_element} when the insertion mode is 'in head' and a head
## element is known, otherwise to the current node.  The closure finishes
## by fetching the next token.
1954     my $script_start_tag = sub {
1955     my $script_el;
1956     !!!create-element ($script_el, 'script', $token->{attributes});
1957     ## TODO: mark as "parser-inserted"
1958    
1959     $self->{content_model_flag} = 'CDATA';
1960    
1961     my $text = '';
1962     !!!next-token;
1963     while ($token->{type} eq 'character') {
1964     $text .= $token->{data};
1965     !!!next-token;
1966     } # stop if non-character token or tokenizer stops tokenising
1967     if (length $text) {
1968     $script_el->manakai_append_text ($text);
1969     }
1970    
1971     $self->{content_model_flag} = 'PCDATA';
1972    
1973     if ($token->{type} eq 'end tag' and
1974     $token->{tag_name} eq 'script') {
1975     ## Ignore the token
1976     } else {
1977 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
1978 wakaba 1.1 ## ISSUE: And ignore?
1979     ## TODO: mark as "already executed"
1980     }
1981    
         ## In the innerHTML case the element is not inserted into the tree.
1982 wakaba 1.3 if (defined $self->{inner_html_node}) {
1983     ## TODO: mark as "already executed"
1984     } else {
1985 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
1986     ## TODO: insertion point = just before the next input character
1987    
1988 wakaba 1.3 (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
1989     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])->append_child ($script_el);
1990 wakaba 1.1
1991     ## TODO: insertion point = $old_insertion_point (might be "undefined")
1992    
1993     ## TODO: if there is a script that will execute as soon as the parser resume, then...
1994     }
1995    
         ## The current token is consumed whether or not it was the end tag.
1996     !!!next-token;
1997     }; # $script_start_tag
1998    
## Handles the end tag of a formatting element (the "adoption agency"
## steps of the tree construction algorithm).
##
## $tag_name is the local name of the end tag being processed; $token is
## expected to hold that end tag.
##
## Outline of one pass through the FET block:
##   1. Find the last entry named $tag_name in the list of active
##      formatting elements, not looking past the last marker.  A missing
##      entry, an entry that is on the stack but not in scope, or an entry
##      that is not on the stack of open elements at all raises a parse
##      error and ends processing of the token.
##   2. Find the furthest block: the non-formatting special or scoping
##      element that sits closest above the formatting element on the
##      stack.
##   3. Without a furthest block, the stack is cut back to just before the
##      formatting element, its entry is removed from the list, and
##      processing ends.
##   4..13. Otherwise the nodes between the formatting element and the
##      furthest block are re-parented, the formatting element is cloned,
##      the furthest block's children are moved into the clone, and both
##      the list and the stack are updated to hold the clone.
##   14. The whole block is run again.
##
## Every exit path fetches the next token before returning.
1999     my $formatting_end_tag = sub {
2000     my $tag_name = shift;
2001    
2002     FET: {
2003     ## Step 1
2004     my $formatting_element;
2005     my $formatting_element_i_in_active;
2006     AFE: for (reverse 0..$#$active_formatting_elements) {
2007     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
2008     $formatting_element = $active_formatting_elements->[$_];
2009     $formatting_element_i_in_active = $_;
2010     last AFE;
2011     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
2012     last AFE;
2013     }
2014     } # AFE
2015     unless (defined $formatting_element) {
2016 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$tag_name);
2017 wakaba 1.1 ## Ignore the token
2018     !!!next-token;
2019     return;
2020     }
2021     ## has an element in scope
         ## $in_scope is cleared as soon as a scope boundary element is passed
         ## while walking down the stack.
2022     my $in_scope = 1;
2023     my $formatting_element_i_in_open;
2024 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2025     my $node = $self->{open_elements}->[$_];
2026 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
2027     if ($in_scope) {
2028     $formatting_element_i_in_open = $_;
2029     last INSCOPE;
2030     } else { # in open elements but not in scope
2031 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2032 wakaba 1.1 ## Ignore the token
2033     !!!next-token;
2034     return;
2035     }
2036     } elsif ({
2037     table => 1, caption => 1, td => 1, th => 1,
2038     button => 1, marquee => 1, object => 1, html => 1,
2039     }->{$node->[1]}) {
2040     $in_scope = 0;
2041     }
2042     } # INSCOPE
2043     unless (defined $formatting_element_i_in_open) {
2044 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
         ## Remove the located entry itself.  It need not be the last entry
         ## of the list, so |pop| could drop an unrelated formatting element.
2045 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1; # $formatting_element
2046     !!!next-token; ## TODO: ok?
2047     return;
2048     }
2049 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
2050 wakaba 1.4 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2051 wakaba 1.1 }
2052    
2053     ## Step 2
         ## Walking from the current node down to the formatting element, the
         ## last match recorded is the one nearest to the formatting element.
2054     my $furthest_block;
2055     my $furthest_block_i_in_open;
2056 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2057     my $node = $self->{open_elements}->[$_];
2058 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
2059     #not $phrasing_category->{$node->[1]} and
2060     ($special_category->{$node->[1]} or
2061     $scoping_category->{$node->[1]})) {
2062     $furthest_block = $node;
2063     $furthest_block_i_in_open = $_;
2064     } elsif ($node->[0] eq $formatting_element->[0]) {
2065     last OE;
2066     }
2067     } # OE
2068    
2069     ## Step 3
2070     unless (defined $furthest_block) { # MUST
2071 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
2072 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
2073     !!!next-token;
2074     return;
2075     }
2076    
2077     ## Step 4
2078 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
2079 wakaba 1.1
2080     ## Step 5
2081     my $furthest_block_parent = $furthest_block->[0]->parent_node;
2082     if (defined $furthest_block_parent) {
2083     $furthest_block_parent->remove_child ($furthest_block->[0]);
2084     }
2085    
2086     ## Step 6
         ## NOTE(review): when the formatting element is the first entry of
         ## the list, the index below is -1 and selects the LAST entry --
         ## confirm whether that case needs separate handling.
2087     my $bookmark_prev_el
2088     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
2089     ->[0];
2090    
2091     ## Step 7
2092     my $node = $furthest_block;
2093     my $node_i_in_open = $furthest_block_i_in_open;
2094     my $last_node = $furthest_block;
2095     S7: {
2096     ## Step 1
2097     $node_i_in_open--;
2098 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
2099 wakaba 1.1
2100     ## Step 2
         ## A node that is not in the list of active formatting elements is
         ## dropped from the stack and the loop restarts with the node
         ## before it.
2101     my $node_i_in_active;
2102     S7S2: {
2103     for (reverse 0..$#$active_formatting_elements) {
2104     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2105     $node_i_in_active = $_;
2106     last S7S2;
2107     }
2108     }
2109 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
2110 wakaba 1.1 redo S7;
2111     } # S7S2
2112    
2113     ## Step 3
2114     last S7 if $node->[0] eq $formatting_element->[0];
2115    
2116     ## Step 4
2117     if ($last_node->[0] eq $furthest_block->[0]) {
2118     $bookmark_prev_el = $node->[0];
2119     }
2120    
2121     ## Step 5
         ## A node that already has children is replaced, in both the list
         ## and the stack, by a clone made with clone_node (0).
2122     if ($node->[0]->has_child_nodes ()) {
2123     my $clone = [$node->[0]->clone_node (0), $node->[1]];
2124     $active_formatting_elements->[$node_i_in_active] = $clone;
2125 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
2126 wakaba 1.1 $node = $clone;
2127     }
2128    
2129     ## Step 6
2130     $node->[0]->append_child ($last_node->[0]);
2131    
2132     ## Step 7
2133     $last_node = $node;
2134    
2135     ## Step 8
2136     redo S7;
2137     } # S7
2138    
2139     ## Step 8
2140     $common_ancestor_node->[0]->append_child ($last_node->[0]);
2141    
2142     ## Step 9
2143     my $clone = [$formatting_element->[0]->clone_node (0),
2144     $formatting_element->[1]];
2145    
2146     ## Step 10
         ## The child list is copied first so that moving children does not
         ## disturb the iteration.
2147     my @cn = @{$furthest_block->[0]->child_nodes};
2148     $clone->[0]->append_child ($_) for @cn;
2149    
2150     ## Step 11
2151     $furthest_block->[0]->append_child ($clone->[0]);
2152    
2153     ## Step 12
         ## Remove the formatting element from the list and insert the clone
         ## right after the bookmark entry; $i is decremented when the removal
         ## happens below the recorded bookmark index.
2154     my $i;
2155     AFE: for (reverse 0..$#$active_formatting_elements) {
2156     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
2157     splice @$active_formatting_elements, $_, 1;
2158     $i-- and last AFE if defined $i;
2159     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
2160     $i = $_;
2161     }
2162     } # AFE
2163     splice @$active_formatting_elements, $i + 1, 0, $clone;
2164    
2165     ## Step 13
2166     undef $i;
2167 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2168     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
2169     splice @{$self->{open_elements}}, $_, 1;
2170 wakaba 1.1 $i-- and last OE if defined $i;
2171 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
2172 wakaba 1.1 $i = $_;
2173     }
2174     } # OE
         ## NOTE(review): this splice uses length 1, so it REPLACES the stack
         ## entry that follows the furthest block, whereas step 12 inserts
         ## with length 0 -- confirm whether an insertion is intended here.
2175 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
2176 wakaba 1.1
2177     ## Step 14
2178     redo FET;
2179     } # FET
2180     }; # $formatting_end_tag
2181    
## Appends the given node to the current node, i.e. the node of the last
## entry on the stack of open elements.
my $insert_to_current = sub {
  my ($new_child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  return $current_node->append_child ($new_child);
}; # $insert_to_current
2185    
## Inserts the given node, foster-parenting it when the current node is a
## table-structure element (table, tbody, tfoot, thead, tr).
##
## In that case the last |table| entry on the stack of open elements is
## looked up.  If its node has a parent whose node_type is 1, the child is
## inserted into that parent just before the table; otherwise it is
## appended to the node of the stack entry preceding the table.  Without
## any |table| entry the node of the first stack entry is used.  In every
## other situation the child is simply appended to the current node.
my $insert_to_foster = sub {
  my ($new_child) = @_;
  my $open = $self->{open_elements};

  my %needs_foster_parent = map { $_ => 1 } qw(table tbody tfoot thead tr);
  return $open->[-1]->[0]->append_child ($new_child)
      unless $needs_foster_parent{$open->[-1]->[1]};

  # MUST
  my ($foster_parent, $insert_before_node);

  ## Locate the last |table| entry on the stack, if there is one.
  my $table_pos = $#$open;
  $table_pos-- while $table_pos >= 0 and $open->[$table_pos]->[1] ne 'table';

  if ($table_pos >= 0) {
    my $table_node = $open->[$table_pos]->[0];
    my $table_parent = $table_node->parent_node;
    if (defined $table_parent and $table_parent->node_type == 1) {
      $foster_parent = $table_parent;
      $insert_before_node = $table_node;
    } else {
      $foster_parent = $open->[$table_pos - 1]->[0];
    }
  }

  $foster_parent = $open->[0]->[0] unless defined $foster_parent;
  return $foster_parent->insert_before ($new_child, $insert_before_node);
}; # $insert_to_foster
2216    
2217     my $in_body = sub {
2218     my $insert = shift;
2219     if ($token->{type} eq 'start tag') {
2220     if ($token->{tag_name} eq 'script') {
2221     $script_start_tag->();
2222     return;
2223     } elsif ($token->{tag_name} eq 'style') {
2224     $style_start_tag->();
2225     return;
2226     } elsif ({
2227     base => 1, link => 1, meta => 1,
2228     }->{$token->{tag_name}}) {
2229 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2230 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2231     my $el;
2232     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2233 wakaba 1.3 if (defined $self->{head_element}) {
2234     $self->{head_element}->append_child ($el);
2235 wakaba 1.1 } else {
2236     $insert->($el);
2237     }
2238    
2239     !!!next-token;
2240     return;
2241     } elsif ($token->{tag_name} eq 'title') {
2242 wakaba 1.3 !!!parse-error (type => 'in body:title');
2243 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2244     my $title_el;
2245     !!!create-element ($title_el, 'title', $token->{attributes});
2246 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2247 wakaba 1.1 ->append_child ($title_el);
2248     $self->{content_model_flag} = 'RCDATA';
2249    
2250     my $text = '';
2251     !!!next-token;
2252     while ($token->{type} eq 'character') {
2253     $text .= $token->{data};
2254     !!!next-token;
2255     }
2256     if (length $text) {
2257     $title_el->manakai_append_text ($text);
2258     }
2259    
2260     $self->{content_model_flag} = 'PCDATA';
2261    
2262     if ($token->{type} eq 'end tag' and
2263     $token->{tag_name} eq 'title') {
2264     ## Ignore the token
2265     } else {
2266 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2267 wakaba 1.1 ## ISSUE: And ignore?
2268     }
2269     !!!next-token;
2270     return;
2271     } elsif ($token->{tag_name} eq 'body') {
2272 wakaba 1.3 !!!parse-error (type => 'in body:body');
2273 wakaba 1.1
2274 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2275     $self->{open_elements}->[1]->[1] ne 'body') {
2276 wakaba 1.1 ## Ignore the token
2277     } else {
2278 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2279 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2280     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2281     $body_el->set_attribute_ns
2282     (undef, [undef, $attr_name],
2283     $token->{attributes}->{$attr_name}->{value});
2284     }
2285     }
2286     }
2287     !!!next-token;
2288     return;
2289     } elsif ({
2290     address => 1, blockquote => 1, center => 1, dir => 1,
2291     div => 1, dl => 1, fieldset => 1, listing => 1,
2292     menu => 1, ol => 1, p => 1, ul => 1,
2293     pre => 1,
2294     }->{$token->{tag_name}}) {
2295     ## has a p element in scope
2296 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2297 wakaba 1.1 if ($_->[1] eq 'p') {
2298     !!!back-token;
2299     $token = {type => 'end tag', tag_name => 'p'};
2300     return;
2301     } elsif ({
2302     table => 1, caption => 1, td => 1, th => 1,
2303     button => 1, marquee => 1, object => 1, html => 1,
2304     }->{$_->[1]}) {
2305     last INSCOPE;
2306     }
2307     } # INSCOPE
2308    
2309     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2310     if ($token->{tag_name} eq 'pre') {
2311     !!!next-token;
2312     if ($token->{type} eq 'character') {
2313     $token->{data} =~ s/^\x0A//;
2314     unless (length $token->{data}) {
2315     !!!next-token;
2316     }
2317     }
2318     } else {
2319     !!!next-token;
2320     }
2321     return;
2322     } elsif ($token->{tag_name} eq 'form') {
2323 wakaba 1.3 if (defined $self->{form_element}) {
2324     !!!parse-error (type => 'in form:form');
2325 wakaba 1.1 ## Ignore the token
2326 wakaba 1.7 !!!next-token;
2327     return;
2328 wakaba 1.1 } else {
2329     ## has a p element in scope
2330 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2331 wakaba 1.1 if ($_->[1] eq 'p') {
2332     !!!back-token;
2333     $token = {type => 'end tag', tag_name => 'p'};
2334     return;
2335     } elsif ({
2336     table => 1, caption => 1, td => 1, th => 1,
2337     button => 1, marquee => 1, object => 1, html => 1,
2338     }->{$_->[1]}) {
2339     last INSCOPE;
2340     }
2341     } # INSCOPE
2342    
2343     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2344 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2345 wakaba 1.1 !!!next-token;
2346     return;
2347     }
2348     } elsif ($token->{tag_name} eq 'li') {
2349     ## has a p element in scope
2350 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2351 wakaba 1.1 if ($_->[1] eq 'p') {
2352     !!!back-token;
2353     $token = {type => 'end tag', tag_name => 'p'};
2354     return;
2355     } elsif ({
2356     table => 1, caption => 1, td => 1, th => 1,
2357     button => 1, marquee => 1, object => 1, html => 1,
2358     }->{$_->[1]}) {
2359     last INSCOPE;
2360     }
2361     } # INSCOPE
2362    
2363     ## Step 1
2364     my $i = -1;
2365 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2366 wakaba 1.1 LI: {
2367     ## Step 2
2368     if ($node->[1] eq 'li') {
2369 wakaba 1.8 if ($i != -1) {
2370     !!!parse-error (type => 'end tag missing:'.
2371     $self->{open_elements}->[-1]->[1]);
2372     ## TODO: test
2373     }
2374 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2375 wakaba 1.1 last LI;
2376     }
2377    
2378     ## Step 3
2379     if (not $formatting_category->{$node->[1]} and
2380     #not $phrasing_category->{$node->[1]} and
2381     ($special_category->{$node->[1]} or
2382     $scoping_category->{$node->[1]}) and
2383     $node->[1] ne 'address' and $node->[1] ne 'div') {
2384     last LI;
2385     }
2386    
2387     ## Step 4
2388     $i--;
2389 wakaba 1.3 $node = $self->{open_elements}->[$i];
2390 wakaba 1.1 redo LI;
2391     } # LI
2392    
2393     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2394     !!!next-token;
2395     return;
2396     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2397     ## has a p element in scope
2398 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2399 wakaba 1.1 if ($_->[1] eq 'p') {
2400     !!!back-token;
2401     $token = {type => 'end tag', tag_name => 'p'};
2402     return;
2403     } elsif ({
2404     table => 1, caption => 1, td => 1, th => 1,
2405     button => 1, marquee => 1, object => 1, html => 1,
2406     }->{$_->[1]}) {
2407     last INSCOPE;
2408     }
2409     } # INSCOPE
2410    
2411     ## Step 1
2412     my $i = -1;
2413 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2414 wakaba 1.1 LI: {
2415     ## Step 2
2416     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2417 wakaba 1.8 if ($i != -1) {
2418     !!!parse-error (type => 'end tag missing:'.
2419     $self->{open_elements}->[-1]->[1]);
2420     ## TODO: test
2421     }
2422 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2423 wakaba 1.1 last LI;
2424     }
2425    
2426     ## Step 3
2427     if (not $formatting_category->{$node->[1]} and
2428     #not $phrasing_category->{$node->[1]} and
2429     ($special_category->{$node->[1]} or
2430     $scoping_category->{$node->[1]}) and
2431     $node->[1] ne 'address' and $node->[1] ne 'div') {
2432     last LI;
2433     }
2434    
2435     ## Step 4
2436     $i--;
2437 wakaba 1.3 $node = $self->{open_elements}->[$i];
2438 wakaba 1.1 redo LI;
2439     } # LI
2440    
2441     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2442     !!!next-token;
2443     return;
2444     } elsif ($token->{tag_name} eq 'plaintext') {
2445     ## has a p element in scope
2446 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2447 wakaba 1.1 if ($_->[1] eq 'p') {
2448     !!!back-token;
2449     $token = {type => 'end tag', tag_name => 'p'};
2450     return;
2451     } elsif ({
2452     table => 1, caption => 1, td => 1, th => 1,
2453     button => 1, marquee => 1, object => 1, html => 1,
2454     }->{$_->[1]}) {
2455     last INSCOPE;
2456     }
2457     } # INSCOPE
2458    
2459     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2460    
2461     $self->{content_model_flag} = 'PLAINTEXT';
2462    
2463     !!!next-token;
2464     return;
2465     } elsif ({
2466     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2467     }->{$token->{tag_name}}) {
2468     ## has a p element in scope
2469 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2470     my $node = $self->{open_elements}->[$_];
2471 wakaba 1.1 if ($node->[1] eq 'p') {
2472     !!!back-token;
2473     $token = {type => 'end tag', tag_name => 'p'};
2474     return;
2475     } elsif ({
2476     table => 1, caption => 1, td => 1, th => 1,
2477     button => 1, marquee => 1, object => 1, html => 1,
2478     }->{$node->[1]}) {
2479     last INSCOPE;
2480     }
2481     } # INSCOPE
2482    
2483     ## has an element in scope
2484     my $i;
2485 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2486     my $node = $self->{open_elements}->[$_];
2487 wakaba 1.1 if ({
2488     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2489     }->{$node->[1]}) {
2490     $i = $_;
2491     last INSCOPE;
2492     } elsif ({
2493     table => 1, caption => 1, td => 1, th => 1,
2494     button => 1, marquee => 1, object => 1, html => 1,
2495     }->{$node->[1]}) {
2496     last INSCOPE;
2497     }
2498     } # INSCOPE
2499    
2500     if (defined $i) {
2501 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2502     splice @{$self->{open_elements}}, $i;
2503 wakaba 1.1 }
2504    
2505     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2506    
2507     !!!next-token;
2508     return;
2509     } elsif ($token->{tag_name} eq 'a') {
2510     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2511     my $node = $active_formatting_elements->[$i];
2512     if ($node->[1] eq 'a') {
2513 wakaba 1.3 !!!parse-error (type => 'in a:a');
2514 wakaba 1.1
2515     !!!back-token;
2516     $token = {type => 'end tag', tag_name => 'a'};
2517     $formatting_end_tag->($token->{tag_name});
2518    
2519     AFE2: for (reverse 0..$#$active_formatting_elements) {
2520     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2521     splice @$active_formatting_elements, $_, 1;
2522     last AFE2;
2523     }
2524     } # AFE2
2525 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2526     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2527     splice @{$self->{open_elements}}, $_, 1;
2528 wakaba 1.1 last OE;
2529     }
2530     } # OE
2531     last AFE;
2532     } elsif ($node->[0] eq '#marker') {
2533     last AFE;
2534     }
2535     } # AFE
2536    
2537     $reconstruct_active_formatting_elements->($insert_to_current);
2538    
2539     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2540 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2541 wakaba 1.1
2542     !!!next-token;
2543     return;
2544     } elsif ({
2545     b => 1, big => 1, em => 1, font => 1, i => 1,
2546     nobr => 1, s => 1, small => 1, strile => 1,
2547     strong => 1, tt => 1, u => 1,
2548     }->{$token->{tag_name}}) {
2549     $reconstruct_active_formatting_elements->($insert_to_current);
2550    
2551     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2552 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2553 wakaba 1.1
2554     !!!next-token;
2555     return;
2556     } elsif ($token->{tag_name} eq 'button') {
2557     ## has a button element in scope
2558 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2559     my $node = $self->{open_elements}->[$_];
2560 wakaba 1.1 if ($node->[1] eq 'button') {
2561 wakaba 1.3 !!!parse-error (type => 'in button:button');
2562 wakaba 1.1 !!!back-token;
2563     $token = {type => 'end tag', tag_name => 'button'};
2564     return;
2565     } elsif ({
2566     table => 1, caption => 1, td => 1, th => 1,
2567     button => 1, marquee => 1, object => 1, html => 1,
2568     }->{$node->[1]}) {
2569     last INSCOPE;
2570     }
2571     } # INSCOPE
2572    
2573     $reconstruct_active_formatting_elements->($insert_to_current);
2574    
2575     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2576     push @$active_formatting_elements, ['#marker', ''];
2577    
2578     !!!next-token;
2579     return;
2580     } elsif ($token->{tag_name} eq 'marquee' or
2581     $token->{tag_name} eq 'object') {
2582     $reconstruct_active_formatting_elements->($insert_to_current);
2583    
2584     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2585     push @$active_formatting_elements, ['#marker', ''];
2586    
2587     !!!next-token;
2588     return;
2589     } elsif ($token->{tag_name} eq 'xmp') {
2590     $reconstruct_active_formatting_elements->($insert_to_current);
2591    
2592     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2593    
2594     $self->{content_model_flag} = 'CDATA';
2595    
2596     !!!next-token;
2597     return;
2598     } elsif ($token->{tag_name} eq 'table') {
2599     ## has a p element in scope
2600 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2601 wakaba 1.1 if ($_->[1] eq 'p') {
2602     !!!back-token;
2603     $token = {type => 'end tag', tag_name => 'p'};
2604     return;
2605     } elsif ({
2606     table => 1, caption => 1, td => 1, th => 1,
2607     button => 1, marquee => 1, object => 1, html => 1,
2608     }->{$_->[1]}) {
2609     last INSCOPE;
2610     }
2611     } # INSCOPE
2612    
2613     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2614    
2615 wakaba 1.3 $self->{insertion_mode} = 'in table';
2616 wakaba 1.1
2617     !!!next-token;
2618     return;
2619     } elsif ({
2620     area => 1, basefont => 1, bgsound => 1, br => 1,
2621     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2622     image => 1,
2623     }->{$token->{tag_name}}) {
2624     if ($token->{tag_name} eq 'image') {
2625 wakaba 1.3 !!!parse-error (type => 'image');
2626 wakaba 1.1 $token->{tag_name} = 'img';
2627     }
2628    
2629     $reconstruct_active_formatting_elements->($insert_to_current);
2630    
2631     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2632 wakaba 1.3 pop @{$self->{open_elements}};
2633 wakaba 1.1
2634     !!!next-token;
2635     return;
2636     } elsif ($token->{tag_name} eq 'hr') {
2637     ## has a p element in scope
2638 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2639 wakaba 1.1 if ($_->[1] eq 'p') {
2640     !!!back-token;
2641     $token = {type => 'end tag', tag_name => 'p'};
2642     return;
2643     } elsif ({
2644     table => 1, caption => 1, td => 1, th => 1,
2645     button => 1, marquee => 1, object => 1, html => 1,
2646     }->{$_->[1]}) {
2647     last INSCOPE;
2648     }
2649     } # INSCOPE
2650    
2651     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2652 wakaba 1.3 pop @{$self->{open_elements}};
2653 wakaba 1.1
2654     !!!next-token;
2655     return;
2656     } elsif ($token->{tag_name} eq 'input') {
2657     $reconstruct_active_formatting_elements->($insert_to_current);
2658    
2659     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2660 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2661     pop @{$self->{open_elements}};
2662 wakaba 1.1
2663     !!!next-token;
2664     return;
2665     } elsif ($token->{tag_name} eq 'isindex') {
2666 wakaba 1.3 !!!parse-error (type => 'isindex');
2667 wakaba 1.1
2668 wakaba 1.3 if (defined $self->{form_element}) {
2669 wakaba 1.1 ## Ignore the token
2670     !!!next-token;
2671     return;
2672     } else {
2673     my $at = $token->{attributes};
2674     $at->{name} = {name => 'name', value => 'isindex'};
2675     my @tokens = (
2676     {type => 'start tag', tag_name => 'form'},
2677     {type => 'start tag', tag_name => 'hr'},
2678     {type => 'start tag', tag_name => 'p'},
2679     {type => 'start tag', tag_name => 'label'},
2680     {type => 'character',
2681     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2682     ## TODO: make this configurable
2683     {type => 'start tag', tag_name => 'input', attributes => $at},
2684     #{type => 'character', data => ''}, # SHOULD
2685     {type => 'end tag', tag_name => 'label'},
2686     {type => 'end tag', tag_name => 'p'},
2687     {type => 'start tag', tag_name => 'hr'},
2688     {type => 'end tag', tag_name => 'form'},
2689     );
2690     $token = shift @tokens;
2691     !!!back-token (@tokens);
2692     return;
2693     }
2694     } elsif ({
2695     textarea => 1,
2696 wakaba 1.5 iframe => 1,
2697 wakaba 1.1 noembed => 1,
2698     noframes => 1,
2699     noscript => 0, ## TODO: 1 if scripting is enabled
2700     }->{$token->{tag_name}}) {
2701     my $tag_name = $token->{tag_name};
2702     my $el;
2703     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2704    
2705     if ($token->{tag_name} eq 'textarea') {
2706 wakaba 1.3 ## TODO: $self->{form_element} if defined
2707 wakaba 1.8 ## TODO: ignore first LF <http://html5.org/tools/web-apps-tracker?from=866&to=867>
2708 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2709     } else {
2710     $self->{content_model_flag} = 'CDATA';
2711     }
2712    
2713     $insert->($el);
2714    
2715     my $text = '';
2716     !!!next-token;
2717     while ($token->{type} eq 'character') {
2718     $text .= $token->{data};
2719     !!!next-token;
2720     }
2721     if (length $text) {
2722     $el->manakai_append_text ($text);
2723     }
2724    
2725     $self->{content_model_flag} = 'PCDATA';
2726    
2727     if ($token->{type} eq 'end tag' and
2728     $token->{tag_name} eq $tag_name) {
2729     ## Ignore the token
2730     } else {
2731 wakaba 1.8 if ($token->{tag_name} eq 'textarea') { ## TODO: This is incorrect maybe
2732     ## TODO: <http://html5.org/tools/web-apps-tracker?from=866&to=867>
2733 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2734     } else {
2735     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2736     }
2737 wakaba 1.1 ## ISSUE: And ignore?
2738     }
2739     !!!next-token;
2740     return;
2741     } elsif ($token->{tag_name} eq 'select') {
2742     $reconstruct_active_formatting_elements->($insert_to_current);
2743    
2744     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2745    
2746 wakaba 1.3 $self->{insertion_mode} = 'in select';
2747 wakaba 1.1 !!!next-token;
2748     return;
2749     } elsif ({
2750     caption => 1, col => 1, colgroup => 1, frame => 1,
2751     frameset => 1, head => 1, option => 1, optgroup => 1,
2752     tbody => 1, td => 1, tfoot => 1, th => 1,
2753     thead => 1, tr => 1,
2754     }->{$token->{tag_name}}) {
2755 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2756 wakaba 1.1 ## Ignore the token
2757     !!!next-token;
2758     return;
2759    
2760     ## ISSUE: An issue on HTML5 new elements in the spec.
2761     } else {
2762     $reconstruct_active_formatting_elements->($insert_to_current);
2763    
2764     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2765    
2766     !!!next-token;
2767     return;
2768     }
2769     } elsif ($token->{type} eq 'end tag') {
2770     if ($token->{tag_name} eq 'body') {
2771 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2772 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2773 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2774     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2775 wakaba 1.1 }
2776 wakaba 1.3 $self->{insertion_mode} = 'after body';
2777 wakaba 1.1 !!!next-token;
2778     return;
2779     } else {
2780 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2781 wakaba 1.1 ## Ignore the token
2782     !!!next-token;
2783     return;
2784     }
2785     } elsif ($token->{tag_name} eq 'html') {
2786 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2787 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2788 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2789     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
2790 wakaba 1.1 }
2791 wakaba 1.3 $self->{insertion_mode} = 'after body';
2792 wakaba 1.1 ## reprocess
2793     return;
2794     } else {
2795 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2796 wakaba 1.1 ## Ignore the token
2797     !!!next-token;
2798     return;
2799     }
2800     } elsif ({
2801     address => 1, blockquote => 1, center => 1, dir => 1,
2802     div => 1, dl => 1, fieldset => 1, listing => 1,
2803     menu => 1, ol => 1, pre => 1, ul => 1,
2804     form => 1,
2805     p => 1,
2806     dd => 1, dt => 1, li => 1,
2807     button => 1, marquee => 1, object => 1,
2808     }->{$token->{tag_name}}) {
2809     ## has an element in scope
2810     my $i;
2811 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2812     my $node = $self->{open_elements}->[$_];
2813 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2814     ## generate implied end tags
2815     if ({
2816     dd => ($token->{tag_name} ne 'dd'),
2817     dt => ($token->{tag_name} ne 'dt'),
2818     li => ($token->{tag_name} ne 'li'),
2819     p => ($token->{tag_name} ne 'p'),
2820     td => 1, th => 1, tr => 1,
2821 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2822 wakaba 1.1 !!!back-token;
2823     $token = {type => 'end tag',
2824 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2825 wakaba 1.1 return;
2826     }
2827     $i = $_;
2828     last INSCOPE unless $token->{tag_name} eq 'p';
2829     } elsif ({
2830     table => 1, caption => 1, td => 1, th => 1,
2831     button => 1, marquee => 1, object => 1, html => 1,
2832     }->{$node->[1]}) {
2833     last INSCOPE;
2834     }
2835     } # INSCOPE
2836    
2837 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2838     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2839 wakaba 1.1 }
2840    
2841 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2842     undef $self->{form_element} if $token->{tag_name} eq 'form';
2843 wakaba 1.1 $clear_up_to_marker->()
2844     if {
2845     button => 1, marquee => 1, object => 1,
2846     }->{$token->{tag_name}};
2847     !!!next-token;
2848     return;
2849     } elsif ({
2850     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2851     }->{$token->{tag_name}}) {
2852     ## has an element in scope
2853     my $i;
2854 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2855     my $node = $self->{open_elements}->[$_];
2856 wakaba 1.1 if ({
2857     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2858     }->{$node->[1]}) {
2859     ## generate implied end tags
2860     if ({
2861     dd => 1, dt => 1, li => 1, p => 1,
2862     td => 1, th => 1, tr => 1,
2863 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2864 wakaba 1.1 !!!back-token;
2865     $token = {type => 'end tag',
2866 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2867 wakaba 1.1 return;
2868     }
2869     $i = $_;
2870     last INSCOPE;
2871     } elsif ({
2872     table => 1, caption => 1, td => 1, th => 1,
2873     button => 1, marquee => 1, object => 1, html => 1,
2874     }->{$node->[1]}) {
2875     last INSCOPE;
2876     }
2877     } # INSCOPE
2878    
2879 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2880     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2881 wakaba 1.1 }
2882    
2883 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2884 wakaba 1.1 !!!next-token;
2885     return;
2886     } elsif ({
2887     a => 1,
2888     b => 1, big => 1, em => 1, font => 1, i => 1,
2889     nobr => 1, s => 1, small => 1, strile => 1,
2890     strong => 1, tt => 1, u => 1,
2891     }->{$token->{tag_name}}) {
2892     $formatting_end_tag->($token->{tag_name});
2893 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
2894 wakaba 1.1 return;
2895     } elsif ({
2896     caption => 1, col => 1, colgroup => 1, frame => 1,
2897     frameset => 1, head => 1, option => 1, optgroup => 1,
2898     tbody => 1, td => 1, tfoot => 1, th => 1,
2899     thead => 1, tr => 1,
2900     area => 1, basefont => 1, bgsound => 1, br => 1,
2901     embed => 1, hr => 1, iframe => 1, image => 1,
2902 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2903 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2904     table => 1, textarea => 1, wbr => 1,
2905     noscript => 0, ## TODO: if scripting is enabled
2906     }->{$token->{tag_name}}) {
2907 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2908 wakaba 1.1 ## Ignore the token
2909     !!!next-token;
2910     return;
2911    
2912     ## ISSUE: Issue on HTML5 new elements in spec
2913    
2914     } else {
2915     ## Step 1
2916     my $node_i = -1;
2917 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2918 wakaba 1.1
2919     ## Step 2
2920     S2: {
2921     if ($node->[1] eq $token->{tag_name}) {
2922     ## Step 1
2923     ## generate implied end tags
2924     if ({
2925     dd => 1, dt => 1, li => 1, p => 1,
2926     td => 1, th => 1, tr => 1,
2927 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2928 wakaba 1.1 !!!back-token;
2929     $token = {type => 'end tag',
2930 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2931 wakaba 1.1 return;
2932     }
2933    
2934     ## Step 2
2935 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2936     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2937 wakaba 1.1 }
2938    
2939     ## Step 3
2940 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2941    
2942     !!!next-token;
2943 wakaba 1.1 last S2;
2944     } else {
2945     ## Step 3
2946     if (not $formatting_category->{$node->[1]} and
2947     #not $phrasing_category->{$node->[1]} and
2948     ($special_category->{$node->[1]} or
2949     $scoping_category->{$node->[1]})) {
2950 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2951 wakaba 1.1 ## Ignore the token
2952     !!!next-token;
2953     last S2;
2954     }
2955     }
2956    
2957     ## Step 4
2958     $node_i--;
2959 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2960 wakaba 1.1
2961     ## Step 5;
2962     redo S2;
2963     } # S2
2964 wakaba 1.3 return;
2965 wakaba 1.1 }
2966     }
2967     }; # $in_body
2968    
2969     B: {
2970 wakaba 1.3 if ($phase eq 'main') {
2971 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2972 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2973 wakaba 1.1 ## Ignore the token
2974     ## Stay in the phase
2975     !!!next-token;
2976     redo B;
2977     } elsif ($token->{type} eq 'start tag' and
2978     $token->{tag_name} eq 'html') {
2979     ## TODO: unless it is the first start tag token, parse-error
2980 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2981 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2982     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2983     $top_el->set_attribute_ns
2984     (undef, [undef, $attr_name],
2985     $token->{attributes}->{$attr_name}->{value});
2986     }
2987     }
2988     !!!next-token;
2989     redo B;
2990     } elsif ($token->{type} eq 'end-of-file') {
2991     ## Generate implied end tags
2992     if ({
2993     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2994 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2995 wakaba 1.1 !!!back-token;
2996 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
2997 wakaba 1.1 redo B;
2998     }
2999    
3000 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
3001     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3002     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3003     } elsif (defined $self->{inner_html_node} and
3004     @{$self->{open_elements}} > 1 and
3005     $self->{open_elements}->[1]->[1] ne 'body') {
3006     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3007 wakaba 1.1 }
3008    
3009     ## Stop parsing
3010     last B;
3011    
3012     ## ISSUE: There is an issue in the spec.
3013     } else {
3014 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
3015 wakaba 1.1 if ($token->{type} eq 'character') {
3016     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3017 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3018 wakaba 1.1 unless (length $token->{data}) {
3019     !!!next-token;
3020     redo B;
3021     }
3022     }
3023     ## As if <head>
3024 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3025     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3026     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3027     $self->{insertion_mode} = 'in head';
3028 wakaba 1.1 ## reprocess
3029     redo B;
3030     } elsif ($token->{type} eq 'comment') {
3031     my $comment = $self->{document}->create_comment ($token->{data});
3032 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3033 wakaba 1.1 !!!next-token;
3034     redo B;
3035     } elsif ($token->{type} eq 'start tag') {
3036     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3037 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3038     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3039     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3040     $self->{insertion_mode} = 'in head';
3041 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3042     !!!next-token;
3043     #} elsif ({
3044     # base => 1, link => 1, meta => 1,
3045     # script => 1, style => 1, title => 1,
3046     # }->{$token->{tag_name}}) {
3047     # ## reprocess
3048     } else {
3049     ## reprocess
3050     }
3051     redo B;
3052     } elsif ($token->{type} eq 'end tag') {
3053     if ($token->{tag_name} eq 'html') {
3054     ## As if <head>
3055 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3056     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3057     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3058     $self->{insertion_mode} = 'in head';
3059 wakaba 1.1 ## reprocess
3060     redo B;
3061     } else {
3062 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3063 wakaba 1.1 ## Ignore the token
3064     !!!next-token;
3065     redo B;
3066     }
3067     } else {
3068     die "$0: $token->{type}: Unknown type";
3069     }
3070 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3071 wakaba 1.1 if ($token->{type} eq 'character') {
3072     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3073 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3074 wakaba 1.1 unless (length $token->{data}) {
3075     !!!next-token;
3076     redo B;
3077     }
3078     }
3079    
3080     #
3081     } elsif ($token->{type} eq 'comment') {
3082     my $comment = $self->{document}->create_comment ($token->{data});
3083 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3084 wakaba 1.1 !!!next-token;
3085     redo B;
3086     } elsif ($token->{type} eq 'start tag') {
3087     if ($token->{tag_name} eq 'title') {
3088     ## NOTE: There is an "as if in head" code clone
3089     my $title_el;
3090     !!!create-element ($title_el, 'title', $token->{attributes});
3091 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3092 wakaba 1.1 ->append_child ($title_el);
3093     $self->{content_model_flag} = 'RCDATA';
3094    
3095     my $text = '';
3096     !!!next-token;
3097     while ($token->{type} eq 'character') {
3098     $text .= $token->{data};
3099     !!!next-token;
3100     }
3101     if (length $text) {
3102     $title_el->manakai_append_text ($text);
3103     }
3104    
3105     $self->{content_model_flag} = 'PCDATA';
3106    
3107     if ($token->{type} eq 'end tag' and
3108     $token->{tag_name} eq 'title') {
3109     ## Ignore the token
3110     } else {
3111 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3112 wakaba 1.1 ## ISSUE: And ignore?
3113     }
3114     !!!next-token;
3115     redo B;
3116     } elsif ($token->{tag_name} eq 'style') {
3117     $style_start_tag->();
3118     redo B;
3119     } elsif ($token->{tag_name} eq 'script') {
3120     $script_start_tag->();
3121     redo B;
3122     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3123     ## NOTE: There are "as if in head" code clones
3124     my $el;
3125     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3126 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3127 wakaba 1.1 ->append_child ($el);
3128    
3129     !!!next-token;
3130     redo B;
3131     } elsif ($token->{tag_name} eq 'head') {
3132 wakaba 1.3 !!!parse-error (type => 'in head:head');
3133 wakaba 1.1 ## Ignore the token
3134     !!!next-token;
3135     redo B;
3136     } else {
3137     #
3138     }
3139     } elsif ($token->{type} eq 'end tag') {
3140     if ($token->{tag_name} eq 'head') {
3141 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3142     pop @{$self->{open_elements}};
3143 wakaba 1.1 } else {
3144 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3145 wakaba 1.1 }
3146 wakaba 1.3 $self->{insertion_mode} = 'after head';
3147 wakaba 1.1 !!!next-token;
3148     redo B;
3149     } elsif ($token->{tag_name} eq 'html') {
3150     #
3151     } else {
3152 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3153 wakaba 1.1 ## Ignore the token
3154     !!!next-token;
3155     redo B;
3156     }
3157     } else {
3158     #
3159     }
3160    
3161 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3162 wakaba 1.1 ## As if </head>
3163 wakaba 1.3 pop @{$self->{open_elements}};
3164 wakaba 1.1 }
3165 wakaba 1.3 $self->{insertion_mode} = 'after head';
3166 wakaba 1.1 ## reprocess
3167     redo B;
3168    
3169     ## ISSUE: An issue in the spec.
3170 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3171 wakaba 1.1 if ($token->{type} eq 'character') {
3172     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3173 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3174 wakaba 1.1 unless (length $token->{data}) {
3175     !!!next-token;
3176     redo B;
3177     }
3178     }
3179    
3180     #
3181     } elsif ($token->{type} eq 'comment') {
3182     my $comment = $self->{document}->create_comment ($token->{data});
3183 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3184 wakaba 1.1 !!!next-token;
3185     redo B;
3186     } elsif ($token->{type} eq 'start tag') {
3187     if ($token->{tag_name} eq 'body') {
3188     !!!insert-element ('body', $token->{attributes});
3189 wakaba 1.3 $self->{insertion_mode} = 'in body';
3190 wakaba 1.1 !!!next-token;
3191     redo B;
3192     } elsif ($token->{tag_name} eq 'frameset') {
3193     !!!insert-element ('frameset', $token->{attributes});
3194 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3195 wakaba 1.1 !!!next-token;
3196     redo B;
3197     } elsif ({
3198     base => 1, link => 1, meta => 1,
3199 wakaba 1.3 script => 1, style => 1, title => 1,
3200 wakaba 1.1 }->{$token->{tag_name}}) {
3201 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3202     $self->{insertion_mode} = 'in head';
3203 wakaba 1.1 ## reprocess
3204     redo B;
3205     } else {
3206     #
3207     }
3208     } else {
3209     #
3210     }
3211    
3212     ## As if <body>
3213     !!!insert-element ('body');
3214 wakaba 1.3 $self->{insertion_mode} = 'in body';
3215 wakaba 1.1 ## reprocess
3216     redo B;
3217 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3218 wakaba 1.1 if ($token->{type} eq 'character') {
3219     ## NOTE: There is a code clone of "character in body".
3220     $reconstruct_active_formatting_elements->($insert_to_current);
3221    
3222 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3223 wakaba 1.1
3224     !!!next-token;
3225     redo B;
3226     } elsif ($token->{type} eq 'comment') {
3227     ## NOTE: There is a code clone of "comment in body".
3228     my $comment = $self->{document}->create_comment ($token->{data});
3229 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3230 wakaba 1.1 !!!next-token;
3231     redo B;
3232     } else {
3233     $in_body->($insert_to_current);
3234     redo B;
3235     }
3236 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3237 wakaba 1.1 if ($token->{type} eq 'character') {
3238     ## NOTE: There are "character in table" code clones.
3239     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3240 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3241 wakaba 1.1
3242     unless (length $token->{data}) {
3243     !!!next-token;
3244     redo B;
3245     }
3246     }
3247    
3248 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3249    
3250 wakaba 1.1 ## As if in body, but insert into foster parent element
3251     ## ISSUE: Spec says that "whenever a node would be inserted
3252     ## into the current node" while characters might not
3253     ## result in a new Text node.
3254     $reconstruct_active_formatting_elements->($insert_to_foster);
3255    
3256     if ({
3257     table => 1, tbody => 1, tfoot => 1,
3258     thead => 1, tr => 1,
3259 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3260 wakaba 1.1 # MUST
3261     my $foster_parent_element;
3262     my $next_sibling;
3263     my $prev_sibling;
3264 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3265     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3266     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3267 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3268     $foster_parent_element = $parent;
3269 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3270 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3271     } else {
3272 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3273 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3274     }
3275     last OE;
3276     }
3277     } # OE
3278 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3279 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3280     unless defined $foster_parent_element;
3281     if (defined $prev_sibling and
3282     $prev_sibling->node_type == 3) {
3283     $prev_sibling->manakai_append_text ($token->{data});
3284     } else {
3285     $foster_parent_element->insert_before
3286     ($self->{document}->create_text_node ($token->{data}),
3287     $next_sibling);
3288     }
3289     } else {
3290 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3291 wakaba 1.1 }
3292    
3293     !!!next-token;
3294     redo B;
3295     } elsif ($token->{type} eq 'comment') {
3296     my $comment = $self->{document}->create_comment ($token->{data});
3297 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3298 wakaba 1.1 !!!next-token;
3299     redo B;
3300     } elsif ($token->{type} eq 'start tag') {
3301     if ({
3302     caption => 1,
3303     colgroup => 1,
3304     tbody => 1, tfoot => 1, thead => 1,
3305     }->{$token->{tag_name}}) {
3306     ## Clear back to table context
3307 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3308     $self->{open_elements}->[-1]->[1] ne 'html') {
3309     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3310     pop @{$self->{open_elements}};
3311 wakaba 1.1 }
3312    
3313     push @$active_formatting_elements, ['#marker', '']
3314     if $token->{tag_name} eq 'caption';
3315    
3316     !!!insert-element ($token->{tag_name}, $token->{attributes});
3317 wakaba 1.3 $self->{insertion_mode} = {
3318 wakaba 1.1 caption => 'in caption',
3319     colgroup => 'in column group',
3320     tbody => 'in table body',
3321     tfoot => 'in table body',
3322     thead => 'in table body',
3323     }->{$token->{tag_name}};
3324     !!!next-token;
3325     redo B;
3326     } elsif ({
3327     col => 1,
3328     td => 1, th => 1, tr => 1,
3329     }->{$token->{tag_name}}) {
3330     ## Clear back to table context
3331 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3332     $self->{open_elements}->[-1]->[1] ne 'html') {
3333     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3334     pop @{$self->{open_elements}};
3335 wakaba 1.1 }
3336    
3337     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3338 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3339 wakaba 1.1 ? 'in column group' : 'in table body';
3340     ## reprocess
3341     redo B;
3342     } elsif ($token->{tag_name} eq 'table') {
3343     ## NOTE: There are code clones for this "table in table"
3344 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3345 wakaba 1.1
3346     ## As if </table>
3347     ## have a table element in table scope
3348     my $i;
3349 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3350     my $node = $self->{open_elements}->[$_];
3351 wakaba 1.1 if ($node->[1] eq 'table') {
3352     $i = $_;
3353     last INSCOPE;
3354     } elsif ({
3355     table => 1, html => 1,
3356     }->{$node->[1]}) {
3357     last INSCOPE;
3358     }
3359     } # INSCOPE
3360     unless (defined $i) {
3361 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3362 wakaba 1.1 ## Ignore tokens </table><table>
3363     !!!next-token;
3364     redo B;
3365     }
3366    
3367     ## generate implied end tags
3368     if ({
3369     dd => 1, dt => 1, li => 1, p => 1,
3370     td => 1, th => 1, tr => 1,
3371 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3372 wakaba 1.1 !!!back-token; # <table>
3373     $token = {type => 'end tag', tag_name => 'table'};
3374     !!!back-token;
3375     $token = {type => 'end tag',
3376 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3377 wakaba 1.1 redo B;
3378     }
3379    
3380 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3381     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3382 wakaba 1.1 }
3383    
3384 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3385 wakaba 1.1
3386 wakaba 1.3 $self->_reset_insertion_mode;
3387 wakaba 1.1
3388     ## reprocess
3389     redo B;
3390     } else {
3391     #
3392     }
3393     } elsif ($token->{type} eq 'end tag') {
3394     if ($token->{tag_name} eq 'table') {
3395     ## have a table element in table scope
3396     my $i;
3397 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3398     my $node = $self->{open_elements}->[$_];
3399 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3400     $i = $_;
3401     last INSCOPE;
3402     } elsif ({
3403     table => 1, html => 1,
3404     }->{$node->[1]}) {
3405     last INSCOPE;
3406     }
3407     } # INSCOPE
3408     unless (defined $i) {
3409 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3410 wakaba 1.1 ## Ignore the token
3411     !!!next-token;
3412     redo B;
3413     }
3414    
3415     ## generate implied end tags
3416     if ({
3417     dd => 1, dt => 1, li => 1, p => 1,
3418     td => 1, th => 1, tr => 1,
3419 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3420 wakaba 1.1 !!!back-token;
3421     $token = {type => 'end tag',
3422 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3423 wakaba 1.1 redo B;
3424     }
3425    
3426 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3427     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3428 wakaba 1.1 }
3429    
3430 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3431 wakaba 1.1
3432 wakaba 1.3 $self->_reset_insertion_mode;
3433 wakaba 1.1
3434     !!!next-token;
3435     redo B;
3436     } elsif ({
3437     body => 1, caption => 1, col => 1, colgroup => 1,
3438     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3439     thead => 1, tr => 1,
3440     }->{$token->{tag_name}}) {
3441 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3442 wakaba 1.1 ## Ignore the token
3443     !!!next-token;
3444     redo B;
3445     } else {
3446     #
3447     }
3448     } else {
3449     #
3450     }
3451    
3452 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3453 wakaba 1.1 $in_body->($insert_to_foster);
3454     redo B;
3455 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3456 wakaba 1.1 if ($token->{type} eq 'character') {
3457     ## NOTE: This is a code clone of "character in body".
3458     $reconstruct_active_formatting_elements->($insert_to_current);
3459    
3460 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3461 wakaba 1.1
3462     !!!next-token;
3463     redo B;
3464     } elsif ($token->{type} eq 'comment') {
3465     ## NOTE: This is a code clone of "comment in body".
3466     my $comment = $self->{document}->create_comment ($token->{data});
3467 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3468 wakaba 1.1 !!!next-token;
3469     redo B;
3470     } elsif ($token->{type} eq 'start tag') {
3471     if ({
3472     caption => 1, col => 1, colgroup => 1, tbody => 1,
3473     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3474     }->{$token->{tag_name}}) {
3475 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3476 wakaba 1.1
3477     ## As if </caption>
3478     ## have a caption element in table scope
3479     my $i;
3480 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3481     my $node = $self->{open_elements}->[$_];
3482 wakaba 1.1 if ($node->[1] eq 'caption') {
3483     $i = $_;
3484     last INSCOPE;
3485     } elsif ({
3486     table => 1, html => 1,
3487     }->{$node->[1]}) {
3488     last INSCOPE;
3489     }
3490     } # INSCOPE
3491     unless (defined $i) {
3492 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3493 wakaba 1.1 ## Ignore the token
3494     !!!next-token;
3495     redo B;
3496     }
3497    
3498     ## generate implied end tags
3499     if ({
3500     dd => 1, dt => 1, li => 1, p => 1,
3501     td => 1, th => 1, tr => 1,
3502 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3503 wakaba 1.1 !!!back-token; # <?>
3504     $token = {type => 'end tag', tag_name => 'caption'};
3505     !!!back-token;
3506     $token = {type => 'end tag',
3507 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3508 wakaba 1.1 redo B;
3509     }
3510    
3511 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3512     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3513 wakaba 1.1 }
3514    
3515 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3516 wakaba 1.1
3517     $clear_up_to_marker->();
3518    
3519 wakaba 1.3 $self->{insertion_mode} = 'in table';
3520 wakaba 1.1
3521     ## reprocess
3522     redo B;
3523     } else {
3524     #
3525     }
3526     } elsif ($token->{type} eq 'end tag') {
3527     if ($token->{tag_name} eq 'caption') {
3528     ## have a caption element in table scope
3529     my $i;
3530 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3531     my $node = $self->{open_elements}->[$_];
3532 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3533     $i = $_;
3534     last INSCOPE;
3535     } elsif ({
3536     table => 1, html => 1,
3537     }->{$node->[1]}) {
3538     last INSCOPE;
3539     }
3540     } # INSCOPE
3541     unless (defined $i) {
3542 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3543 wakaba 1.1 ## Ignore the token
3544     !!!next-token;
3545     redo B;
3546     }
3547    
3548     ## generate implied end tags
3549     if ({
3550     dd => 1, dt => 1, li => 1, p => 1,
3551     td => 1, th => 1, tr => 1,
3552 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3553 wakaba 1.1 !!!back-token;
3554     $token = {type => 'end tag',
3555 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3556 wakaba 1.1 redo B;
3557     }
3558    
3559 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3560     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3561 wakaba 1.1 }
3562    
3563 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3564 wakaba 1.1
3565     $clear_up_to_marker->();
3566    
3567 wakaba 1.3 $self->{insertion_mode} = 'in table';
3568 wakaba 1.1
3569     !!!next-token;
3570     redo B;
3571     } elsif ($token->{tag_name} eq 'table') {
3572 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3573 wakaba 1.1
3574     ## As if </caption>
3575     ## have a caption element in table scope
3576     my $i;
3577 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3578     my $node = $self->{open_elements}->[$_];
3579 wakaba 1.1 if ($node->[1] eq 'caption') {
3580     $i = $_;
3581     last INSCOPE;
3582     } elsif ({
3583     table => 1, html => 1,
3584     }->{$node->[1]}) {
3585     last INSCOPE;
3586     }
3587     } # INSCOPE
3588     unless (defined $i) {
3589 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3590 wakaba 1.1 ## Ignore the token
3591     !!!next-token;
3592     redo B;
3593     }
3594    
3595     ## generate implied end tags
3596     if ({
3597     dd => 1, dt => 1, li => 1, p => 1,
3598     td => 1, th => 1, tr => 1,
3599 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3600 wakaba 1.1 !!!back-token; # </table>
3601     $token = {type => 'end tag', tag_name => 'caption'};
3602     !!!back-token;
3603     $token = {type => 'end tag',
3604 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3605 wakaba 1.1 redo B;
3606     }
3607    
3608 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3609     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3610 wakaba 1.1 }
3611    
3612 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3613 wakaba 1.1
3614     $clear_up_to_marker->();
3615    
3616 wakaba 1.3 $self->{insertion_mode} = 'in table';
3617 wakaba 1.1
3618     ## reprocess
3619     redo B;
3620     } elsif ({
3621     body => 1, col => 1, colgroup => 1,
3622     html => 1, tbody => 1, td => 1, tfoot => 1,
3623     th => 1, thead => 1, tr => 1,
3624     }->{$token->{tag_name}}) {
3625 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3626 wakaba 1.1 ## Ignore the token
3627     redo B;
3628     } else {
3629     #
3630     }
3631     } else {
3632     #
3633     }
3634    
3635     $in_body->($insert_to_current);
3636     redo B;
3637 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3638 wakaba 1.1 if ($token->{type} eq 'character') {
3639     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3640 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3641 wakaba 1.1 unless (length $token->{data}) {
3642     !!!next-token;
3643     redo B;
3644     }
3645     }
3646    
3647     #
3648     } elsif ($token->{type} eq 'comment') {
3649     my $comment = $self->{document}->create_comment ($token->{data});
3650 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3651 wakaba 1.1 !!!next-token;
3652     redo B;
3653     } elsif ($token->{type} eq 'start tag') {
3654     if ($token->{tag_name} eq 'col') {
3655     !!!insert-element ($token->{tag_name}, $token->{attributes});
3656 wakaba 1.3 pop @{$self->{open_elements}};
3657 wakaba 1.1 !!!next-token;
3658     redo B;
3659     } else {
3660     #
3661     }
3662     } elsif ($token->{type} eq 'end tag') {
3663     if ($token->{tag_name} eq 'colgroup') {
3664 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3665     !!!parse-error (type => 'unmatched end tag:colgroup');
3666 wakaba 1.1 ## Ignore the token
3667     !!!next-token;
3668     redo B;
3669     } else {
3670 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3671     $self->{insertion_mode} = 'in table';
3672 wakaba 1.1 !!!next-token;
3673     redo B;
3674     }
3675     } elsif ($token->{tag_name} eq 'col') {
3676 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3677 wakaba 1.1 ## Ignore the token
3678     !!!next-token;
3679     redo B;
3680     } else {
3681     #
3682     }
3683     } else {
3684     #
3685     }
3686    
3687     ## As if </colgroup>
3688 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3689     !!!parse-error (type => 'unmatched end tag:colgroup');
3690 wakaba 1.1 ## Ignore the token
3691     !!!next-token;
3692     redo B;
3693     } else {
3694 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3695     $self->{insertion_mode} = 'in table';
3696 wakaba 1.1 ## reprocess
3697     redo B;
3698     }
3699 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3700 wakaba 1.1 if ($token->{type} eq 'character') {
3701     ## NOTE: This is a "character in table" code clone.
3702     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3703 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3704 wakaba 1.1
3705     unless (length $token->{data}) {
3706     !!!next-token;
3707     redo B;
3708     }
3709     }
3710    
3711 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3712    
3713 wakaba 1.1 ## As if in body, but insert into foster parent element
3714     ## ISSUE: Spec says that "whenever a node would be inserted
3715     ## into the current node" while characters might not
3716     ## result in a new Text node.
3717     $reconstruct_active_formatting_elements->($insert_to_foster);
3718    
3719     if ({
3720     table => 1, tbody => 1, tfoot => 1,
3721     thead => 1, tr => 1,
3722 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3723 wakaba 1.1 # MUST
3724     my $foster_parent_element;
3725     my $next_sibling;
3726     my $prev_sibling;
3727 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3728     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3729     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3730 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3731     $foster_parent_element = $parent;
3732 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3733 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3734     } else {
3735 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3736 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3737     }
3738     last OE;
3739     }
3740     } # OE
3741 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3742 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3743     unless defined $foster_parent_element;
3744     if (defined $prev_sibling and
3745     $prev_sibling->node_type == 3) {
3746     $prev_sibling->manakai_append_text ($token->{data});
3747     } else {
3748     $foster_parent_element->insert_before
3749     ($self->{document}->create_text_node ($token->{data}),
3750     $next_sibling);
3751     }
3752     } else {
3753 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3754 wakaba 1.1 }
3755    
3756     !!!next-token;
3757     redo B;
3758     } elsif ($token->{type} eq 'comment') {
3759     ## Copied from 'in table'
3760     my $comment = $self->{document}->create_comment ($token->{data});
3761 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3762 wakaba 1.1 !!!next-token;
3763     redo B;
3764     } elsif ($token->{type} eq 'start tag') {
3765     if ({
3766     tr => 1,
3767     th => 1, td => 1,
3768     }->{$token->{tag_name}}) {
3769 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3770     !!!parse-error (type => 'missing start tag:tr');
3771     }
3772    
3773 wakaba 1.1 ## Clear back to table body context
3774     while (not {
3775     tbody => 1, tfoot => 1, thead => 1, html => 1,
3776 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3777     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3778     pop @{$self->{open_elements}};
3779 wakaba 1.1 }
3780    
3781 wakaba 1.3 $self->{insertion_mode} = 'in row';
3782 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3783     !!!insert-element ($token->{tag_name}, $token->{attributes});
3784     !!!next-token;
3785     } else {
3786     !!!insert-element ('tr');
3787     ## reprocess
3788     }
3789     redo B;
3790     } elsif ({
3791     caption => 1, col => 1, colgroup => 1,
3792     tbody => 1, tfoot => 1, thead => 1,
3793     }->{$token->{tag_name}}) {
3794     ## have an element in table scope
3795     my $i;
3796 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3797     my $node = $self->{open_elements}->[$_];
3798 wakaba 1.1 if ({
3799     tbody => 1, thead => 1, tfoot => 1,
3800     }->{$node->[1]}) {
3801     $i = $_;
3802     last INSCOPE;
3803     } elsif ({
3804     table => 1, html => 1,
3805     }->{$node->[1]}) {
3806     last INSCOPE;
3807     }
3808     } # INSCOPE
3809     unless (defined $i) {
3810 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3811 wakaba 1.1 ## Ignore the token
3812     !!!next-token;
3813     redo B;
3814     }
3815    
3816     ## Clear back to table body context
3817     while (not {
3818     tbody => 1, tfoot => 1, thead => 1, html => 1,
3819 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3820     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3821     pop @{$self->{open_elements}};
3822 wakaba 1.1 }
3823    
3824     ## As if <{current node}>
3825     ## have an element in table scope
3826     ## true by definition
3827    
3828     ## Clear back to table body context
3829     ## nop by definition
3830    
3831 wakaba 1.3 pop @{$self->{open_elements}};
3832     $self->{insertion_mode} = 'in table';
3833 wakaba 1.1 ## reprocess
3834     redo B;
3835     } elsif ($token->{tag_name} eq 'table') {
3836     ## NOTE: This is a code clone of "table in table"
3837 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3838 wakaba 1.1
3839     ## As if </table>
3840     ## have a table element in table scope
3841     my $i;
3842 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3843     my $node = $self->{open_elements}->[$_];
3844 wakaba 1.1 if ($node->[1] eq 'table') {
3845     $i = $_;
3846     last INSCOPE;
3847     } elsif ({
3848     table => 1, html => 1,
3849     }->{$node->[1]}) {
3850     last INSCOPE;
3851     }
3852     } # INSCOPE
3853     unless (defined $i) {
3854 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3855 wakaba 1.1 ## Ignore tokens </table><table>
3856     !!!next-token;
3857     redo B;
3858     }
3859    
3860     ## generate implied end tags
3861     if ({
3862     dd => 1, dt => 1, li => 1, p => 1,
3863     td => 1, th => 1, tr => 1,
3864 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3865 wakaba 1.1 !!!back-token; # <table>
3866     $token = {type => 'end tag', tag_name => 'table'};
3867     !!!back-token;
3868     $token = {type => 'end tag',
3869 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3870 wakaba 1.1 redo B;
3871     }
3872    
3873 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3874     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3875 wakaba 1.1 }
3876    
3877 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3878 wakaba 1.1
3879 wakaba 1.3 $self->_reset_insertion_mode;
3880 wakaba 1.1
3881     ## reprocess
3882     redo B;
3883     } else {
3884     #
3885     }
3886     } elsif ($token->{type} eq 'end tag') {
3887     if ({
3888     tbody => 1, tfoot => 1, thead => 1,
3889     }->{$token->{tag_name}}) {
3890     ## have an element in table scope
3891     my $i;
3892 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3893     my $node = $self->{open_elements}->[$_];
3894 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3895     $i = $_;
3896     last INSCOPE;
3897     } elsif ({
3898     table => 1, html => 1,
3899     }->{$node->[1]}) {
3900     last INSCOPE;
3901     }
3902     } # INSCOPE
3903     unless (defined $i) {
3904 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3905 wakaba 1.1 ## Ignore the token
3906     !!!next-token;
3907     redo B;
3908     }
3909    
3910     ## Clear back to table body context
3911     while (not {
3912     tbody => 1, tfoot => 1, thead => 1, html => 1,
3913 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3914     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3915     pop @{$self->{open_elements}};
3916 wakaba 1.1 }
3917    
3918 wakaba 1.3 pop @{$self->{open_elements}};
3919     $self->{insertion_mode} = 'in table';
3920 wakaba 1.1 !!!next-token;
3921     redo B;
3922     } elsif ($token->{tag_name} eq 'table') {
3923     ## have an element in table scope
3924     my $i;
3925 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3926     my $node = $self->{open_elements}->[$_];
3927 wakaba 1.1 if ({
3928     tbody => 1, thead => 1, tfoot => 1,
3929     }->{$node->[1]}) {
3930     $i = $_;
3931     last INSCOPE;
3932     } elsif ({
3933     table => 1, html => 1,
3934     }->{$node->[1]}) {
3935     last INSCOPE;
3936     }
3937     } # INSCOPE
3938     unless (defined $i) {
3939 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3940 wakaba 1.1 ## Ignore the token
3941     !!!next-token;
3942     redo B;
3943     }
3944    
3945     ## Clear back to table body context
3946     while (not {
3947     tbody => 1, tfoot => 1, thead => 1, html => 1,
3948 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3949     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3950     pop @{$self->{open_elements}};
3951 wakaba 1.1 }
3952    
3953     ## As if <{current node}>
3954     ## have an element in table scope
3955     ## true by definition
3956    
3957     ## Clear back to table body context
3958     ## nop by definition
3959    
3960 wakaba 1.3 pop @{$self->{open_elements}};
3961     $self->{insertion_mode} = 'in table';
3962 wakaba 1.1 ## reprocess
3963     redo B;
3964     } elsif ({
3965     body => 1, caption => 1, col => 1, colgroup => 1,
3966     html => 1, td => 1, th => 1, tr => 1,
3967     }->{$token->{tag_name}}) {
3968 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3969 wakaba 1.1 ## Ignore the token
3970     !!!next-token;
3971     redo B;
3972     } else {
3973     #
3974     }
3975     } else {
3976     #
3977     }
3978    
3979     ## As if in table
3980 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3981 wakaba 1.1 $in_body->($insert_to_foster);
3982     redo B;
3983 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
3984 wakaba 1.1 if ($token->{type} eq 'character') {
3985     ## NOTE: This is a "character in table" code clone.
3986     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3987 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3988 wakaba 1.1
3989     unless (length $token->{data}) {
3990     !!!next-token;
3991     redo B;
3992     }
3993     }
3994    
3995 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3996    
3997 wakaba 1.1 ## As if in body, but insert into foster parent element
3998     ## ISSUE: Spec says that "whenever a node would be inserted
3999     ## into the current node" while characters might not
4000     ## result in a new Text node.
4001     $reconstruct_active_formatting_elements->($insert_to_foster);
4002    
4003     if ({
4004     table => 1, tbody => 1, tfoot => 1,
4005     thead => 1, tr => 1,
4006 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4007 wakaba 1.1 # MUST
4008     my $foster_parent_element;
4009     my $next_sibling;
4010     my $prev_sibling;
4011 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4012     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4013     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4014 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4015     $foster_parent_element = $parent;
4016 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4017 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4018     } else {
4019 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4020 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4021     }
4022     last OE;
4023     }
4024     } # OE
4025 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4026 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4027     unless defined $foster_parent_element;
4028     if (defined $prev_sibling and
4029     $prev_sibling->node_type == 3) {
4030     $prev_sibling->manakai_append_text ($token->{data});
4031     } else {
4032     $foster_parent_element->insert_before
4033     ($self->{document}->create_text_node ($token->{data}),
4034     $next_sibling);
4035     }
4036     } else {
4037 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4038 wakaba 1.1 }
4039    
4040     !!!next-token;
4041     redo B;
4042     } elsif ($token->{type} eq 'comment') {
4043     ## Copied from 'in table'
4044     my $comment = $self->{document}->create_comment ($token->{data});
4045 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4046 wakaba 1.1 !!!next-token;
4047     redo B;
4048     } elsif ($token->{type} eq 'start tag') {
4049     if ($token->{tag_name} eq 'th' or
4050     $token->{tag_name} eq 'td') {
4051     ## Clear back to table row context
4052     while (not {
4053     tr => 1, html => 1,
4054 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4055     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4056     pop @{$self->{open_elements}};
4057 wakaba 1.1 }
4058    
4059     !!!insert-element ($token->{tag_name}, $token->{attributes});
4060 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4061 wakaba 1.1
4062     push @$active_formatting_elements, ['#marker', ''];
4063    
4064     !!!next-token;
4065     redo B;
4066     } elsif ({
4067     caption => 1, col => 1, colgroup => 1,
4068     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4069     }->{$token->{tag_name}}) {
4070     ## As if </tr>
4071     ## have an element in table scope
4072     my $i;
4073 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4074     my $node = $self->{open_elements}->[$_];
4075 wakaba 1.1 if ($node->[1] eq 'tr') {
4076     $i = $_;
4077     last INSCOPE;
4078     } elsif ({
4079     table => 1, html => 1,
4080     }->{$node->[1]}) {
4081     last INSCOPE;
4082     }
4083     } # INSCOPE
4084     unless (defined $i) {
4085 wakaba 1.3 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name});
4086 wakaba 1.1 ## Ignore the token
4087     !!!next-token;
4088     redo B;
4089     }
4090    
4091     ## Clear back to table row context
4092     while (not {
4093     tr => 1, html => 1,
4094 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4095     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4096     pop @{$self->{open_elements}};
4097 wakaba 1.1 }
4098    
4099 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4100     $self->{insertion_mode} = 'in table body';
4101 wakaba 1.1 ## reprocess
4102     redo B;
4103     } elsif ($token->{tag_name} eq 'table') {
4104     ## NOTE: This is a code clone of "table in table"
4105 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4106 wakaba 1.1
4107     ## As if </table>
4108     ## have a table element in table scope
4109     my $i;
4110 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4111     my $node = $self->{open_elements}->[$_];
4112 wakaba 1.1 if ($node->[1] eq 'table') {
4113     $i = $_;
4114     last INSCOPE;
4115     } elsif ({
4116     table => 1, html => 1,
4117     }->{$node->[1]}) {
4118     last INSCOPE;
4119     }
4120     } # INSCOPE
4121     unless (defined $i) {
4122 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4123 wakaba 1.1 ## Ignore tokens </table><table>
4124     !!!next-token;
4125     redo B;
4126     }
4127    
4128     ## generate implied end tags
4129     if ({
4130     dd => 1, dt => 1, li => 1, p => 1,
4131     td => 1, th => 1, tr => 1,
4132 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4133 wakaba 1.1 !!!back-token; # <table>
4134     $token = {type => 'end tag', tag_name => 'table'};
4135     !!!back-token;
4136     $token = {type => 'end tag',
4137 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4138 wakaba 1.1 redo B;
4139     }
4140    
4141 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4142     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4143 wakaba 1.1 }
4144    
4145 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4146 wakaba 1.1
4147 wakaba 1.3 $self->_reset_insertion_mode;
4148 wakaba 1.1
4149     ## reprocess
4150     redo B;
4151     } else {
4152     #
4153     }
4154     } elsif ($token->{type} eq 'end tag') {
4155     if ($token->{tag_name} eq 'tr') {
4156     ## have an element in table scope
4157     my $i;
4158 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4159     my $node = $self->{open_elements}->[$_];
4160 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4161     $i = $_;
4162     last INSCOPE;
4163     } elsif ({
4164     table => 1, html => 1,
4165     }->{$node->[1]}) {
4166     last INSCOPE;
4167     }
4168     } # INSCOPE
4169     unless (defined $i) {
4170 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4171 wakaba 1.1 ## Ignore the token
4172     !!!next-token;
4173     redo B;
4174     }
4175    
4176     ## Clear back to table row context
4177     while (not {
4178     tr => 1, html => 1,
4179 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4180     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4181     pop @{$self->{open_elements}};
4182 wakaba 1.1 }
4183    
4184 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4185     $self->{insertion_mode} = 'in table body';
4186 wakaba 1.1 !!!next-token;
4187     redo B;
4188     } elsif ($token->{tag_name} eq 'table') {
4189     ## As if </tr>
4190     ## have an element in table scope
4191     my $i;
4192 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4193     my $node = $self->{open_elements}->[$_];
4194 wakaba 1.1 if ($node->[1] eq 'tr') {
4195     $i = $_;
4196     last INSCOPE;
4197     } elsif ({
4198     table => 1, html => 1,
4199     }->{$node->[1]}) {
4200     last INSCOPE;
4201     }
4202     } # INSCOPE
4203     unless (defined $i) {
4204 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{type});
4205 wakaba 1.1 ## Ignore the token
4206     !!!next-token;
4207     redo B;
4208     }
4209    
4210     ## Clear back to table row context
4211     while (not {
4212     tr => 1, html => 1,
4213 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4214     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4215     pop @{$self->{open_elements}};
4216 wakaba 1.1 }
4217    
4218 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4219     $self->{insertion_mode} = 'in table body';
4220 wakaba 1.1 ## reprocess
4221     redo B;
4222     } elsif ({
4223     tbody => 1, tfoot => 1, thead => 1,
4224     }->{$token->{tag_name}}) {
4225     ## have an element in table scope
4226     my $i;
4227 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4228     my $node = $self->{open_elements}->[$_];
4229 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4230     $i = $_;
4231     last INSCOPE;
4232     } elsif ({
4233     table => 1, html => 1,
4234     }->{$node->[1]}) {
4235     last INSCOPE;
4236     }
4237     } # INSCOPE
4238     unless (defined $i) {
4239 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4240 wakaba 1.1 ## Ignore the token
4241     !!!next-token;
4242     redo B;
4243     }
4244    
4245     ## As if </tr>
4246     ## have an element in table scope
4247     my $i;
4248 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4249     my $node = $self->{open_elements}->[$_];
4250 wakaba 1.1 if ($node->[1] eq 'tr') {
4251     $i = $_;
4252     last INSCOPE;
4253     } elsif ({
4254     table => 1, html => 1,
4255     }->{$node->[1]}) {
4256     last INSCOPE;
4257     }
4258     } # INSCOPE
4259     unless (defined $i) {
4260 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4261 wakaba 1.1 ## Ignore the token
4262     !!!next-token;
4263     redo B;
4264     }
4265    
4266     ## Clear back to table row context
4267     while (not {
4268     tr => 1, html => 1,
4269 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4270     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4271     pop @{$self->{open_elements}};
4272 wakaba 1.1 }
4273    
4274 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4275     $self->{insertion_mode} = 'in table body';
4276 wakaba 1.1 ## reprocess
4277     redo B;
4278     } elsif ({
4279     body => 1, caption => 1, col => 1,
4280     colgroup => 1, html => 1, td => 1, th => 1,
4281     }->{$token->{tag_name}}) {
4282 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4283 wakaba 1.1 ## Ignore the token
4284     !!!next-token;
4285     redo B;
4286     } else {
4287     #
4288     }
4289     } else {
4290     #
4291     }
4292    
4293     ## As if in table
4294 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4295 wakaba 1.1 $in_body->($insert_to_foster);
4296     redo B;
4297 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4298 wakaba 1.1 if ($token->{type} eq 'character') {
4299     ## NOTE: This is a code clone of "character in body".
4300     $reconstruct_active_formatting_elements->($insert_to_current);
4301    
4302 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4303 wakaba 1.1
4304     !!!next-token;
4305     redo B;
4306     } elsif ($token->{type} eq 'comment') {
4307     ## NOTE: This is a code clone of "comment in body".
4308     my $comment = $self->{document}->create_comment ($token->{data});
4309 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4310 wakaba 1.1 !!!next-token;
4311     redo B;
4312     } elsif ($token->{type} eq 'start tag') {
4313     if ({
4314     caption => 1, col => 1, colgroup => 1,
4315     tbody => 1, td => 1, tfoot => 1, th => 1,
4316     thead => 1, tr => 1,
4317     }->{$token->{tag_name}}) {
4318     ## have an element in table scope
4319     my $tn;
4320 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4321     my $node = $self->{open_elements}->[$_];
4322 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4323     $tn = $node->[1];
4324     last INSCOPE;
4325     } elsif ({
4326     table => 1, html => 1,
4327     }->{$node->[1]}) {
4328     last INSCOPE;
4329     }
4330     } # INSCOPE
4331     unless (defined $tn) {
4332 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4333 wakaba 1.1 ## Ignore the token
4334     !!!next-token;
4335     redo B;
4336     }
4337    
4338     ## Close the cell
4339     !!!back-token; # <?>
4340     $token = {type => 'end tag', tag_name => $tn};
4341     redo B;
4342     } else {
4343     #
4344     }
4345     } elsif ($token->{type} eq 'end tag') {
4346     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4347     ## have an element in table scope
4348     my $i;
4349 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4350     my $node = $self->{open_elements}->[$_];
4351 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4352     $i = $_;
4353     last INSCOPE;
4354     } elsif ({
4355     table => 1, html => 1,
4356     }->{$node->[1]}) {
4357     last INSCOPE;
4358     }
4359     } # INSCOPE
4360     unless (defined $i) {
4361 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4362 wakaba 1.1 ## Ignore the token
4363     !!!next-token;
4364     redo B;
4365     }
4366    
4367     ## generate implied end tags
4368     if ({
4369     dd => 1, dt => 1, li => 1, p => 1,
4370     td => ($token->{tag_name} eq 'th'),
4371     th => ($token->{tag_name} eq 'td'),
4372     tr => 1,
4373 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4374 wakaba 1.1 !!!back-token;
4375     $token = {type => 'end tag',
4376 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4377 wakaba 1.1 redo B;
4378     }
4379    
4380 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4381     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4382 wakaba 1.1 }
4383    
4384 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4385 wakaba 1.1
4386     $clear_up_to_marker->();
4387    
4388 wakaba 1.3 $self->{insertion_mode} = 'in row';
4389 wakaba 1.1
4390     !!!next-token;
4391     redo B;
4392     } elsif ({
4393     body => 1, caption => 1, col => 1,
4394     colgroup => 1, html => 1,
4395     }->{$token->{tag_name}}) {
4396 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4397 wakaba 1.1 ## Ignore the token
4398     !!!next-token;
4399     redo B;
4400     } elsif ({
4401     table => 1, tbody => 1, tfoot => 1,
4402     thead => 1, tr => 1,
4403     }->{$token->{tag_name}}) {
4404     ## have an element in table scope
4405     my $i;
4406     my $tn;
4407 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4408     my $node = $self->{open_elements}->[$_];
4409 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4410     $i = $_;
4411     last INSCOPE;
4412     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4413     $tn = $node->[1];
4414     ## NOTE: There is exactly one |td| or |th| element
4415     ## in scope in the stack of open elements by definition.
4416     } elsif ({
4417     table => 1, html => 1,
4418     }->{$node->[1]}) {
4419     last INSCOPE;
4420     }
4421     } # INSCOPE
4422     unless (defined $i) {
4423 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4424 wakaba 1.1 ## Ignore the token
4425     !!!next-token;
4426     redo B;
4427     }
4428    
4429     ## Close the cell
4430     !!!back-token; # </?>
4431     $token = {type => 'end tag', tag_name => $tn};
4432     redo B;
4433     } else {
4434     #
4435     }
4436     } else {
4437     #
4438     }
4439    
4440     $in_body->($insert_to_current);
4441     redo B;
4442 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4443 wakaba 1.1 if ($token->{type} eq 'character') {
4444 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4445 wakaba 1.1 !!!next-token;
4446     redo B;
4447     } elsif ($token->{type} eq 'comment') {
4448     my $comment = $self->{document}->create_comment ($token->{data});
4449 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4450 wakaba 1.1 !!!next-token;
4451     redo B;
4452     } elsif ($token->{type} eq 'start tag') {
4453     if ($token->{tag_name} eq 'option') {
4454 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4455 wakaba 1.1 ## As if </option>
4456 wakaba 1.3 pop @{$self->{open_elements}};
4457 wakaba 1.1 }
4458    
4459     !!!insert-element ($token->{tag_name}, $token->{attributes});
4460     !!!next-token;
4461     redo B;
4462     } elsif ($token->{tag_name} eq 'optgroup') {
4463 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4464 wakaba 1.1 ## As if </option>
4465 wakaba 1.3 pop @{$self->{open_elements}};
4466 wakaba 1.1 }
4467    
4468 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4469 wakaba 1.1 ## As if </optgroup>
4470 wakaba 1.3 pop @{$self->{open_elements}};
4471 wakaba 1.1 }
4472    
4473     !!!insert-element ($token->{tag_name}, $token->{attributes});
4474     !!!next-token;
4475     redo B;
4476     } elsif ($token->{tag_name} eq 'select') {
4477 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4478 wakaba 1.1 ## As if </select> instead
4479     ## have an element in table scope
4480     my $i;
4481 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4482     my $node = $self->{open_elements}->[$_];
4483 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4484     $i = $_;
4485     last INSCOPE;
4486     } elsif ({
4487     table => 1, html => 1,
4488     }->{$node->[1]}) {
4489     last INSCOPE;
4490     }
4491     } # INSCOPE
4492     unless (defined $i) {
4493 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4494 wakaba 1.1 ## Ignore the token
4495     !!!next-token;
4496     redo B;
4497     }
4498    
4499 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4500 wakaba 1.1
4501 wakaba 1.3 $self->_reset_insertion_mode;
4502 wakaba 1.1
4503     !!!next-token;
4504     redo B;
4505     } else {
4506     #
4507     }
4508     } elsif ($token->{type} eq 'end tag') {
4509     if ($token->{tag_name} eq 'optgroup') {
4510 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4511     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4512 wakaba 1.1 ## As if </option>
4513 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4514     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4515     pop @{$self->{open_elements}};
4516 wakaba 1.1 } else {
4517 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4518 wakaba 1.1 ## Ignore the token
4519     }
4520     !!!next-token;
4521     redo B;
4522     } elsif ($token->{tag_name} eq 'option') {
4523 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4524     pop @{$self->{open_elements}};
4525 wakaba 1.1 } else {
4526 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4527 wakaba 1.1 ## Ignore the token
4528     }
4529     !!!next-token;
4530     redo B;
4531     } elsif ($token->{tag_name} eq 'select') {
4532     ## have an element in table scope
4533     my $i;
4534 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4535     my $node = $self->{open_elements}->[$_];
4536 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4537     $i = $_;
4538     last INSCOPE;
4539     } elsif ({
4540     table => 1, html => 1,
4541     }->{$node->[1]}) {
4542     last INSCOPE;
4543     }
4544     } # INSCOPE
4545     unless (defined $i) {
4546 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4547 wakaba 1.1 ## Ignore the token
4548     !!!next-token;
4549     redo B;
4550     }
4551    
4552 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4553 wakaba 1.1
4554 wakaba 1.3 $self->_reset_insertion_mode;
4555 wakaba 1.1
4556     !!!next-token;
4557     redo B;
4558     } elsif ({
4559     caption => 1, table => 1, tbody => 1,
4560     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4561     }->{$token->{tag_name}}) {
4562 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4563 wakaba 1.1
4564     ## have an element in table scope
4565     my $i;
4566 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4567     my $node = $self->{open_elements}->[$_];
4568 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4569     $i = $_;
4570     last INSCOPE;
4571     } elsif ({
4572     table => 1, html => 1,
4573     }->{$node->[1]}) {
4574     last INSCOPE;
4575     }
4576     } # INSCOPE
4577     unless (defined $i) {
4578     ## Ignore the token
4579     !!!next-token;
4580     redo B;
4581     }
4582    
4583     ## As if </select>
4584     ## have an element in table scope
4585     undef $i;
4586 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4587     my $node = $self->{open_elements}->[$_];
4588 wakaba 1.1 if ($node->[1] eq 'select') {
4589     $i = $_;
4590     last INSCOPE;
4591     } elsif ({
4592     table => 1, html => 1,
4593     }->{$node->[1]}) {
4594     last INSCOPE;
4595     }
4596     } # INSCOPE
4597     unless (defined $i) {
4598 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4599 wakaba 1.1 ## Ignore the </select> token
4600     !!!next-token; ## TODO: ok?
4601     redo B;
4602     }
4603    
4604 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4605 wakaba 1.1
4606 wakaba 1.3 $self->_reset_insertion_mode;
4607 wakaba 1.1
4608     ## reprocess
4609     redo B;
4610     } else {
4611     #
4612     }
4613     } else {
4614     #
4615     }
4616    
4617 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4618 wakaba 1.1 ## Ignore the token
4619     !!!next-token;
4620     redo B;
4621 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4622 wakaba 1.1 if ($token->{type} eq 'character') {
4623     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4624     ## As if in body
4625     $reconstruct_active_formatting_elements->($insert_to_current);
4626    
4627 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4628 wakaba 1.1
4629     unless (length $token->{data}) {
4630     !!!next-token;
4631     redo B;
4632     }
4633     }
4634    
4635     #
4636 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4637 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4638     my $comment = $self->{document}->create_comment ($token->{data});
4639 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4640 wakaba 1.1 !!!next-token;
4641     redo B;
4642 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4643     !!!parse-error (type => 'after body:'.$token->{tag_name});
4644     #
4645 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4646     if ($token->{tag_name} eq 'html') {
4647 wakaba 1.3 if (defined $self->{inner_html_node}) {
4648     !!!parse-error (type => 'unmatched end tag:html');
4649     ## Ignore the token
4650     !!!next-token;
4651     redo B;
4652     } else {
4653     $phase = 'trailing end';
4654     !!!next-token;
4655     redo B;
4656     }
4657 wakaba 1.1 } else {
4658 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4659 wakaba 1.1 }
4660     } else {
4661 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4662 wakaba 1.1 }
4663    
4664 wakaba 1.3 $self->{insertion_mode} = 'in body';
4665 wakaba 1.1 ## reprocess
4666     redo B;
4667 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4668 wakaba 1.1 if ($token->{type} eq 'character') {
4669     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4670 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4671 wakaba 1.1
4672     unless (length $token->{data}) {
4673     !!!next-token;
4674     redo B;
4675     }
4676     }
4677    
4678     #
4679     } elsif ($token->{type} eq 'comment') {
4680     my $comment = $self->{document}->create_comment ($token->{data});
4681 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4682 wakaba 1.1 !!!next-token;
4683     redo B;
4684     } elsif ($token->{type} eq 'start tag') {
4685     if ($token->{tag_name} eq 'frameset') {
4686     !!!insert-element ($token->{tag_name}, $token->{attributes});
4687     !!!next-token;
4688     redo B;
4689     } elsif ($token->{tag_name} eq 'frame') {
4690     !!!insert-element ($token->{tag_name}, $token->{attributes});
4691 wakaba 1.3 pop @{$self->{open_elements}};
4692 wakaba 1.1 !!!next-token;
4693     redo B;
4694     } elsif ($token->{tag_name} eq 'noframes') {
4695     $in_body->($insert_to_current);
4696     redo B;
4697     } else {
4698     #
4699     }
4700     } elsif ($token->{type} eq 'end tag') {
4701     if ($token->{tag_name} eq 'frameset') {
4702 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4703     @{$self->{open_elements}} == 1) {
4704     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4705 wakaba 1.1 ## Ignore the token
4706     !!!next-token;
4707     } else {
4708 wakaba 1.3 pop @{$self->{open_elements}};
4709 wakaba 1.1 !!!next-token;
4710     }
4711    
4712     ## if not inner_html and
4713 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4714     $self->{insertion_mode} = 'after frameset';
4715 wakaba 1.1 }
4716     redo B;
4717     } else {
4718     #
4719     }
4720     } else {
4721     #
4722     }
4723    
4724 wakaba 1.3 if (defined $token->{tag_name}) {
4725     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4726     } else {
4727     !!!parse-error (type => 'in frameset:#'.$token->{type});
4728     }
4729 wakaba 1.1 ## Ignore the token
4730     !!!next-token;
4731     redo B;
4732 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4733 wakaba 1.1 if ($token->{type} eq 'character') {
4734     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4735 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4736 wakaba 1.1
4737     unless (length $token->{data}) {
4738     !!!next-token;
4739     redo B;
4740     }
4741     }
4742    
4743     #
4744     } elsif ($token->{type} eq 'comment') {
4745     my $comment = $self->{document}->create_comment ($token->{data});
4746 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4747 wakaba 1.1 !!!next-token;
4748     redo B;
4749     } elsif ($token->{type} eq 'start tag') {
4750     if ($token->{tag_name} eq 'noframes') {
4751     $in_body->($insert_to_current);
4752     redo B;
4753     } else {
4754     #
4755     }
4756     } elsif ($token->{type} eq 'end tag') {
4757     if ($token->{tag_name} eq 'html') {
4758     $phase = 'trailing end';
4759     !!!next-token;
4760     redo B;
4761     } else {
4762     #
4763     }
4764     } else {
4765     #
4766     }
4767    
4768 wakaba 1.3 if (defined $token->{tag_name}) {
4769     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4770     } else {
4771     !!!parse-error (type => 'after frameset:#'.$token->{type});
4772     }
4773 wakaba 1.1 ## Ignore the token
4774     !!!next-token;
4775     redo B;
4776    
4777     ## ISSUE: An issue in spec there
4778     } else {
4779 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4780 wakaba 1.1 }
4781     }
4782     } elsif ($phase eq 'trailing end') {
4783     ## states in the main stage is preserved yet # MUST
4784    
4785     if ($token->{type} eq 'DOCTYPE') {
4786 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4787 wakaba 1.1 ## Ignore the token
4788     !!!next-token;
4789     redo B;
4790     } elsif ($token->{type} eq 'comment') {
4791     my $comment = $self->{document}->create_comment ($token->{data});
4792     $self->{document}->append_child ($comment);
4793     !!!next-token;
4794     redo B;
4795     } elsif ($token->{type} eq 'character') {
4796     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4797     my $data = $1;
4798     ## As if in the main phase.
4799     ## NOTE: The insertion mode in the main phase
4800     ## just before the phase has been changed to the trailing
4801     ## end phase is either "after body" or "after frameset".
4802     $reconstruct_active_formatting_elements->($insert_to_current)
4803     if $phase eq 'main';
4804    
4805 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4806 wakaba 1.1
4807     unless (length $token->{data}) {
4808     !!!next-token;
4809     redo B;
4810     }
4811     }
4812    
4813 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4814 wakaba 1.1 $phase = 'main';
4815     ## reprocess
4816     redo B;
4817     } elsif ($token->{type} eq 'start tag' or
4818     $token->{type} eq 'end tag') {
4819 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4820 wakaba 1.1 $phase = 'main';
4821     ## reprocess
4822     redo B;
4823     } elsif ($token->{type} eq 'end-of-file') {
4824     ## Stop parsing
4825     last B;
4826     } else {
4827     die "$0: $token->{type}: Unknown token";
4828     }
4829     }
4830     } # B
4831    
4832     ## Stop parsing # MUST
4833    
4834     ## TODO: script stuffs
4835 wakaba 1.3 } # _tree_construct_main
4836    
## Replaces the children of a DOM node with the result of parsing a
## string of HTML markup (the |innerHTML| setter).
##
## Invoked as a class method:
##   $class->set_inner_html ($node, $s, $onerror)
##
##   $node    - Target node.  For a Document (node type 9) the existing
##              children are removed and $s is handed to |parse_string|.
##              For an Element (node type 1) $s is parsed as a fragment
##              in a scratch document, with $node as the context element,
##              and the parsed nodes then replace the children of $node.
##   $s       - Markup to parse.  It is read through a reference to the
##              caller's argument, so the string is not copied.
##   $onerror - Optional code reference.  In the Element case it is called
##              for each parse error with the error's own arguments
##              followed by |line => ..., column => ...|; when omitted, a
##              default handler reports the error with |warn|.  In the
##              Document case it is passed on to |parse_string| as is.
##
## Dies if $node is neither a Document nor an Element.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  if ($nt == 9) { # Document
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Iterate over a copy of the child list while removing the children.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) { # Element
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    ## The fragment is parsed into a fresh scratch document.
    my $doc = $node->owner_document->implementation->create_document;
    ## TODO: Mark as HTML document
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 9 # MUST
    ## Input position state shared by the two closures below.
    my $i = 0;
    my $line = 1;
    my $column = 0;
    $p->{set_next_input_character} = sub {
      my $self = shift;

      ## -1 is stored once the input string is exhausted.
      if ($i >= length $$s) {
        $self->{next_input_character} = -1;
        return;
      }

      $self->{next_input_character} = ord substr $$s, $i++, 1;
      $column++;

      if ($self->{next_input_character} == 0x000A) { # LF
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} == 0x000D) { # CR
        ## CR and CR LF are both reported as a single LF.  Only a
        ## following LF is consumed here; any other character is left in
        ## the input so that the next call reads it through this same
        ## code path and it gets the normalization (CR, NULL, out of
        ## range) and line/column accounting like every other character.
        $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_input_character} == 0x0000) { # NULL
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Error reporting: use the caller's handler if given, otherwise warn.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    $p->{parse_error} = sub {
      ## Append the current input position to the error's arguments.
      $ponerror->(@_, line => $line, column => $column);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model flag depends on the local name of the
    ## context element; any name not listed here gets PCDATA.
    my $node_ln = $node->local_name;
    $p->{content_model_flag} = {
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    }->{$node_ln} || 'PCDATA';
      ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 7 # MUST
    $p->_reset_insertion_mode;

    ## Step 8 # MUST
    ## Use the nearest XHTML-namespace |form| element among the context
    ## node and its ancestors, if any, as the parser's form element.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->local_name eq 'form') { ## TODO: case?
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 3 # MUST
    ## Step 10 # MUST
    {
      ## The |!!!next-token| macro is run with |$self| bound to the
      ## fragment parser.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 11 # MUST
    ## Iterate over a copy of the child list while removing the children.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    ## Move the parsed nodes from the scratch root element to $node.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $node->append_child ($_);
    }
    ## ISSUE: adopt_node? mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
4990    
4991     } # tree construction stage
4992 wakaba 1.1
## Serializes the child nodes of a DOM node as HTML markup (the
## |innerHTML| getter).
##
## Invoked as a method; the invocant is ignored:
##   $class->get_inner_html ($node, $on_error)
##
##   $node     - Node whose children are serialized.  $node itself is not
##               part of the output, but it and its ancestors decide
##               whether text is initially written unescaped (see Step 1).
##   $on_error - Optional code reference, called with each child node
##               whose node type is not handled below.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  my $xhtml_ns = 'http://www.w3.org/1999/xhtml';

  ## Elements whose text content is written without escaping.
  my %is_cdata_element = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );

  ## Elements serialized as a start tag only (no children, no end tag).
  my %is_void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  ## Escaping shared by attribute values and ordinary text.  |&| has to
  ## be replaced first so the entities produced here are not re-escaped.
  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## Text is written raw from the start if $node or any of its ancestors
  ## is one of the XHTML-namespace CDATA elements.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI may be undefined (null namespace).
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq $xhtml_ns and
          $is_cdata_element{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work list processed from the front.  Besides DOM nodes
  ## it holds plain strings: an end tag to be emitted literally, or the
  ## marker |cdata-out| which turns raw text output off again.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value) . '"';
      }
      $s .= '>';

      next C if $is_void_element{$tag_name};

      ## Entering a CDATA element from escaped context: queue the marker
      ## first so that it ends up right behind this element's end tag.
      if (not $in_cdata and $is_cdata_element{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATASection
      $s .= $in_cdata ? $child->data : $escape->($child->data);
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## Expand the entity reference in place.  The children go to the
      ## front of the work list so they are serialized at the position of
      ## the reference, before any following sibling or pending end tag.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5089    
5090     1;
5091 wakaba 1.8 # $Date: 2007/05/30 12:24:50 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24