/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations) (download) (as text)
Sat Jun 23 03:30:06 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.9: +37 -40 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	23 Jun 2007 03:16:30 -0000
	* tokenizer-test-1.test: Tests for C1 character
	references are added.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Jun 2007 03:26:51 -0000
	* HTML.pm.src: An error message was incorrect.
	HTML5 revision 869 (C1 character references).

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.10 our $VERSION=do{my @r=(q$Revision: 1.9 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Tag names of start tags that may end with "/>" without the
## tokenizer reporting a |nestc| parse error.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Named character references: entity name (case-sensitive) => the
## character it stands for.  Entries are kept in the original order.
my $entity_char = {
  AElig => "\x{00C6}", Aacute => "\x{00C1}", Acirc => "\x{00C2}",
  Agrave => "\x{00C0}", Alpha => "\x{0391}", Aring => "\x{00C5}",
  Atilde => "\x{00C3}", Auml => "\x{00C4}", Beta => "\x{0392}",
  Ccedil => "\x{00C7}", Chi => "\x{03A7}", Dagger => "\x{2021}",
  Delta => "\x{0394}", ETH => "\x{00D0}", Eacute => "\x{00C9}",
  Ecirc => "\x{00CA}", Egrave => "\x{00C8}", Epsilon => "\x{0395}",
  Eta => "\x{0397}", Euml => "\x{00CB}", Gamma => "\x{0393}",
  Iacute => "\x{00CD}", Icirc => "\x{00CE}", Igrave => "\x{00CC}",
  Iota => "\x{0399}", Iuml => "\x{00CF}", Kappa => "\x{039A}",
  Lambda => "\x{039B}", Mu => "\x{039C}", Ntilde => "\x{00D1}",
  Nu => "\x{039D}", OElig => "\x{0152}", Oacute => "\x{00D3}",
  Ocirc => "\x{00D4}", Ograve => "\x{00D2}", Omega => "\x{03A9}",
  Omicron => "\x{039F}", Oslash => "\x{00D8}", Otilde => "\x{00D5}",
  Ouml => "\x{00D6}", Phi => "\x{03A6}", Pi => "\x{03A0}",
  Prime => "\x{2033}", Psi => "\x{03A8}", Rho => "\x{03A1}",
  Scaron => "\x{0160}", Sigma => "\x{03A3}", THORN => "\x{00DE}",
  Tau => "\x{03A4}", Theta => "\x{0398}", Uacute => "\x{00DA}",
  Ucirc => "\x{00DB}", Ugrave => "\x{00D9}", Upsilon => "\x{03A5}",
  Uuml => "\x{00DC}", Xi => "\x{039E}", Yacute => "\x{00DD}",
  Yuml => "\x{0178}", Zeta => "\x{0396}", aacute => "\x{00E1}",
  acirc => "\x{00E2}", acute => "\x{00B4}", aelig => "\x{00E6}",
  agrave => "\x{00E0}", alefsym => "\x{2135}", alpha => "\x{03B1}",
  amp => "\x{0026}", AMP => "\x{0026}", and => "\x{2227}",
  ang => "\x{2220}", apos => "\x{0027}", aring => "\x{00E5}",
  asymp => "\x{2248}", atilde => "\x{00E3}", auml => "\x{00E4}",
  bdquo => "\x{201E}", beta => "\x{03B2}", brvbar => "\x{00A6}",
  bull => "\x{2022}", cap => "\x{2229}", ccedil => "\x{00E7}",
  cedil => "\x{00B8}", cent => "\x{00A2}", chi => "\x{03C7}",
  circ => "\x{02C6}", clubs => "\x{2663}", cong => "\x{2245}",
  copy => "\x{00A9}", COPY => "\x{00A9}", crarr => "\x{21B5}",
  cup => "\x{222A}", curren => "\x{00A4}", dArr => "\x{21D3}",
  dagger => "\x{2020}", darr => "\x{2193}", deg => "\x{00B0}",
  delta => "\x{03B4}", diams => "\x{2666}", divide => "\x{00F7}",
  eacute => "\x{00E9}", ecirc => "\x{00EA}", egrave => "\x{00E8}",
  empty => "\x{2205}", emsp => "\x{2003}", ensp => "\x{2002}",
  epsilon => "\x{03B5}", equiv => "\x{2261}", eta => "\x{03B7}",
  eth => "\x{00F0}", euml => "\x{00EB}", euro => "\x{20AC}",
  exist => "\x{2203}", fnof => "\x{0192}", forall => "\x{2200}",
  frac12 => "\x{00BD}", frac14 => "\x{00BC}", frac34 => "\x{00BE}",
  frasl => "\x{2044}", gamma => "\x{03B3}", ge => "\x{2265}",
  gt => "\x{003E}", GT => "\x{003E}", hArr => "\x{21D4}",
  harr => "\x{2194}", hearts => "\x{2665}", hellip => "\x{2026}",
  iacute => "\x{00ED}", icirc => "\x{00EE}", iexcl => "\x{00A1}",
  igrave => "\x{00EC}", image => "\x{2111}", infin => "\x{221E}",
  int => "\x{222B}", iota => "\x{03B9}", iquest => "\x{00BF}",
  isin => "\x{2208}", iuml => "\x{00EF}", kappa => "\x{03BA}",
  lArr => "\x{21D0}", lambda => "\x{03BB}", lang => "\x{2329}",
  laquo => "\x{00AB}", larr => "\x{2190}", lceil => "\x{2308}",
  ldquo => "\x{201C}", le => "\x{2264}", lfloor => "\x{230A}",
  lowast => "\x{2217}", loz => "\x{25CA}", lrm => "\x{200E}",
  lsaquo => "\x{2039}", lsquo => "\x{2018}", lt => "\x{003C}",
  LT => "\x{003C}", macr => "\x{00AF}", mdash => "\x{2014}",
  micro => "\x{00B5}", middot => "\x{00B7}", minus => "\x{2212}",
  mu => "\x{03BC}", nabla => "\x{2207}", nbsp => "\x{00A0}",
  ndash => "\x{2013}", ne => "\x{2260}", ni => "\x{220B}",
  not => "\x{00AC}", notin => "\x{2209}", nsub => "\x{2284}",
  ntilde => "\x{00F1}", nu => "\x{03BD}", oacute => "\x{00F3}",
  ocirc => "\x{00F4}", oelig => "\x{0153}", ograve => "\x{00F2}",
  oline => "\x{203E}", omega => "\x{03C9}", omicron => "\x{03BF}",
  oplus => "\x{2295}", or => "\x{2228}", ordf => "\x{00AA}",
  ordm => "\x{00BA}", oslash => "\x{00F8}", otilde => "\x{00F5}",
  otimes => "\x{2297}", ouml => "\x{00F6}", para => "\x{00B6}",
  part => "\x{2202}", permil => "\x{2030}", perp => "\x{22A5}",
  phi => "\x{03C6}", pi => "\x{03C0}", piv => "\x{03D6}",
  plusmn => "\x{00B1}", pound => "\x{00A3}", prime => "\x{2032}",
  prod => "\x{220F}", prop => "\x{221D}", psi => "\x{03C8}",
  quot => "\x{0022}", QUOT => "\x{0022}", rArr => "\x{21D2}",
  radic => "\x{221A}", rang => "\x{232A}", raquo => "\x{00BB}",
  rarr => "\x{2192}", rceil => "\x{2309}", rdquo => "\x{201D}",
  real => "\x{211C}", reg => "\x{00AE}", REG => "\x{00AE}",
  rfloor => "\x{230B}", rho => "\x{03C1}", rlm => "\x{200F}",
  rsaquo => "\x{203A}", rsquo => "\x{2019}", sbquo => "\x{201A}",
  scaron => "\x{0161}", sdot => "\x{22C5}", sect => "\x{00A7}",
  shy => "\x{00AD}", sigma => "\x{03C3}", sigmaf => "\x{03C2}",
  sim => "\x{223C}", spades => "\x{2660}", sub => "\x{2282}",
  sube => "\x{2286}", sum => "\x{2211}", sup => "\x{2283}",
  sup1 => "\x{00B9}", sup2 => "\x{00B2}", sup3 => "\x{00B3}",
  supe => "\x{2287}", szlig => "\x{00DF}", tau => "\x{03C4}",
  there4 => "\x{2234}", theta => "\x{03B8}", thetasym => "\x{03D1}",
  thinsp => "\x{2009}", thorn => "\x{00FE}", tilde => "\x{02DC}",
  times => "\x{00D7}", trade => "\x{2122}", uArr => "\x{21D1}",
  uacute => "\x{00FA}", uarr => "\x{2191}", ucirc => "\x{00FB}",
  ugrave => "\x{00F9}", uml => "\x{00A8}", upsih => "\x{03D2}",
  upsilon => "\x{03C5}", uuml => "\x{00FC}", weierp => "\x{2118}",
  xi => "\x{03BE}", yacute => "\x{00FD}", yen => "\x{00A5}",
  yuml => "\x{00FF}", zeta => "\x{03B6}", zwj => "\x{200D}",
  zwnj => "\x{200C}",
}; # $entity_char
282    
## Replacement code points for the code points 0x80..0x9F.  Both keys
## and values are code point numbers; 0xFFFD is U+FFFD REPLACEMENT
## CHARACTER.
my $c1_entity_char = {
  0x80 => 0x20AC, 0x81 => 0xFFFD, 0x82 => 0x201A, 0x83 => 0x0192,
  0x84 => 0x201E, 0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021,
  0x88 => 0x02C6, 0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039,
  0x8C => 0x0152, 0x8D => 0xFFFD, 0x8E => 0x017D, 0x8F => 0xFFFD,
  0x90 => 0xFFFD, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
  0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
  0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
  0x9C => 0x0153, 0x9D => 0xFFFD, 0x9E => 0x017E, 0x9F => 0x0178,
}; # $c1_entity_char
317 wakaba 1.1
## Element names that belong to the "special" category.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option
    p param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Element names that belong to the "scoping" category.
my $scoping_category = {
  map { $_ => 1 } qw(
    button caption html marquee object table td th
  )
};
## Element names that belong to the "formatting" category.
## (The former key |strile| was a misspelling of |strike|.)
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
338     # $phrasing_category: all other elements
339    
## Parses a character string as an HTML document.
##
## Arguments (after the invocant, on which |new| is called to create
## the parser object):
##   $_[0]  The string to parse.  Only a reference to it is taken.
##   $_[1]  The document object; stored in |$self->{document}|.
##   $_[2]  Optional error handler.  It is called with key/value
##          pairs: whatever the reporter passes (e.g. |type|) followed
##          by |line| and |column|.  If omitted, errors are |warn|ed.
##
## Returns the document object (|$self->{document}|).
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $i = 0;       # index of the next unread character in |$$s|
  my $line = 1;
  my $column = 0;
  ## Reads one character from |$$s| and stores its code point in
  ## |$self->{next_input_character}| (-1 at the end of the string).
  $self->{set_next_input_character} = sub {
    my $self = shift;
    if ($i >= length $$s) {
      $self->{next_input_character} = -1;
      return;
    }
    $self->{next_input_character} = ord substr $$s, $i++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## Only peek at the following character and swallow it if it is
      ## the LF of a CR LF pair.  Any other character is left in the
      ## string so that the next call handles it like every other
      ## character (column counting, CR / NULL / out-of-range checks)
      ## instead of being handed on unprocessed.
      $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      ## TODO: test
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Adds the current position to every reported error.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
397    
## Creates a new parser object with default handlers: an input
## handler that always reports end of input (-1) and an error handler
## that ignores every error.
##
## Returns the new object, blessed into the invocant class.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    ## The parser is taken from the argument list (as the handler
    ## installed by |parse_string| does).  Capturing the outer |$self|
    ## here would make the object reference itself through this
    ## closure, so that it could never be freed.
    my $self = shift;
    $self->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
409    
410     ## Implementations MUST act as if they used the state machine in the spec
411    
## Resets the tokenizer state of the parser object and reads the
## first input character.
sub _initialize_tokenizer ($) {
  my ($self) = @_;

  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # initial content model flag

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE
  ## token while one is being built.
  for my $slot (qw(
    current_token current_attribute
    last_emitted_start_tag_name last_attribute_value_state
  )) {
    undef $self->{$slot};
  }

  $self->{char} = [];
  ## Sets |$self->{next_input_character}|.
  !!!next-input-character;
  $self->{token} = [];
} # _initialize_tokenizer
425    
426     ## A token has:
427     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
428     ## 'character', or 'end-of-file'
429     ## ->{name} (DOCTYPE), ->{tag_name} (start tag, end tag)
430     ## ISSUE: the spec needs s/tagname/tag name/
431     ## ->{error} == 1 or 0 (DOCTYPE)
432     ## ->{attributes} isa HASH (start tag, end tag)
433     ## ->{data} (comment, character)
434    
435     ## Macros
436     ## Macros MUST be preceded by three EXCLAMATION MARKs.
437     ## emit ($token)
438     ## Emits the specified token.
439    
440     ## Emitted token MUST immediately be handled by the tree construction state.
441    
442     ## Before each step, UA MAY check to see if either one of the scripts in
443     ## "list of scripts that will execute as soon as possible" or the first
444     ## script in the "list of scripts that will execute asynchronously",
445     ## has completed loading. If one has, then it MUST be executed
446     ## and removed from the list.
447    
448 wakaba 1.8 ## ISSUE: <http://html5.org/tools/web-apps-tracker?from=874&to=876>
449    
450 wakaba 1.1 sub _get_next_token ($) {
451     my $self = shift;
452     if (@{$self->{token}}) {
453     return shift @{$self->{token}};
454     }
455    
456     A: {
457     if ($self->{state} eq 'data') {
458     if ($self->{next_input_character} == 0x0026) { # &
459     if ($self->{content_model_flag} eq 'PCDATA' or
460     $self->{content_model_flag} eq 'RCDATA') {
461     $self->{state} = 'entity data';
462     !!!next-input-character;
463     redo A;
464     } else {
465     #
466     }
467     } elsif ($self->{next_input_character} == 0x003C) { # <
468     if ($self->{content_model_flag} ne 'PLAINTEXT') {
469     $self->{state} = 'tag open';
470     !!!next-input-character;
471     redo A;
472     } else {
473     #
474     }
475     } elsif ($self->{next_input_character} == -1) {
476     !!!emit ({type => 'end-of-file'});
477     last A; ## TODO: ok?
478     }
479     # Anything else
480     my $token = {type => 'character',
481     data => chr $self->{next_input_character}};
482     ## Stay in the data state
483     !!!next-input-character;
484    
485     !!!emit ($token);
486    
487     redo A;
488     } elsif ($self->{state} eq 'entity data') {
489     ## (cannot happen in CDATA state)
490    
491     my $token = $self->_tokenize_attempt_to_consume_an_entity;
492    
493     $self->{state} = 'data';
494     # next-input-character is already done
495    
496     unless (defined $token) {
497     !!!emit ({type => 'character', data => '&'});
498     } else {
499     !!!emit ($token);
500     }
501    
502     redo A;
503     } elsif ($self->{state} eq 'tag open') {
504     if ($self->{content_model_flag} eq 'RCDATA' or
505     $self->{content_model_flag} eq 'CDATA') {
506     if ($self->{next_input_character} == 0x002F) { # /
507     !!!next-input-character;
508     $self->{state} = 'close tag open';
509     redo A;
510     } else {
511     ## reconsume
512     $self->{state} = 'data';
513    
514     !!!emit ({type => 'character', data => '<'});
515    
516     redo A;
517     }
518     } elsif ($self->{content_model_flag} eq 'PCDATA') {
519     if ($self->{next_input_character} == 0x0021) { # !
520     $self->{state} = 'markup declaration open';
521     !!!next-input-character;
522     redo A;
523     } elsif ($self->{next_input_character} == 0x002F) { # /
524     $self->{state} = 'close tag open';
525     !!!next-input-character;
526     redo A;
527     } elsif (0x0041 <= $self->{next_input_character} and
528     $self->{next_input_character} <= 0x005A) { # A..Z
529     $self->{current_token}
530     = {type => 'start tag',
531     tag_name => chr ($self->{next_input_character} + 0x0020)};
532     $self->{state} = 'tag name';
533     !!!next-input-character;
534     redo A;
535     } elsif (0x0061 <= $self->{next_input_character} and
536     $self->{next_input_character} <= 0x007A) { # a..z
537     $self->{current_token} = {type => 'start tag',
538     tag_name => chr ($self->{next_input_character})};
539     $self->{state} = 'tag name';
540     !!!next-input-character;
541     redo A;
542     } elsif ($self->{next_input_character} == 0x003E) { # >
543 wakaba 1.3 !!!parse-error (type => 'empty start tag');
544 wakaba 1.1 $self->{state} = 'data';
545     !!!next-input-character;
546    
547     !!!emit ({type => 'character', data => '<>'});
548    
549     redo A;
550     } elsif ($self->{next_input_character} == 0x003F) { # ?
551 wakaba 1.3 !!!parse-error (type => 'pio');
552 wakaba 1.1 $self->{state} = 'bogus comment';
553     ## $self->{next_input_character} is intentionally left as is
554     redo A;
555     } else {
556 wakaba 1.3 !!!parse-error (type => 'bare stago');
557 wakaba 1.1 $self->{state} = 'data';
558     ## reconsume
559    
560     !!!emit ({type => 'character', data => '<'});
561    
562     redo A;
563     }
564     } else {
565     die "$0: $self->{content_model_flag}: Unknown content model flag";
566     }
567     } elsif ($self->{state} eq 'close tag open') {
568     if ($self->{content_model_flag} eq 'RCDATA' or
569     $self->{content_model_flag} eq 'CDATA') {
570     my @next_char;
571     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
572     push @next_char, $self->{next_input_character};
573     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
574     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
575     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
576     !!!next-input-character;
577     next TAGNAME;
578     } else {
579 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
580 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
581     !!!back-next-input-character (@next_char);
582     $self->{state} = 'data';
583    
584     !!!emit ({type => 'character', data => '</'});
585    
586     redo A;
587     }
588     }
589     push @next_char, $self->{next_input_character};
590    
591     unless ($self->{next_input_character} == 0x0009 or # HT
592     $self->{next_input_character} == 0x000A or # LF
593     $self->{next_input_character} == 0x000B or # VT
594     $self->{next_input_character} == 0x000C or # FF
595     $self->{next_input_character} == 0x0020 or # SP
596     $self->{next_input_character} == 0x003E or # >
597     $self->{next_input_character} == 0x002F or # /
598     $self->{next_input_character} == 0x003C or # <
599     $self->{next_input_character} == -1) {
600 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
601 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
602     !!!back-next-input-character (@next_char);
603     $self->{state} = 'data';
604    
605     !!!emit ({type => 'character', data => '</'});
606    
607     redo A;
608     } else {
609     $self->{next_input_character} = shift @next_char;
610     !!!back-next-input-character (@next_char);
611     # and consume...
612     }
613     }
614    
615     if (0x0041 <= $self->{next_input_character} and
616     $self->{next_input_character} <= 0x005A) { # A..Z
617     $self->{current_token} = {type => 'end tag',
618     tag_name => chr ($self->{next_input_character} + 0x0020)};
619     $self->{state} = 'tag name';
620     !!!next-input-character;
621     redo A;
622     } elsif (0x0061 <= $self->{next_input_character} and
623     $self->{next_input_character} <= 0x007A) { # a..z
624     $self->{current_token} = {type => 'end tag',
625     tag_name => chr ($self->{next_input_character})};
626     $self->{state} = 'tag name';
627     !!!next-input-character;
628     redo A;
629     } elsif ($self->{next_input_character} == 0x003E) { # >
630 wakaba 1.3 !!!parse-error (type => 'empty end tag');
631 wakaba 1.1 $self->{state} = 'data';
632     !!!next-input-character;
633     redo A;
634     } elsif ($self->{next_input_character} == -1) {
635 wakaba 1.3 !!!parse-error (type => 'bare etago');
636 wakaba 1.1 $self->{state} = 'data';
637     # reconsume
638    
639     !!!emit ({type => 'character', data => '</'});
640    
641     redo A;
642     } else {
643 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
644 wakaba 1.1 $self->{state} = 'bogus comment';
645     ## $self->{next_input_character} is intentionally left as is
646     redo A;
647     }
648     } elsif ($self->{state} eq 'tag name') {
649     if ($self->{next_input_character} == 0x0009 or # HT
650     $self->{next_input_character} == 0x000A or # LF
651     $self->{next_input_character} == 0x000B or # VT
652     $self->{next_input_character} == 0x000C or # FF
653     $self->{next_input_character} == 0x0020) { # SP
654     $self->{state} = 'before attribute name';
655     !!!next-input-character;
656     redo A;
657     } elsif ($self->{next_input_character} == 0x003E) { # >
658     if ($self->{current_token}->{type} eq 'start tag') {
659     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
660     } elsif ($self->{current_token}->{type} eq 'end tag') {
661     $self->{content_model_flag} = 'PCDATA'; # MUST
662     if ($self->{current_token}->{attributes}) {
663 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
664 wakaba 1.1 }
665     } else {
666     die "$0: $self->{current_token}->{type}: Unknown token type";
667     }
668     $self->{state} = 'data';
669     !!!next-input-character;
670    
671     !!!emit ($self->{current_token}); # start tag or end tag
672     undef $self->{current_token};
673    
674     redo A;
675     } elsif (0x0041 <= $self->{next_input_character} and
676     $self->{next_input_character} <= 0x005A) { # A..Z
677     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
678     # start tag or end tag
679     ## Stay in this state
680     !!!next-input-character;
681     redo A;
682     } elsif ($self->{next_input_character} == 0x003C or # <
683     $self->{next_input_character} == -1) {
684 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
685 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
686     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
687     } elsif ($self->{current_token}->{type} eq 'end tag') {
688     $self->{content_model_flag} = 'PCDATA'; # MUST
689     if ($self->{current_token}->{attributes}) {
690 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
691 wakaba 1.1 }
692     } else {
693     die "$0: $self->{current_token}->{type}: Unknown token type";
694     }
695     $self->{state} = 'data';
696     # reconsume
697    
698     !!!emit ($self->{current_token}); # start tag or end tag
699     undef $self->{current_token};
700    
701     redo A;
702     } elsif ($self->{next_input_character} == 0x002F) { # /
703     !!!next-input-character;
704     if ($self->{next_input_character} == 0x003E and # >
705     $self->{current_token}->{type} eq 'start tag' and
706     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
707     # permitted slash
708     #
709     } else {
710 wakaba 1.3 !!!parse-error (type => 'nestc');
711 wakaba 1.1 }
712     $self->{state} = 'before attribute name';
713     # next-input-character is already done
714     redo A;
715     } else {
716     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
717     # start tag or end tag
718     ## Stay in the state
719     !!!next-input-character;
720     redo A;
721     }
722     } elsif ($self->{state} eq 'before attribute name') {
723     if ($self->{next_input_character} == 0x0009 or # HT
724     $self->{next_input_character} == 0x000A or # LF
725     $self->{next_input_character} == 0x000B or # VT
726     $self->{next_input_character} == 0x000C or # FF
727     $self->{next_input_character} == 0x0020) { # SP
728     ## Stay in the state
729     !!!next-input-character;
730     redo A;
731     } elsif ($self->{next_input_character} == 0x003E) { # >
732     if ($self->{current_token}->{type} eq 'start tag') {
733     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
734     } elsif ($self->{current_token}->{type} eq 'end tag') {
735     $self->{content_model_flag} = 'PCDATA'; # MUST
736     if ($self->{current_token}->{attributes}) {
737 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
738 wakaba 1.1 }
739     } else {
740     die "$0: $self->{current_token}->{type}: Unknown token type";
741     }
742     $self->{state} = 'data';
743     !!!next-input-character;
744    
745     !!!emit ($self->{current_token}); # start tag or end tag
746     undef $self->{current_token};
747    
748     redo A;
749     } elsif (0x0041 <= $self->{next_input_character} and
750     $self->{next_input_character} <= 0x005A) { # A..Z
751     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
752     value => ''};
753     $self->{state} = 'attribute name';
754     !!!next-input-character;
755     redo A;
756     } elsif ($self->{next_input_character} == 0x002F) { # /
757     !!!next-input-character;
758     if ($self->{next_input_character} == 0x003E and # >
759     $self->{current_token}->{type} eq 'start tag' and
760     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
761     # permitted slash
762     #
763     } else {
764 wakaba 1.3 !!!parse-error (type => 'nestc');
765 wakaba 1.1 }
766     ## Stay in the state
767     # next-input-character is already done
768     redo A;
769     } elsif ($self->{next_input_character} == 0x003C or # <
770     $self->{next_input_character} == -1) {
771 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
772 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
773     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
774     } elsif ($self->{current_token}->{type} eq 'end tag') {
775     $self->{content_model_flag} = 'PCDATA'; # MUST
776     if ($self->{current_token}->{attributes}) {
777 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
778 wakaba 1.1 }
779     } else {
780     die "$0: $self->{current_token}->{type}: Unknown token type";
781     }
782     $self->{state} = 'data';
783     # reconsume
784    
785     !!!emit ($self->{current_token}); # start tag or end tag
786     undef $self->{current_token};
787    
788     redo A;
789     } else {
790     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
791     value => ''};
792     $self->{state} = 'attribute name';
793     !!!next-input-character;
794     redo A;
795     }
796     } elsif ($self->{state} eq 'attribute name') {
797     my $before_leave = sub {
798     if (exists $self->{current_token}->{attributes} # start tag or end tag
799     ->{$self->{current_attribute}->{name}}) { # MUST
800 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
801 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
802     } else {
803     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
804     = $self->{current_attribute};
805     }
806     }; # $before_leave
807    
808     if ($self->{next_input_character} == 0x0009 or # HT
809     $self->{next_input_character} == 0x000A or # LF
810     $self->{next_input_character} == 0x000B or # VT
811     $self->{next_input_character} == 0x000C or # FF
812     $self->{next_input_character} == 0x0020) { # SP
813     $before_leave->();
814     $self->{state} = 'after attribute name';
815     !!!next-input-character;
816     redo A;
817     } elsif ($self->{next_input_character} == 0x003D) { # =
818     $before_leave->();
819     $self->{state} = 'before attribute value';
820     !!!next-input-character;
821     redo A;
822     } elsif ($self->{next_input_character} == 0x003E) { # >
823     $before_leave->();
824     if ($self->{current_token}->{type} eq 'start tag') {
825     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
826     } elsif ($self->{current_token}->{type} eq 'end tag') {
827     $self->{content_model_flag} = 'PCDATA'; # MUST
828     if ($self->{current_token}->{attributes}) {
829 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
830 wakaba 1.1 }
831     } else {
832     die "$0: $self->{current_token}->{type}: Unknown token type";
833     }
834     $self->{state} = 'data';
835     !!!next-input-character;
836    
837     !!!emit ($self->{current_token}); # start tag or end tag
838     undef $self->{current_token};
839    
840     redo A;
841     } elsif (0x0041 <= $self->{next_input_character} and
842     $self->{next_input_character} <= 0x005A) { # A..Z
843     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
844     ## Stay in the state
845     !!!next-input-character;
846     redo A;
847     } elsif ($self->{next_input_character} == 0x002F) { # /
848     $before_leave->();
849     !!!next-input-character;
850     if ($self->{next_input_character} == 0x003E and # >
851     $self->{current_token}->{type} eq 'start tag' and
852     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
853     # permitted slash
854     #
855     } else {
856 wakaba 1.3 !!!parse-error (type => 'nestc');
857 wakaba 1.1 }
858     $self->{state} = 'before attribute name';
859     # next-input-character is already done
860     redo A;
861     } elsif ($self->{next_input_character} == 0x003C or # <
862     $self->{next_input_character} == -1) {
863 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
864 wakaba 1.1 $before_leave->();
865     if ($self->{current_token}->{type} eq 'start tag') {
866     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
867     } elsif ($self->{current_token}->{type} eq 'end tag') {
868     $self->{content_model_flag} = 'PCDATA'; # MUST
869     if ($self->{current_token}->{attributes}) {
870 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
871 wakaba 1.1 }
872     } else {
873     die "$0: $self->{current_token}->{type}: Unknown token type";
874     }
875     $self->{state} = 'data';
876     # reconsume
877    
878     !!!emit ($self->{current_token}); # start tag or end tag
879     undef $self->{current_token};
880    
881     redo A;
882     } else {
883     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
884     ## Stay in the state
885     !!!next-input-character;
886     redo A;
887     }
888     } elsif ($self->{state} eq 'after attribute name') {
889     if ($self->{next_input_character} == 0x0009 or # HT
890     $self->{next_input_character} == 0x000A or # LF
891     $self->{next_input_character} == 0x000B or # VT
892     $self->{next_input_character} == 0x000C or # FF
893     $self->{next_input_character} == 0x0020) { # SP
894     ## Stay in the state
895     !!!next-input-character;
896     redo A;
897     } elsif ($self->{next_input_character} == 0x003D) { # =
898     $self->{state} = 'before attribute value';
899     !!!next-input-character;
900     redo A;
901     } elsif ($self->{next_input_character} == 0x003E) { # >
902     if ($self->{current_token}->{type} eq 'start tag') {
903     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
904     } elsif ($self->{current_token}->{type} eq 'end tag') {
905     $self->{content_model_flag} = 'PCDATA'; # MUST
906     if ($self->{current_token}->{attributes}) {
907 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
908 wakaba 1.1 }
909     } else {
910     die "$0: $self->{current_token}->{type}: Unknown token type";
911     }
912     $self->{state} = 'data';
913     !!!next-input-character;
914    
915     !!!emit ($self->{current_token}); # start tag or end tag
916     undef $self->{current_token};
917    
918     redo A;
919     } elsif (0x0041 <= $self->{next_input_character} and
920     $self->{next_input_character} <= 0x005A) { # A..Z
921     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
922     value => ''};
923     $self->{state} = 'attribute name';
924     !!!next-input-character;
925     redo A;
926     } elsif ($self->{next_input_character} == 0x002F) { # /
927     !!!next-input-character;
928     if ($self->{next_input_character} == 0x003E and # >
929     $self->{current_token}->{type} eq 'start tag' and
930     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
931     # permitted slash
932     #
933     } else {
934 wakaba 1.3 !!!parse-error (type => 'nestc');
935 wakaba 1.1 }
936     $self->{state} = 'before attribute name';
937     # next-input-character is already done
938     redo A;
939     } elsif ($self->{next_input_character} == 0x003C or # <
940     $self->{next_input_character} == -1) {
941 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
942 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
943     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
944     } elsif ($self->{current_token}->{type} eq 'end tag') {
945     $self->{content_model_flag} = 'PCDATA'; # MUST
946     if ($self->{current_token}->{attributes}) {
947 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
948 wakaba 1.1 }
949     } else {
950     die "$0: $self->{current_token}->{type}: Unknown token type";
951     }
952     $self->{state} = 'data';
953     # reconsume
954    
955     !!!emit ($self->{current_token}); # start tag or end tag
956     undef $self->{current_token};
957    
958     redo A;
959     } else {
960     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
961     value => ''};
962     $self->{state} = 'attribute name';
963     !!!next-input-character;
964     redo A;
965     }
966     } elsif ($self->{state} eq 'before attribute value') {
967     if ($self->{next_input_character} == 0x0009 or # HT
968     $self->{next_input_character} == 0x000A or # LF
969     $self->{next_input_character} == 0x000B or # VT
970     $self->{next_input_character} == 0x000C or # FF
971     $self->{next_input_character} == 0x0020) { # SP
972     ## Stay in the state
973     !!!next-input-character;
974     redo A;
975     } elsif ($self->{next_input_character} == 0x0022) { # "
976     $self->{state} = 'attribute value (double-quoted)';
977     !!!next-input-character;
978     redo A;
979     } elsif ($self->{next_input_character} == 0x0026) { # &
980     $self->{state} = 'attribute value (unquoted)';
981     ## reconsume
982     redo A;
983     } elsif ($self->{next_input_character} == 0x0027) { # '
984     $self->{state} = 'attribute value (single-quoted)';
985     !!!next-input-character;
986     redo A;
987     } elsif ($self->{next_input_character} == 0x003E) { # >
988     if ($self->{current_token}->{type} eq 'start tag') {
989     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
990     } elsif ($self->{current_token}->{type} eq 'end tag') {
991     $self->{content_model_flag} = 'PCDATA'; # MUST
992     if ($self->{current_token}->{attributes}) {
993 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
994 wakaba 1.1 }
995     } else {
996     die "$0: $self->{current_token}->{type}: Unknown token type";
997     }
998     $self->{state} = 'data';
999     !!!next-input-character;
1000    
1001     !!!emit ($self->{current_token}); # start tag or end tag
1002     undef $self->{current_token};
1003    
1004     redo A;
1005     } elsif ($self->{next_input_character} == 0x003C or # <
1006     $self->{next_input_character} == -1) {
1007 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1008 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1009     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1010     } elsif ($self->{current_token}->{type} eq 'end tag') {
1011     $self->{content_model_flag} = 'PCDATA'; # MUST
1012     if ($self->{current_token}->{attributes}) {
1013 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1014 wakaba 1.1 }
1015     } else {
1016     die "$0: $self->{current_token}->{type}: Unknown token type";
1017     }
1018     $self->{state} = 'data';
1019     ## reconsume
1020    
1021     !!!emit ($self->{current_token}); # start tag or end tag
1022     undef $self->{current_token};
1023    
1024     redo A;
1025     } else {
1026     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1027     $self->{state} = 'attribute value (unquoted)';
1028     !!!next-input-character;
1029     redo A;
1030     }
1031     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1032     if ($self->{next_input_character} == 0x0022) { # "
1033     $self->{state} = 'before attribute name';
1034     !!!next-input-character;
1035     redo A;
1036     } elsif ($self->{next_input_character} == 0x0026) { # &
1037     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1038     $self->{state} = 'entity in attribute value';
1039     !!!next-input-character;
1040     redo A;
1041     } elsif ($self->{next_input_character} == -1) {
1042 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1043 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1044     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1045     } elsif ($self->{current_token}->{type} eq 'end tag') {
1046     $self->{content_model_flag} = 'PCDATA'; # MUST
1047     if ($self->{current_token}->{attributes}) {
1048 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1049 wakaba 1.1 }
1050     } else {
1051     die "$0: $self->{current_token}->{type}: Unknown token type";
1052     }
1053     $self->{state} = 'data';
1054     ## reconsume
1055    
1056     !!!emit ($self->{current_token}); # start tag or end tag
1057     undef $self->{current_token};
1058    
1059     redo A;
1060     } else {
1061     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1062     ## Stay in the state
1063     !!!next-input-character;
1064     redo A;
1065     }
1066     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1067     if ($self->{next_input_character} == 0x0027) { # '
1068     $self->{state} = 'before attribute name';
1069     !!!next-input-character;
1070     redo A;
1071     } elsif ($self->{next_input_character} == 0x0026) { # &
1072     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1073     $self->{state} = 'entity in attribute value';
1074     !!!next-input-character;
1075     redo A;
1076     } elsif ($self->{next_input_character} == -1) {
1077 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1078 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1079     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1080     } elsif ($self->{current_token}->{type} eq 'end tag') {
1081     $self->{content_model_flag} = 'PCDATA'; # MUST
1082     if ($self->{current_token}->{attributes}) {
1083 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1084 wakaba 1.1 }
1085     } else {
1086     die "$0: $self->{current_token}->{type}: Unknown token type";
1087     }
1088     $self->{state} = 'data';
1089     ## reconsume
1090    
1091     !!!emit ($self->{current_token}); # start tag or end tag
1092     undef $self->{current_token};
1093    
1094     redo A;
1095     } else {
1096     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1097     ## Stay in the state
1098     !!!next-input-character;
1099     redo A;
1100     }
1101     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1102     if ($self->{next_input_character} == 0x0009 or # HT
1103     $self->{next_input_character} == 0x000A or # LF
1104     $self->{next_input_character} == 0x000B or # VT
1105     $self->{next_input_character} == 0x000C or # FF
1106     $self->{next_input_character} == 0x0020) { # SP
1107     $self->{state} = 'before attribute name';
1108     !!!next-input-character;
1109     redo A;
1110     } elsif ($self->{next_input_character} == 0x0026) { # &
1111     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1112     $self->{state} = 'entity in attribute value';
1113     !!!next-input-character;
1114     redo A;
1115     } elsif ($self->{next_input_character} == 0x003E) { # >
1116     if ($self->{current_token}->{type} eq 'start tag') {
1117     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1118     } elsif ($self->{current_token}->{type} eq 'end tag') {
1119     $self->{content_model_flag} = 'PCDATA'; # MUST
1120     if ($self->{current_token}->{attributes}) {
1121 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1122 wakaba 1.1 }
1123     } else {
1124     die "$0: $self->{current_token}->{type}: Unknown token type";
1125     }
1126     $self->{state} = 'data';
1127     !!!next-input-character;
1128    
1129     !!!emit ($self->{current_token}); # start tag or end tag
1130     undef $self->{current_token};
1131    
1132     redo A;
1133     } elsif ($self->{next_input_character} == 0x003C or # <
1134     $self->{next_input_character} == -1) {
1135 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1136 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1137     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1138     } elsif ($self->{current_token}->{type} eq 'end tag') {
1139     $self->{content_model_flag} = 'PCDATA'; # MUST
1140     if ($self->{current_token}->{attributes}) {
1141 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1142 wakaba 1.1 }
1143     } else {
1144     die "$0: $self->{current_token}->{type}: Unknown token type";
1145     }
1146     $self->{state} = 'data';
1147     ## reconsume
1148    
1149     !!!emit ($self->{current_token}); # start tag or end tag
1150     undef $self->{current_token};
1151    
1152     redo A;
1153     } else {
1154     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1155     ## Stay in the state
1156     !!!next-input-character;
1157     redo A;
1158     }
1159     } elsif ($self->{state} eq 'entity in attribute value') {
1160     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1161    
1162     unless (defined $token) {
1163     $self->{current_attribute}->{value} .= '&';
1164     } else {
1165     $self->{current_attribute}->{value} .= $token->{data};
1166     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1167     }
1168    
1169     $self->{state} = $self->{last_attribute_value_state};
1170     # next-input-character is already done
1171     redo A;
1172     } elsif ($self->{state} eq 'bogus comment') {
1173     ## (only happen if PCDATA state)
1174    
1175     my $token = {type => 'comment', data => ''};
1176    
1177     BC: {
1178     if ($self->{next_input_character} == 0x003E) { # >
1179     $self->{state} = 'data';
1180     !!!next-input-character;
1181    
1182     !!!emit ($token);
1183    
1184     redo A;
1185     } elsif ($self->{next_input_character} == -1) {
1186     $self->{state} = 'data';
1187     ## reconsume
1188    
1189     !!!emit ($token);
1190    
1191     redo A;
1192     } else {
1193     $token->{data} .= chr ($self->{next_input_character});
1194     !!!next-input-character;
1195     redo BC;
1196     }
1197     } # BC
1198     } elsif ($self->{state} eq 'markup declaration open') {
1199     ## (only happen if PCDATA state)
1200    
1201     my @next_char;
1202     push @next_char, $self->{next_input_character};
1203    
1204     if ($self->{next_input_character} == 0x002D) { # -
1205     !!!next-input-character;
1206     push @next_char, $self->{next_input_character};
1207     if ($self->{next_input_character} == 0x002D) { # -
1208     $self->{current_token} = {type => 'comment', data => ''};
1209     $self->{state} = 'comment';
1210     !!!next-input-character;
1211     redo A;
1212     }
1213     } elsif ($self->{next_input_character} == 0x0044 or # D
1214     $self->{next_input_character} == 0x0064) { # d
1215     !!!next-input-character;
1216     push @next_char, $self->{next_input_character};
1217     if ($self->{next_input_character} == 0x004F or # O
1218     $self->{next_input_character} == 0x006F) { # o
1219     !!!next-input-character;
1220     push @next_char, $self->{next_input_character};
1221     if ($self->{next_input_character} == 0x0043 or # C
1222     $self->{next_input_character} == 0x0063) { # c
1223     !!!next-input-character;
1224     push @next_char, $self->{next_input_character};
1225     if ($self->{next_input_character} == 0x0054 or # T
1226     $self->{next_input_character} == 0x0074) { # t
1227     !!!next-input-character;
1228     push @next_char, $self->{next_input_character};
1229     if ($self->{next_input_character} == 0x0059 or # Y
1230     $self->{next_input_character} == 0x0079) { # y
1231     !!!next-input-character;
1232     push @next_char, $self->{next_input_character};
1233     if ($self->{next_input_character} == 0x0050 or # P
1234     $self->{next_input_character} == 0x0070) { # p
1235     !!!next-input-character;
1236     push @next_char, $self->{next_input_character};
1237     if ($self->{next_input_character} == 0x0045 or # E
1238     $self->{next_input_character} == 0x0065) { # e
1239     ## ISSUE: What a stupid code this is!
1240     $self->{state} = 'DOCTYPE';
1241     !!!next-input-character;
1242     redo A;
1243     }
1244     }
1245     }
1246     }
1247     }
1248     }
1249     }
1250    
1251 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1252 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1253     !!!back-next-input-character (@next_char);
1254     $self->{state} = 'bogus comment';
1255     redo A;
1256    
1257     ## ISSUE: typos in spec: chacacters, is is a parse error
1258     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1259     } elsif ($self->{state} eq 'comment') {
1260     if ($self->{next_input_character} == 0x002D) { # -
1261     $self->{state} = 'comment dash';
1262     !!!next-input-character;
1263     redo A;
1264     } elsif ($self->{next_input_character} == -1) {
1265 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1266 wakaba 1.1 $self->{state} = 'data';
1267     ## reconsume
1268    
1269     !!!emit ($self->{current_token}); # comment
1270     undef $self->{current_token};
1271    
1272     redo A;
1273     } else {
1274     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1275     ## Stay in the state
1276     !!!next-input-character;
1277     redo A;
1278     }
1279     } elsif ($self->{state} eq 'comment dash') {
1280     if ($self->{next_input_character} == 0x002D) { # -
1281     $self->{state} = 'comment end';
1282     !!!next-input-character;
1283     redo A;
1284     } elsif ($self->{next_input_character} == -1) {
1285 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1286 wakaba 1.1 $self->{state} = 'data';
1287     ## reconsume
1288    
1289     !!!emit ($self->{current_token}); # comment
1290     undef $self->{current_token};
1291    
1292     redo A;
1293     } else {
1294     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1295     $self->{state} = 'comment';
1296     !!!next-input-character;
1297     redo A;
1298     }
1299     } elsif ($self->{state} eq 'comment end') {
1300     if ($self->{next_input_character} == 0x003E) { # >
1301     $self->{state} = 'data';
1302     !!!next-input-character;
1303    
1304     !!!emit ($self->{current_token}); # comment
1305     undef $self->{current_token};
1306    
1307     redo A;
1308     } elsif ($self->{next_input_character} == 0x002D) { # -
1309 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1310 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1311     ## Stay in the state
1312     !!!next-input-character;
1313     redo A;
1314     } elsif ($self->{next_input_character} == -1) {
1315 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1316 wakaba 1.1 $self->{state} = 'data';
1317     ## reconsume
1318    
1319     !!!emit ($self->{current_token}); # comment
1320     undef $self->{current_token};
1321    
1322     redo A;
1323     } else {
1324 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1325 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1326     $self->{state} = 'comment';
1327     !!!next-input-character;
1328     redo A;
1329     }
1330     } elsif ($self->{state} eq 'DOCTYPE') {
1331     if ($self->{next_input_character} == 0x0009 or # HT
1332     $self->{next_input_character} == 0x000A or # LF
1333     $self->{next_input_character} == 0x000B or # VT
1334     $self->{next_input_character} == 0x000C or # FF
1335     $self->{next_input_character} == 0x0020) { # SP
1336     $self->{state} = 'before DOCTYPE name';
1337     !!!next-input-character;
1338     redo A;
1339     } else {
1340 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1341 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1342     ## reconsume
1343     redo A;
1344     }
1345     } elsif ($self->{state} eq 'before DOCTYPE name') {
1346     if ($self->{next_input_character} == 0x0009 or # HT
1347     $self->{next_input_character} == 0x000A or # LF
1348     $self->{next_input_character} == 0x000B or # VT
1349     $self->{next_input_character} == 0x000C or # FF
1350     $self->{next_input_character} == 0x0020) { # SP
1351     ## Stay in the state
1352     !!!next-input-character;
1353     redo A;
1354     } elsif (0x0061 <= $self->{next_input_character} and
1355     $self->{next_input_character} <= 0x007A) { # a..z
1356 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1357 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1358     name => chr ($self->{next_input_character} - 0x0020),
1359     error => 1};
1360     $self->{state} = 'DOCTYPE name';
1361     !!!next-input-character;
1362     redo A;
1363     } elsif ($self->{next_input_character} == 0x003E) { # >
1364 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1365 wakaba 1.1 $self->{state} = 'data';
1366     !!!next-input-character;
1367    
1368     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1369    
1370     redo A;
1371     } elsif ($self->{next_input_character} == -1) {
1372 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1373 wakaba 1.1 $self->{state} = 'data';
1374     ## reconsume
1375    
1376     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1377    
1378     redo A;
1379     } else {
1380     $self->{current_token} = {type => 'DOCTYPE',
1381     name => chr ($self->{next_input_character}),
1382     error => 1};
1383 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1384 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1385     !!!next-input-character;
1386     redo A;
1387     }
1388     } elsif ($self->{state} eq 'DOCTYPE name') {
1389     if ($self->{next_input_character} == 0x0009 or # HT
1390     $self->{next_input_character} == 0x000A or # LF
1391     $self->{next_input_character} == 0x000B or # VT
1392     $self->{next_input_character} == 0x000C or # FF
1393     $self->{next_input_character} == 0x0020) { # SP
1394     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1395     $self->{state} = 'after DOCTYPE name';
1396     !!!next-input-character;
1397     redo A;
1398     } elsif ($self->{next_input_character} == 0x003E) { # >
1399     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1400     $self->{state} = 'data';
1401     !!!next-input-character;
1402    
1403     !!!emit ($self->{current_token}); # DOCTYPE
1404     undef $self->{current_token};
1405    
1406     redo A;
1407     } elsif (0x0061 <= $self->{next_input_character} and
1408     $self->{next_input_character} <= 0x007A) { # a..z
1409     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1410     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1411     ## Stay in the state
1412     !!!next-input-character;
1413     redo A;
1414     } elsif ($self->{next_input_character} == -1) {
1415 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1416 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1417     $self->{state} = 'data';
1418     ## reconsume
1419    
1420     !!!emit ($self->{current_token});
1421     undef $self->{current_token};
1422    
1423     redo A;
1424     } else {
1425     $self->{current_token}->{name}
1426     .= chr ($self->{next_input_character}); # DOCTYPE
1427     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1428     ## Stay in the state
1429     !!!next-input-character;
1430     redo A;
1431     }
1432     } elsif ($self->{state} eq 'after DOCTYPE name') {
1433     if ($self->{next_input_character} == 0x0009 or # HT
1434     $self->{next_input_character} == 0x000A or # LF
1435     $self->{next_input_character} == 0x000B or # VT
1436     $self->{next_input_character} == 0x000C or # FF
1437     $self->{next_input_character} == 0x0020) { # SP
1438     ## Stay in the state
1439     !!!next-input-character;
1440     redo A;
1441     } elsif ($self->{next_input_character} == 0x003E) { # >
1442     $self->{state} = 'data';
1443     !!!next-input-character;
1444    
1445     !!!emit ($self->{current_token}); # DOCTYPE
1446     undef $self->{current_token};
1447    
1448     redo A;
1449     } elsif ($self->{next_input_character} == -1) {
1450 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1451 wakaba 1.1 $self->{state} = 'data';
1452     ## reconsume
1453    
1454     !!!emit ($self->{current_token}); # DOCTYPE
1455     undef $self->{current_token};
1456    
1457     redo A;
1458     } else {
1459 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1460 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1461     $self->{state} = 'bogus DOCTYPE';
1462     !!!next-input-character;
1463     redo A;
1464     }
1465     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1466     if ($self->{next_input_character} == 0x003E) { # >
1467     $self->{state} = 'data';
1468     !!!next-input-character;
1469    
1470     !!!emit ($self->{current_token}); # DOCTYPE
1471     undef $self->{current_token};
1472    
1473     redo A;
1474     } elsif ($self->{next_input_character} == -1) {
1475 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1476 wakaba 1.1 $self->{state} = 'data';
1477     ## reconsume
1478    
1479     !!!emit ($self->{current_token}); # DOCTYPE
1480     undef $self->{current_token};
1481    
1482     redo A;
1483     } else {
1484     ## Stay in the state
1485     !!!next-input-character;
1486     redo A;
1487     }
1488     } else {
1489     die "$0: $self->{state}: Unknown state";
1490     }
1491     } # A
1492    
1493     die "$0: _get_next_token: unexpected case";
1494     } # _get_next_token
1495    
## Attempt to consume an entity (character reference).
##
## The visible caller (the 'entity in attribute value' state) invokes
## this method after the "&" has been consumed, so
## |$self->{next_input_character}| is the first character after the "&".
##
## Returns:
##   - a hash reference |{type => 'character', data => $string}| when
##     a numeric or named reference was recognized, or
##   - |undef| when nothing was recognized; the caller is then expected
##     to treat the "&" as a literal character.
##
## Parse errors are reported through the |!!!parse-error| macro.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Hexadecimal character reference: "&#x" HEXDIGITS [";"]
      my $num;
      X: {
        ## On the first pass this is the "x" / "X" itself.  It is only
        ## used when no hexadecimal digit follows at all.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error (type => 'bare hcro');
          ## Un-consume both the "x" and the character that follows it,
          ## then make "#" the current character again.  The character
          ## after the "x" has to be pushed back as well; otherwise it
          ## would be lost from the input stream (compare with the
          ## 'bare nero' case below, which pushes back the current
          ## character before overwriting it).
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error (type => 'no refc');
        }

        ## TODO: check the definition for |a valid Unicode character|.
        ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        } elsif (0x80 <= $num and $num <= 0x9F) {
          !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
          ## NOTE(review): |$c1_entity_char| is defined outside this
          ## method; this assumes it has an entry for every code point
          ## in 0x80..0x9F - TODO confirm.
          $num = $c1_entity_char->{$num};
        }

        return {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      ## Decimal character reference: "&#" DIGITS [";"]
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
        ## NOTE(review): same assumption on |$c1_entity_char| as in the
        ## hexadecimal case - TODO confirm.
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      ## "&#" followed by neither "x"/"X" nor a digit: un-consume the
      ## current character and make "#" the current character again.
      !!!parse-error (type => 'bare nero');
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    ## Named entity reference: "&" NAME [";"]
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the replacement text built so far; |$match| becomes
    ## true once any prefix of the name is found in |$entity_char|.
    my $value = $entity_name;
    my $match;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x005A) or
            (0x0061 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x007A) or
            (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039))) {
      $entity_name .= chr $self->{next_input_character};
      if (defined $entity_char->{$entity_name}) {
        $value = $entity_char->{$entity_name};
        $match = 1;
      } else {
        ## NOTE(review): characters that follow a matched name but do
        ## not extend it to a longer known name are still consumed and
        ## appended to |$value|; a ";" after them is then consumed as
        ## well.  Confirm that this is intended.
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match) {
      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        ## NOTE(review): the numeric branches above report a missing
        ## ";" as 'no refc' while this one reports 'refc' - confirm
        ## whether the difference is intended.
        !!!parse-error (type => 'refc');
      }

      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1633    
## Prepare the document for the tree construction stage by switching
## its strict error checking off.
##
## NOTE: $self->{document} MUST be specified before this method is called.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST
  my $document = $self->{document};
  $document->strict_error_checking (0);
} # _initialize_tree_constructor
1642    
## Finish the tree construction stage by switching the document's
## strict error checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  ## TODO: Turn mutation events on
  my $document = $self->{document};
  $document->strict_error_checking (1);
} # _terminate_tree_constructor
1648    
1649     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1650    
1651 wakaba 1.3 { # tree construction stage
1652     my $token;
1653    
## Entry point of the tree construction stage: fetch the first token,
## reset the per-parse tree construction state on |$self|, then run the
## initial, root element and main steps in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA renders the
  ## $self->{document} available to the user, or when it begins
  ## accepting user input.

  ## Append a character: collect it and all subsequent consecutive
  ## characters and insert one Text node whose data is concatenation
  ## of all those characters. # MUST

  !!!next-token;

  ## Reset the state; the three element slots are left existing but
  ## undefined.
  $self->{insertion_mode} = 'before head';
  $self->{form_element} = undef;
  $self->{head_element} = undef;
  $self->{open_elements} = [];
  $self->{inner_html_node} = undef;

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1677    
## Tree construction: initial phase.
##
## Processes the current token ($token, shared by the tree construction
## stage) until the phase ends:
##
##   - A DOCTYPE token is turned into a document type definition node
##     appended to the document; the next token is then fetched.
##   - Leading white space characters are appended to the document as
##     text; if the character token is thereby exhausted, the next token
##     is fetched and the phase continues.
##   - Anything else is a 'missing DOCTYPE' parse error; the token is
##     left in place to be reprocessed by the next phase.
##
## Dies on an unknown token type.
sub _tree_construction_initial ($) {
  my ($self) = @_;

  ## Token types that end the phase without a DOCTYPE having been seen
  my $ends_without_doctype = {
    comment => 1,
    'start tag' => 1,
    'end tag' => 1,
    'end-of-file' => 1,
  };

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name});
      $self->{document}->append_child ($doctype);
      ## Phase is now 'root element'
      !!!next-token;
      return;
    }

    if ($ends_without_doctype->{$type}) {
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## Phase is now 'root element'; the token is reprocessed there
      return;
    }

    die "$0: $token->{type}: Unknown token" unless $type eq 'character';

    if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
      $self->{document}->manakai_append_text ($1);
      ## ISSUE: DOM3 Core does not allow Document > Text
      unless (length $token->{data}) {
        ## Stay in the phase
        !!!next-token;
        next TOKEN;
      }
    }

    ## Non-white space character data before any DOCTYPE
    ## ISSUE: Spec currently left this case undefined.
    !!!parse-error (type => 'missing DOCTYPE');
    ## Phase is now 'root element'; the token is reprocessed there
    return;
  } # TOKEN
} # _tree_construction_initial
1726    
## Tree construction: root element phase.
##
## DOCTYPE tokens are reported as parse errors and dropped, comments are
## appended to the document, and leading white space is appended to the
## document as text.  The first other token (remaining character data,
## start tag, end tag or end-of-file) ends the phase: an |html| element
## is created, appended to the document and pushed onto the stack of
## open elements, and the token is left in place for the main phase.
##
## Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        unless (length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          next TOKEN;
        }
      }
      last TOKEN;
    }

    ## ISSUE: There is an issue in the spec
    last TOKEN if $type eq 'start tag' or
                  $type eq 'end tag' or
                  $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # TOKEN

  ## Create the root element; the current token is reprocessed in the
  ## 'main' phase.
  my $root_element; !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];
  return;
} # _tree_construction_root_element
1773    
## Reset the insertion mode appropriately.
##
## Walks the stack of open elements ($self->{open_elements}, an array of
## [node, element name] pairs) from the current node towards the root
## and sets $self->{insertion_mode} from the first element name that
## determines a mode.
##
## When $self->{inner_html_node} (a [node, element name] pair) is
## defined and is neither |td| nor |th|, it is used in place of the
## first (root) entry of the stack - and only in place of that entry.
## Earlier code substituted it for every entry, so that in the
## innerHTML case the mode was always derived from the context element
## and never from the elements actually open.
##
## If the |html| element is reached, the mode is 'before head' when
## $self->{head_element} is undefined and 'after head' otherwise.
## If nothing else matches, the mode is 'in body'.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Element names that directly determine the insertion mode
  ## (Step 4..13)
  my $mode_for = {
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  };

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      ## The context element only replaces the first node in the stack.
      if (defined $self->{inner_html_node}) {
        if ($self->{inner_html_node}->[1] eq 'td' or
            $self->{inner_html_node}->[1] eq 'th') {
          #
        } else {
          $node = $self->{inner_html_node};
        }
      }
    }

    ## Step 4..13
    my $new_mode = $mode_for->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $self->{head_element}) {
        $self->{insertion_mode} = 'before head';
      } else {
        $self->{insertion_mode} = 'after head';
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
1835    
1836     sub _tree_construction_main ($) {
1837     my $self = shift;
1838    
1839     my $phase = 'main';
1840 wakaba 1.1
1841     my $active_formatting_elements = [];
1842    
## Reconstruct the active formatting elements. # MUST
##
## Argument: a code reference that inserts a node into the tree (it is
## called with each freshly cloned element).
##
## Starting from the last entry of the list of active formatting
## elements, rewinds to the entry just after the nearest marker or
## still-open element (or to the first entry), then moves forward
## again, shallow-cloning each entry, inserting the clone, pushing it
## onto the stack of open elements and replacing the list entry by the
## clone.  Does nothing if the list is empty or if its last entry is a
## marker or is still open.
my $reconstruct_active_formatting_elements = sub {
  my ($insert) = @_;
  my $afe = $active_formatting_elements;

  ## True if the given node is in the stack of open elements
  my $is_open = sub {
    my $el = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $el eq $open->[0];
    }
    return 0;
  };

  ## Step 1
  return unless @$afe;

  ## Step 3
  my $i = -1;
  my $entry = $afe->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker' or $is_open->($entry->[0]);

  ## Step 4: stop rewinding when there is no earlier entry
  REWIND: while (not $afe->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $i--;
    $entry = $afe->[$i];

    ## Step 6: keep rewinding while the entry is neither a marker nor
    ## an open element
    next REWIND
        unless $entry->[0] eq '#marker' or $is_open->($entry->[0]);

    ## Step 7
    $i++;
    $entry = $afe->[$i];
    last REWIND;
  } # REWIND

  ADVANCE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $afe->[$i] = $clone;

    ## Step 11
    last ADVANCE if $clone->[0] eq $afe->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $afe->[$i];
  } # ADVANCE
}; # $reconstruct_active_formatting_elements
1913    
## Clear the list of active formatting elements up to the last marker:
## removes the last '#marker' entry and every entry after it.  The list
## is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $i = $#$active_formatting_elements;
  while ($i >= 0) {
    if ($active_formatting_elements->[$i]->[0] eq '#marker') {
      splice @$active_formatting_elements, $i;
      return;
    }
    $i--;
  }
}; # $clear_up_to_marker
1922    
## Process a |style| start tag (the current $token).
##
## Creates the |style| element, appends it to the |head| element (when
## the insertion mode is 'in head' and a head element is known) or to
## the current node, then reads character tokens with the content model
## flag set to CDATA and appends their concatenation as text.  A token
## other than the |style| end tag following the text is a parse error.
## In either case that token is consumed.
my $style_start_tag = sub {
  my $style_el; !!!create-element ($style_el, 'style', $token->{attributes});

  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  my $parent;
  if ($self->{insertion_mode} eq 'in head' and
      defined $self->{head_element}) {
    $parent = $self->{head_element};
  } else {
    $parent = $self->{open_elements}->[-1]->[0];
  }
  $parent->append_child ($style_el);

  $self->{content_model_flag} = 'CDATA';

  ## Collect the element content
  my @chunk;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunk, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunk;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  ## The token (the end tag, if it is one) is ignored
  !!!next-token;
}; # $style_start_tag
1951    
## Process a |script| start tag (the current $token).
##
## Creates the |script| element, reads character tokens with the
## content model flag set to CDATA and appends their concatenation as
## text.  A token other than the |script| end tag following the text is
## a parse error.  Unless an innerHTML context node is set, the element
## is then appended to the |head| element (when the insertion mode is
## 'in head' and a head element is known) or to the current node.
## Finally the token following the text is consumed.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  ## Collect the element content
  my @chunk;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunk, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunk;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  unless (defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent;
    if ($self->{insertion_mode} eq 'in head' and
        defined $self->{head_element}) {
      $parent = $self->{head_element};
    } else {
      $parent = $self->{open_elements}->[-1]->[0];
    }
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
1996    
## Process an end tag of a formatting element (the "adoption agency"
## steps of the HTML5 tree construction algorithm).
##
## Argument: the tag name of the end tag.
##
## Repeats the following until no matching formatting element is left
## (at which point an 'unmatched end tag' parse error is reported, the
## next token is fetched and the closure returns):
##
##   1. Find the last entry with the given tag name in the list of
##      active formatting elements, after the last marker, and check
##      that it is in the stack of open elements and in scope.
##   2. Find the furthest block: the topmost entry below the formatting
##      element in the stack that is special or scoping but not
##      formatting.
##   3. If there is none, pop the stack up to and including the
##      formatting element, remove it from the list and finish.
##   4-13. Otherwise re-parent the furthest block and the elements
##      between it and the formatting element, cloning formatting
##      elements as needed, and update both the list and the stack.
##
## Changes from the previous code:
##   - When the formatting element is not in the stack of open elements,
##     that element (not simply the last list entry) is removed from the
##     list of active formatting elements.
##   - In step 13 the clone is inserted into the stack of open elements
##     below the furthest block instead of overwriting the entry there.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove $formatting_element itself; it is not necessarily the
      ## last entry of the list.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## Scanning from the current node upward, the last assignment made
    ## before the formatting element is reached is the topmost candidate.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): when the formatting element is the first entry of
    ## the list, index -1 selects the *last* entry as the bookmark's
    ## predecessor - confirm whether this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is dropped from the stack and the previous node is examined.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      ## NOTE(review): if step 5 below replaces $node by a clone, the
      ## bookmark still refers to the original node, which is then no
      ## longer in the list - confirm.
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first because appending moves nodes.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## $i tracks the index of the bookmark's predecessor; it is
    ## decremented when the formatting element is removed from before it.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## $i tracks the index of the furthest block in the same way.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## Insert (length 0), do not replace, the entry below the furthest
    ## block.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2179    
## Insert a node as the last child of the current node (the node of the
## last entry of the stack of open elements).
my $insert_to_current = sub {
  my ($child) = @_;
  my $current_node = $self->{open_elements}->[-1]->[0];
  return $current_node->append_child ($child);
}; # $insert_to_current
2183    
## Insert a node, foster-parenting it when the current node is a table
## related element.
##
## If the current node is not |table|, |tbody|, |tfoot|, |thead| or
## |tr|, the node is simply appended to the current node.  Otherwise
## the last |table| entry in the stack of open elements is looked up:
## if that table has a parent whose node type is 1, the node is
## inserted into that parent just before the table; if not, it is
## appended to the element preceding the table in the stack.  If the
## stack holds no |table| entry, the node is appended to the first
## element of the stack.
my $insert_to_foster = sub {
  my ($child) = @_;
  my $open = $self->{open_elements};

  my $needs_foster_parent = {
    table => 1, tbody => 1, tfoot => 1,
    thead => 1, tr => 1,
  };
  unless ($needs_foster_parent->{$open->[-1]->[1]}) {
    return $open->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  my $idx = $#$open;
  OE: while ($idx >= 0) {
    if ($open->[$idx]->[1] eq 'table') {
      my $table_el = $open->[$idx]->[0];
      my $parent = $table_el->parent_node;
      if (defined $parent and $parent->node_type == 1) {
        $foster_parent_element = $parent;
        $next_sibling = $table_el;
      } else {
        $foster_parent_element = $open->[$idx - 1]->[0];
      }
      last OE;
    }
    $idx--;
  } # OE

  $foster_parent_element = $open->[0]->[0]
      unless defined $foster_parent_element;

  ## An undefined $next_sibling is passed through as is.
  return $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2214    
2215     my $in_body = sub {
2216     my $insert = shift;
2217     if ($token->{type} eq 'start tag') {
2218     if ($token->{tag_name} eq 'script') {
2219     $script_start_tag->();
2220     return;
2221     } elsif ($token->{tag_name} eq 'style') {
2222     $style_start_tag->();
2223     return;
2224     } elsif ({
2225     base => 1, link => 1, meta => 1,
2226     }->{$token->{tag_name}}) {
2227 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2228 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2229     my $el;
2230     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2231 wakaba 1.3 if (defined $self->{head_element}) {
2232     $self->{head_element}->append_child ($el);
2233 wakaba 1.1 } else {
2234     $insert->($el);
2235     }
2236    
2237     !!!next-token;
2238     return;
2239     } elsif ($token->{tag_name} eq 'title') {
2240 wakaba 1.3 !!!parse-error (type => 'in body:title');
2241 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2242     my $title_el;
2243     !!!create-element ($title_el, 'title', $token->{attributes});
2244 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2245 wakaba 1.1 ->append_child ($title_el);
2246     $self->{content_model_flag} = 'RCDATA';
2247    
2248     my $text = '';
2249     !!!next-token;
2250     while ($token->{type} eq 'character') {
2251     $text .= $token->{data};
2252     !!!next-token;
2253     }
2254     if (length $text) {
2255     $title_el->manakai_append_text ($text);
2256     }
2257    
2258     $self->{content_model_flag} = 'PCDATA';
2259    
2260     if ($token->{type} eq 'end tag' and
2261     $token->{tag_name} eq 'title') {
2262     ## Ignore the token
2263     } else {
2264 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2265 wakaba 1.1 ## ISSUE: And ignore?
2266     }
2267     !!!next-token;
2268     return;
2269     } elsif ($token->{tag_name} eq 'body') {
2270 wakaba 1.3 !!!parse-error (type => 'in body:body');
2271 wakaba 1.1
2272 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2273     $self->{open_elements}->[1]->[1] ne 'body') {
2274 wakaba 1.1 ## Ignore the token
2275     } else {
2276 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2277 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2278     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2279     $body_el->set_attribute_ns
2280     (undef, [undef, $attr_name],
2281     $token->{attributes}->{$attr_name}->{value});
2282     }
2283     }
2284     }
2285     !!!next-token;
2286     return;
2287     } elsif ({
2288     address => 1, blockquote => 1, center => 1, dir => 1,
2289     div => 1, dl => 1, fieldset => 1, listing => 1,
2290     menu => 1, ol => 1, p => 1, ul => 1,
2291     pre => 1,
2292     }->{$token->{tag_name}}) {
2293     ## has a p element in scope
2294 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2295 wakaba 1.1 if ($_->[1] eq 'p') {
2296     !!!back-token;
2297     $token = {type => 'end tag', tag_name => 'p'};
2298     return;
2299     } elsif ({
2300     table => 1, caption => 1, td => 1, th => 1,
2301     button => 1, marquee => 1, object => 1, html => 1,
2302     }->{$_->[1]}) {
2303     last INSCOPE;
2304     }
2305     } # INSCOPE
2306    
2307     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2308     if ($token->{tag_name} eq 'pre') {
2309     !!!next-token;
2310     if ($token->{type} eq 'character') {
2311     $token->{data} =~ s/^\x0A//;
2312     unless (length $token->{data}) {
2313     !!!next-token;
2314     }
2315     }
2316     } else {
2317     !!!next-token;
2318     }
2319     return;
2320     } elsif ($token->{tag_name} eq 'form') {
2321 wakaba 1.3 if (defined $self->{form_element}) {
2322     !!!parse-error (type => 'in form:form');
2323 wakaba 1.1 ## Ignore the token
2324 wakaba 1.7 !!!next-token;
2325     return;
2326 wakaba 1.1 } else {
2327     ## has a p element in scope
2328 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2329 wakaba 1.1 if ($_->[1] eq 'p') {
2330     !!!back-token;
2331     $token = {type => 'end tag', tag_name => 'p'};
2332     return;
2333     } elsif ({
2334     table => 1, caption => 1, td => 1, th => 1,
2335     button => 1, marquee => 1, object => 1, html => 1,
2336     }->{$_->[1]}) {
2337     last INSCOPE;
2338     }
2339     } # INSCOPE
2340    
2341     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2342 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2343 wakaba 1.1 !!!next-token;
2344     return;
2345     }
2346     } elsif ($token->{tag_name} eq 'li') {
2347     ## has a p element in scope
2348 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2349 wakaba 1.1 if ($_->[1] eq 'p') {
2350     !!!back-token;
2351     $token = {type => 'end tag', tag_name => 'p'};
2352     return;
2353     } elsif ({
2354     table => 1, caption => 1, td => 1, th => 1,
2355     button => 1, marquee => 1, object => 1, html => 1,
2356     }->{$_->[1]}) {
2357     last INSCOPE;
2358     }
2359     } # INSCOPE
2360    
2361     ## Step 1
2362     my $i = -1;
2363 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2364 wakaba 1.1 LI: {
2365     ## Step 2
2366     if ($node->[1] eq 'li') {
2367 wakaba 1.8 if ($i != -1) {
2368     !!!parse-error (type => 'end tag missing:'.
2369     $self->{open_elements}->[-1]->[1]);
2370     ## TODO: test
2371     }
2372 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2373 wakaba 1.1 last LI;
2374     }
2375    
2376     ## Step 3
2377     if (not $formatting_category->{$node->[1]} and
2378     #not $phrasing_category->{$node->[1]} and
2379     ($special_category->{$node->[1]} or
2380     $scoping_category->{$node->[1]}) and
2381     $node->[1] ne 'address' and $node->[1] ne 'div') {
2382     last LI;
2383     }
2384    
2385     ## Step 4
2386     $i--;
2387 wakaba 1.3 $node = $self->{open_elements}->[$i];
2388 wakaba 1.1 redo LI;
2389     } # LI
2390    
2391     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2392     !!!next-token;
2393     return;
2394     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2395     ## has a p element in scope
2396 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2397 wakaba 1.1 if ($_->[1] eq 'p') {
2398     !!!back-token;
2399     $token = {type => 'end tag', tag_name => 'p'};
2400     return;
2401     } elsif ({
2402     table => 1, caption => 1, td => 1, th => 1,
2403     button => 1, marquee => 1, object => 1, html => 1,
2404     }->{$_->[1]}) {
2405     last INSCOPE;
2406     }
2407     } # INSCOPE
2408    
2409     ## Step 1
2410     my $i = -1;
2411 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2412 wakaba 1.1 LI: {
2413     ## Step 2
2414     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2415 wakaba 1.8 if ($i != -1) {
2416     !!!parse-error (type => 'end tag missing:'.
2417     $self->{open_elements}->[-1]->[1]);
2418     ## TODO: test
2419     }
2420 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2421 wakaba 1.1 last LI;
2422     }
2423    
2424     ## Step 3
2425     if (not $formatting_category->{$node->[1]} and
2426     #not $phrasing_category->{$node->[1]} and
2427     ($special_category->{$node->[1]} or
2428     $scoping_category->{$node->[1]}) and
2429     $node->[1] ne 'address' and $node->[1] ne 'div') {
2430     last LI;
2431     }
2432    
2433     ## Step 4
2434     $i--;
2435 wakaba 1.3 $node = $self->{open_elements}->[$i];
2436 wakaba 1.1 redo LI;
2437     } # LI
2438    
2439     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2440     !!!next-token;
2441     return;
2442     } elsif ($token->{tag_name} eq 'plaintext') {
2443     ## has a p element in scope
2444 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2445 wakaba 1.1 if ($_->[1] eq 'p') {
2446     !!!back-token;
2447     $token = {type => 'end tag', tag_name => 'p'};
2448     return;
2449     } elsif ({
2450     table => 1, caption => 1, td => 1, th => 1,
2451     button => 1, marquee => 1, object => 1, html => 1,
2452     }->{$_->[1]}) {
2453     last INSCOPE;
2454     }
2455     } # INSCOPE
2456    
2457     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2458    
2459     $self->{content_model_flag} = 'PLAINTEXT';
2460    
2461     !!!next-token;
2462     return;
2463     } elsif ({
2464     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2465     }->{$token->{tag_name}}) {
2466     ## has a p element in scope
2467 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2468     my $node = $self->{open_elements}->[$_];
2469 wakaba 1.1 if ($node->[1] eq 'p') {
2470     !!!back-token;
2471     $token = {type => 'end tag', tag_name => 'p'};
2472     return;
2473     } elsif ({
2474     table => 1, caption => 1, td => 1, th => 1,
2475     button => 1, marquee => 1, object => 1, html => 1,
2476     }->{$node->[1]}) {
2477     last INSCOPE;
2478     }
2479     } # INSCOPE
2480    
2481     ## has an element in scope
2482     my $i;
2483 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2484     my $node = $self->{open_elements}->[$_];
2485 wakaba 1.1 if ({
2486     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2487     }->{$node->[1]}) {
2488     $i = $_;
2489     last INSCOPE;
2490     } elsif ({
2491     table => 1, caption => 1, td => 1, th => 1,
2492     button => 1, marquee => 1, object => 1, html => 1,
2493     }->{$node->[1]}) {
2494     last INSCOPE;
2495     }
2496     } # INSCOPE
2497    
2498     if (defined $i) {
2499 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2500     splice @{$self->{open_elements}}, $i;
2501 wakaba 1.1 }
2502    
2503     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2504    
2505     !!!next-token;
2506     return;
2507     } elsif ($token->{tag_name} eq 'a') {
2508     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2509     my $node = $active_formatting_elements->[$i];
2510     if ($node->[1] eq 'a') {
2511 wakaba 1.3 !!!parse-error (type => 'in a:a');
2512 wakaba 1.1
2513     !!!back-token;
2514     $token = {type => 'end tag', tag_name => 'a'};
2515     $formatting_end_tag->($token->{tag_name});
2516    
2517     AFE2: for (reverse 0..$#$active_formatting_elements) {
2518     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2519     splice @$active_formatting_elements, $_, 1;
2520     last AFE2;
2521     }
2522     } # AFE2
2523 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2524     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2525     splice @{$self->{open_elements}}, $_, 1;
2526 wakaba 1.1 last OE;
2527     }
2528     } # OE
2529     last AFE;
2530     } elsif ($node->[0] eq '#marker') {
2531     last AFE;
2532     }
2533     } # AFE
2534    
2535     $reconstruct_active_formatting_elements->($insert_to_current);
2536    
2537     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2538 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2539 wakaba 1.1
2540     !!!next-token;
2541     return;
2542     } elsif ({
2543     b => 1, big => 1, em => 1, font => 1, i => 1,
2544     nobr => 1, s => 1, small => 1, strile => 1,
2545     strong => 1, tt => 1, u => 1,
2546     }->{$token->{tag_name}}) {
2547     $reconstruct_active_formatting_elements->($insert_to_current);
2548    
2549     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2550 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2551 wakaba 1.1
2552     !!!next-token;
2553     return;
2554     } elsif ($token->{tag_name} eq 'button') {
2555     ## has a button element in scope
2556 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2557     my $node = $self->{open_elements}->[$_];
2558 wakaba 1.1 if ($node->[1] eq 'button') {
2559 wakaba 1.3 !!!parse-error (type => 'in button:button');
2560 wakaba 1.1 !!!back-token;
2561     $token = {type => 'end tag', tag_name => 'button'};
2562     return;
2563     } elsif ({
2564     table => 1, caption => 1, td => 1, th => 1,
2565     button => 1, marquee => 1, object => 1, html => 1,
2566     }->{$node->[1]}) {
2567     last INSCOPE;
2568     }
2569     } # INSCOPE
2570    
2571     $reconstruct_active_formatting_elements->($insert_to_current);
2572    
2573     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2574     push @$active_formatting_elements, ['#marker', ''];
2575    
2576     !!!next-token;
2577     return;
2578     } elsif ($token->{tag_name} eq 'marquee' or
2579     $token->{tag_name} eq 'object') {
2580     $reconstruct_active_formatting_elements->($insert_to_current);
2581    
2582     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2583     push @$active_formatting_elements, ['#marker', ''];
2584    
2585     !!!next-token;
2586     return;
2587     } elsif ($token->{tag_name} eq 'xmp') {
2588     $reconstruct_active_formatting_elements->($insert_to_current);
2589    
2590     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2591    
2592     $self->{content_model_flag} = 'CDATA';
2593    
2594     !!!next-token;
2595     return;
2596     } elsif ($token->{tag_name} eq 'table') {
2597     ## has a p element in scope
2598 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2599 wakaba 1.1 if ($_->[1] eq 'p') {
2600     !!!back-token;
2601     $token = {type => 'end tag', tag_name => 'p'};
2602     return;
2603     } elsif ({
2604     table => 1, caption => 1, td => 1, th => 1,
2605     button => 1, marquee => 1, object => 1, html => 1,
2606     }->{$_->[1]}) {
2607     last INSCOPE;
2608     }
2609     } # INSCOPE
2610    
2611     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2612    
2613 wakaba 1.3 $self->{insertion_mode} = 'in table';
2614 wakaba 1.1
2615     !!!next-token;
2616     return;
2617     } elsif ({
2618     area => 1, basefont => 1, bgsound => 1, br => 1,
2619     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2620     image => 1,
2621     }->{$token->{tag_name}}) {
2622     if ($token->{tag_name} eq 'image') {
2623 wakaba 1.3 !!!parse-error (type => 'image');
2624 wakaba 1.1 $token->{tag_name} = 'img';
2625     }
2626    
2627     $reconstruct_active_formatting_elements->($insert_to_current);
2628    
2629     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2630 wakaba 1.3 pop @{$self->{open_elements}};
2631 wakaba 1.1
2632     !!!next-token;
2633     return;
2634     } elsif ($token->{tag_name} eq 'hr') {
2635     ## has a p element in scope
2636 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2637 wakaba 1.1 if ($_->[1] eq 'p') {
2638     !!!back-token;
2639     $token = {type => 'end tag', tag_name => 'p'};
2640     return;
2641     } elsif ({
2642     table => 1, caption => 1, td => 1, th => 1,
2643     button => 1, marquee => 1, object => 1, html => 1,
2644     }->{$_->[1]}) {
2645     last INSCOPE;
2646     }
2647     } # INSCOPE
2648    
2649     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2650 wakaba 1.3 pop @{$self->{open_elements}};
2651 wakaba 1.1
2652     !!!next-token;
2653     return;
2654     } elsif ($token->{tag_name} eq 'input') {
2655     $reconstruct_active_formatting_elements->($insert_to_current);
2656    
2657     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2658 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2659     pop @{$self->{open_elements}};
2660 wakaba 1.1
2661     !!!next-token;
2662     return;
2663     } elsif ($token->{tag_name} eq 'isindex') {
2664 wakaba 1.3 !!!parse-error (type => 'isindex');
2665 wakaba 1.1
2666 wakaba 1.3 if (defined $self->{form_element}) {
2667 wakaba 1.1 ## Ignore the token
2668     !!!next-token;
2669     return;
2670     } else {
2671     my $at = $token->{attributes};
2672     $at->{name} = {name => 'name', value => 'isindex'};
2673     my @tokens = (
2674     {type => 'start tag', tag_name => 'form'},
2675     {type => 'start tag', tag_name => 'hr'},
2676     {type => 'start tag', tag_name => 'p'},
2677     {type => 'start tag', tag_name => 'label'},
2678     {type => 'character',
2679     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2680     ## TODO: make this configurable
2681     {type => 'start tag', tag_name => 'input', attributes => $at},
2682     #{type => 'character', data => ''}, # SHOULD
2683     {type => 'end tag', tag_name => 'label'},
2684     {type => 'end tag', tag_name => 'p'},
2685     {type => 'start tag', tag_name => 'hr'},
2686     {type => 'end tag', tag_name => 'form'},
2687     );
2688     $token = shift @tokens;
2689     !!!back-token (@tokens);
2690     return;
2691     }
2692     } elsif ({ ## Start tags whose content is collected as raw text into a single element
2693     textarea => 1,
2694 wakaba 1.5 iframe => 1,
2695 wakaba 1.1 noembed => 1,
2696     noframes => 1,
2697     noscript => 0, ## TODO: 1 if scripting is enabled
2698     }->{$token->{tag_name}}) {
2699     my $tag_name = $token->{tag_name}; ## saved because $token is replaced as the tokenizer advances below
2700     my $el;
2701     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2702    
2703     if ($token->{tag_name} eq 'textarea') {
2704 wakaba 1.3 ## TODO: $self->{form_element} if defined
2705 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2706     } else {
2707     $self->{content_model_flag} = 'CDATA';
2708     }
2709    
2710     $insert->($el);
2711    
2712     my $text = '';
2713 wakaba 1.9 if ($token->{tag_name} eq 'textarea') {
2714     !!!next-token;
2715     if ($token->{type} eq 'character') {
2716     $token->{data} =~ s/^\x0A//; ## drop one leading LF right after <textarea>
2717     unless (length $token->{data}) {
2718     !!!next-token;
2719     }
2720     }
2721     } else {
2722     !!!next-token;
2723     }
2724 wakaba 1.1 while ($token->{type} eq 'character') {
2725     $text .= $token->{data};
2726     !!!next-token;
2727     }
2728     if (length $text) {
2729     $el->manakai_append_text ($text);
2730     }
2731    
2732     $self->{content_model_flag} = 'PCDATA';
2733    
2734     if ($token->{type} eq 'end tag' and
2735     $token->{tag_name} eq $tag_name) {
2736     ## Ignore the token
2737     } else {
2738 wakaba 1.10 if ($tag_name eq 'textarea') { ## test the element being parsed; $token is now the unexpected token and may not be a tag at all
2739     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2740     } else {
2741 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2742     }
2743 wakaba 1.1 ## ISSUE: And ignore?
2744     }
2745     !!!next-token;
2746     return;
2747     } elsif ($token->{tag_name} eq 'select') {
2748     $reconstruct_active_formatting_elements->($insert_to_current);
2749    
2750     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2751    
2752 wakaba 1.3 $self->{insertion_mode} = 'in select';
2753 wakaba 1.1 !!!next-token;
2754     return;
2755     } elsif ({
2756     caption => 1, col => 1, colgroup => 1, frame => 1,
2757     frameset => 1, head => 1, option => 1, optgroup => 1,
2758     tbody => 1, td => 1, tfoot => 1, th => 1,
2759     thead => 1, tr => 1,
2760     }->{$token->{tag_name}}) {
2761 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2762 wakaba 1.1 ## Ignore the token
2763     !!!next-token;
2764     return;
2765    
2766     ## ISSUE: An issue on HTML5 new elements in the spec.
2767     } else {
2768     $reconstruct_active_formatting_elements->($insert_to_current);
2769    
2770     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2771    
2772     !!!next-token;
2773     return;
2774     }
2775     } elsif ($token->{type} eq 'end tag') {
2776     if ($token->{tag_name} eq 'body') {
2777 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2778 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2779 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2780     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2781 wakaba 1.1 }
2782 wakaba 1.3 $self->{insertion_mode} = 'after body';
2783 wakaba 1.1 !!!next-token;
2784     return;
2785     } else {
2786 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2787 wakaba 1.1 ## Ignore the token
2788     !!!next-token;
2789     return;
2790     }
2791     } elsif ($token->{tag_name} eq 'html') { ## </html>: same checks as the </body> branch, but the token is reprocessed instead of consumed
2792 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2793 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2794 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') { ## current node is not body: report the element actually left open
2795     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2796 wakaba 1.1 }
2797 wakaba 1.3 $self->{insertion_mode} = 'after body';
2798 wakaba 1.1 ## reprocess
2799     return;
2800     } else {
2801 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2802 wakaba 1.1 ## Ignore the token
2803     !!!next-token;
2804     return;
2805     }
2806     } elsif ({
2807     address => 1, blockquote => 1, center => 1, dir => 1,
2808     div => 1, dl => 1, fieldset => 1, listing => 1,
2809     menu => 1, ol => 1, pre => 1, ul => 1,
2810     form => 1,
2811     p => 1,
2812     dd => 1, dt => 1, li => 1,
2813     button => 1, marquee => 1, object => 1,
2814     }->{$token->{tag_name}}) {
2815     ## has an element in scope
2816     my $i;
2817 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2818     my $node = $self->{open_elements}->[$_];
2819 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2820     ## generate implied end tags
2821     if ({
2822     dd => ($token->{tag_name} ne 'dd'),
2823     dt => ($token->{tag_name} ne 'dt'),
2824     li => ($token->{tag_name} ne 'li'),
2825     p => ($token->{tag_name} ne 'p'),
2826     td => 1, th => 1, tr => 1,
2827 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2828 wakaba 1.1 !!!back-token;
2829     $token = {type => 'end tag',
2830 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2831 wakaba 1.1 return;
2832     }
2833     $i = $_;
2834     last INSCOPE unless $token->{tag_name} eq 'p';
2835     } elsif ({
2836     table => 1, caption => 1, td => 1, th => 1,
2837     button => 1, marquee => 1, object => 1, html => 1,
2838     }->{$node->[1]}) {
2839     last INSCOPE;
2840     }
2841     } # INSCOPE
2842    
2843 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2844     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2845 wakaba 1.1 }
2846    
2847 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2848     undef $self->{form_element} if $token->{tag_name} eq 'form';
2849 wakaba 1.1 $clear_up_to_marker->()
2850     if {
2851     button => 1, marquee => 1, object => 1,
2852     }->{$token->{tag_name}};
2853     !!!next-token;
2854     return;
2855     } elsif ({
2856     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2857     }->{$token->{tag_name}}) {
2858     ## has an element in scope
2859     my $i;
2860 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2861     my $node = $self->{open_elements}->[$_];
2862 wakaba 1.1 if ({
2863     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2864     }->{$node->[1]}) {
2865     ## generate implied end tags
2866     if ({
2867     dd => 1, dt => 1, li => 1, p => 1,
2868     td => 1, th => 1, tr => 1,
2869 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2870 wakaba 1.1 !!!back-token;
2871     $token = {type => 'end tag',
2872 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2873 wakaba 1.1 return;
2874     }
2875     $i = $_;
2876     last INSCOPE;
2877     } elsif ({
2878     table => 1, caption => 1, td => 1, th => 1,
2879     button => 1, marquee => 1, object => 1, html => 1,
2880     }->{$node->[1]}) {
2881     last INSCOPE;
2882     }
2883     } # INSCOPE
2884    
2885 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2886     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2887 wakaba 1.1 }
2888    
2889 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2890 wakaba 1.1 !!!next-token;
2891     return;
2892     } elsif ({ ## End tags of formatting elements are delegated to $formatting_end_tag
2893     a => 1,
2894     b => 1, big => 1, em => 1, font => 1, i => 1,
2895     nobr => 1, s => 1, small => 1, strike => 1,
2896     strong => 1, tt => 1, u => 1,
2897     }->{$token->{tag_name}}) {
2898     $formatting_end_tag->($token->{tag_name});
2899 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
2900 wakaba 1.1 return;
2901     } elsif ({
2902     caption => 1, col => 1, colgroup => 1, frame => 1,
2903     frameset => 1, head => 1, option => 1, optgroup => 1,
2904     tbody => 1, td => 1, tfoot => 1, th => 1,
2905     thead => 1, tr => 1,
2906     area => 1, basefont => 1, bgsound => 1, br => 1,
2907     embed => 1, hr => 1, iframe => 1, image => 1,
2908 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2909 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2910     table => 1, textarea => 1, wbr => 1,
2911     noscript => 0, ## TODO: if scripting is enabled
2912     }->{$token->{tag_name}}) {
2913 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2914 wakaba 1.1 ## Ignore the token
2915     !!!next-token;
2916     return;
2917    
2918     ## ISSUE: Issue on HTML5 new elements in spec
2919    
2920     } else {
2921     ## Step 1
2922     my $node_i = -1;
2923 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2924 wakaba 1.1
2925     ## Step 2
2926     S2: {
2927     if ($node->[1] eq $token->{tag_name}) {
2928     ## Step 1
2929     ## generate implied end tags
2930     if ({
2931     dd => 1, dt => 1, li => 1, p => 1,
2932     td => 1, th => 1, tr => 1,
2933 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2934 wakaba 1.1 !!!back-token;
2935     $token = {type => 'end tag',
2936 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2937 wakaba 1.1 return;
2938     }
2939    
2940     ## Step 2
2941 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2942     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2943 wakaba 1.1 }
2944    
2945     ## Step 3
2946 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2947    
2948     !!!next-token;
2949 wakaba 1.1 last S2;
2950     } else {
2951     ## Step 3
2952     if (not $formatting_category->{$node->[1]} and
2953     #not $phrasing_category->{$node->[1]} and
2954     ($special_category->{$node->[1]} or
2955     $scoping_category->{$node->[1]})) {
2956 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2957 wakaba 1.1 ## Ignore the token
2958     !!!next-token;
2959     last S2;
2960     }
2961     }
2962    
2963     ## Step 4
2964     $node_i--;
2965 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2966 wakaba 1.1
2967     ## Step 5;
2968     redo S2;
2969     } # S2
2970 wakaba 1.3 return;
2971 wakaba 1.1 }
2972     }
2973     }; # $in_body
2974    
2975     B: {
2976 wakaba 1.3 if ($phase eq 'main') {
2977 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2978 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2979 wakaba 1.1 ## Ignore the token
2980     ## Stay in the phase
2981     !!!next-token;
2982     redo B;
2983     } elsif ($token->{type} eq 'start tag' and
2984     $token->{tag_name} eq 'html') {
2985     ## TODO: unless it is the first start tag token, parse-error
2986 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2987 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2988     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2989     $top_el->set_attribute_ns
2990     (undef, [undef, $attr_name],
2991     $token->{attributes}->{$attr_name}->{value});
2992     }
2993     }
2994     !!!next-token;
2995     redo B;
2996     } elsif ($token->{type} eq 'end-of-file') {
2997     ## Generate implied end tags
2998     if ({
2999     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3000 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3001 wakaba 1.1 !!!back-token;
3002 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3003 wakaba 1.1 redo B;
3004     }
3005    
3006 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
3007     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3008     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3009     } elsif (defined $self->{inner_html_node} and
3010     @{$self->{open_elements}} > 1 and
3011     $self->{open_elements}->[1]->[1] ne 'body') {
3012     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3013 wakaba 1.1 }
3014    
3015     ## Stop parsing
3016     last B;
3017    
3018     ## ISSUE: There is an issue in the spec.
3019     } else {
3020 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
3021 wakaba 1.1 if ($token->{type} eq 'character') {
3022     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3023 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3024 wakaba 1.1 unless (length $token->{data}) {
3025     !!!next-token;
3026     redo B;
3027     }
3028     }
3029     ## As if <head>
3030 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3031     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3032     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3033     $self->{insertion_mode} = 'in head';
3034 wakaba 1.1 ## reprocess
3035     redo B;
3036     } elsif ($token->{type} eq 'comment') {
3037     my $comment = $self->{document}->create_comment ($token->{data});
3038 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3039 wakaba 1.1 !!!next-token;
3040     redo B;
3041     } elsif ($token->{type} eq 'start tag') {
3042     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3043 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3044     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3045     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3046     $self->{insertion_mode} = 'in head';
3047 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3048     !!!next-token;
3049     #} elsif ({
3050     # base => 1, link => 1, meta => 1,
3051     # script => 1, style => 1, title => 1,
3052     # }->{$token->{tag_name}}) {
3053     # ## reprocess
3054     } else {
3055     ## reprocess
3056     }
3057     redo B;
3058     } elsif ($token->{type} eq 'end tag') {
3059     if ($token->{tag_name} eq 'html') {
3060     ## As if <head>
3061 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3062     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3063     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3064     $self->{insertion_mode} = 'in head';
3065 wakaba 1.1 ## reprocess
3066     redo B;
3067     } else {
3068 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3069 wakaba 1.1 ## Ignore the token
3070     !!!next-token;
3071     redo B;
3072     }
3073     } else {
3074     die "$0: $token->{type}: Unknown type";
3075     }
3076 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3077 wakaba 1.1 if ($token->{type} eq 'character') {
3078     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3079 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3080 wakaba 1.1 unless (length $token->{data}) {
3081     !!!next-token;
3082     redo B;
3083     }
3084     }
3085    
3086     #
3087     } elsif ($token->{type} eq 'comment') {
3088     my $comment = $self->{document}->create_comment ($token->{data});
3089 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3090 wakaba 1.1 !!!next-token;
3091     redo B;
3092     } elsif ($token->{type} eq 'start tag') {
3093     if ($token->{tag_name} eq 'title') {
3094     ## NOTE: There is an "as if in head" code clone
3095     my $title_el;
3096     !!!create-element ($title_el, 'title', $token->{attributes});
3097 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3098 wakaba 1.1 ->append_child ($title_el);
3099     $self->{content_model_flag} = 'RCDATA';
3100    
3101     my $text = '';
3102     !!!next-token;
3103     while ($token->{type} eq 'character') {
3104     $text .= $token->{data};
3105     !!!next-token;
3106     }
3107     if (length $text) {
3108     $title_el->manakai_append_text ($text);
3109     }
3110    
3111     $self->{content_model_flag} = 'PCDATA';
3112    
3113     if ($token->{type} eq 'end tag' and
3114     $token->{tag_name} eq 'title') {
3115     ## Ignore the token
3116     } else {
3117 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3118 wakaba 1.1 ## ISSUE: And ignore?
3119     }
3120     !!!next-token;
3121     redo B;
3122     } elsif ($token->{tag_name} eq 'style') {
3123     $style_start_tag->();
3124     redo B;
3125     } elsif ($token->{tag_name} eq 'script') {
3126     $script_start_tag->();
3127     redo B;
3128     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3129     ## NOTE: There are "as if in head" code clones
3130     my $el;
3131     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3132 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3133 wakaba 1.1 ->append_child ($el);
3134    
3135     !!!next-token;
3136     redo B;
3137     } elsif ($token->{tag_name} eq 'head') {
3138 wakaba 1.3 !!!parse-error (type => 'in head:head');
3139 wakaba 1.1 ## Ignore the token
3140     !!!next-token;
3141     redo B;
3142     } else {
3143     #
3144     }
3145     } elsif ($token->{type} eq 'end tag') {
3146     if ($token->{tag_name} eq 'head') {
3147 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3148     pop @{$self->{open_elements}};
3149 wakaba 1.1 } else {
3150 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3151 wakaba 1.1 }
3152 wakaba 1.3 $self->{insertion_mode} = 'after head';
3153 wakaba 1.1 !!!next-token;
3154     redo B;
3155     } elsif ($token->{tag_name} eq 'html') {
3156     #
3157     } else {
3158 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3159 wakaba 1.1 ## Ignore the token
3160     !!!next-token;
3161     redo B;
3162     }
3163     } else {
3164     #
3165     }
3166    
3167 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3168 wakaba 1.1 ## As if </head>
3169 wakaba 1.3 pop @{$self->{open_elements}};
3170 wakaba 1.1 }
3171 wakaba 1.3 $self->{insertion_mode} = 'after head';
3172 wakaba 1.1 ## reprocess
3173     redo B;
3174    
3175     ## ISSUE: An issue in the spec.
3176 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3177 wakaba 1.1 if ($token->{type} eq 'character') {
3178     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3179 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3180 wakaba 1.1 unless (length $token->{data}) {
3181     !!!next-token;
3182     redo B;
3183     }
3184     }
3185    
3186     #
3187     } elsif ($token->{type} eq 'comment') {
3188     my $comment = $self->{document}->create_comment ($token->{data});
3189 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3190 wakaba 1.1 !!!next-token;
3191     redo B;
3192     } elsif ($token->{type} eq 'start tag') {
3193     if ($token->{tag_name} eq 'body') {
3194     !!!insert-element ('body', $token->{attributes});
3195 wakaba 1.3 $self->{insertion_mode} = 'in body';
3196 wakaba 1.1 !!!next-token;
3197     redo B;
3198     } elsif ($token->{tag_name} eq 'frameset') {
3199     !!!insert-element ('frameset', $token->{attributes});
3200 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3201 wakaba 1.1 !!!next-token;
3202     redo B;
3203     } elsif ({
3204     base => 1, link => 1, meta => 1,
3205 wakaba 1.3 script => 1, style => 1, title => 1,
3206 wakaba 1.1 }->{$token->{tag_name}}) {
3207 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3208     $self->{insertion_mode} = 'in head';
3209 wakaba 1.1 ## reprocess
3210     redo B;
3211     } else {
3212     #
3213     }
3214     } else {
3215     #
3216     }
3217    
3218     ## As if <body>
3219     !!!insert-element ('body');
3220 wakaba 1.3 $self->{insertion_mode} = 'in body';
3221 wakaba 1.1 ## reprocess
3222     redo B;
3223 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3224 wakaba 1.1 if ($token->{type} eq 'character') {
3225     ## NOTE: There is a code clone of "character in body".
3226     $reconstruct_active_formatting_elements->($insert_to_current);
3227    
3228 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3229 wakaba 1.1
3230     !!!next-token;
3231     redo B;
3232     } elsif ($token->{type} eq 'comment') {
3233     ## NOTE: There is a code clone of "comment in body".
3234     my $comment = $self->{document}->create_comment ($token->{data});
3235 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3236 wakaba 1.1 !!!next-token;
3237     redo B;
3238     } else {
3239     $in_body->($insert_to_current);
3240     redo B;
3241     }
3242 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3243 wakaba 1.1 if ($token->{type} eq 'character') {
3244     ## NOTE: There are "character in table" code clones.
3245     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3246 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3247 wakaba 1.1
3248     unless (length $token->{data}) {
3249     !!!next-token;
3250     redo B;
3251     }
3252     }
3253    
3254 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3255    
3256 wakaba 1.1 ## As if in body, but insert into foster parent element
3257     ## ISSUE: Spec says that "whenever a node would be inserted
3258     ## into the current node" while characters might not
3259     ## result in a new Text node.
3260     $reconstruct_active_formatting_elements->($insert_to_foster);
3261    
3262     if ({
3263     table => 1, tbody => 1, tfoot => 1,
3264     thead => 1, tr => 1,
3265 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3266 wakaba 1.1 # MUST
3267     my $foster_parent_element;
3268     my $next_sibling;
3269     my $prev_sibling;
3270 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3271     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3272     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3273 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3274     $foster_parent_element = $parent;
3275 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3276 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3277     } else {
3278 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3279 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3280     }
3281     last OE;
3282     }
3283     } # OE
3284 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3285 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3286     unless defined $foster_parent_element;
3287     if (defined $prev_sibling and
3288     $prev_sibling->node_type == 3) {
3289     $prev_sibling->manakai_append_text ($token->{data});
3290     } else {
3291     $foster_parent_element->insert_before
3292     ($self->{document}->create_text_node ($token->{data}),
3293     $next_sibling);
3294     }
3295     } else {
3296 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3297 wakaba 1.1 }
3298    
3299     !!!next-token;
3300     redo B;
3301     } elsif ($token->{type} eq 'comment') {
3302     my $comment = $self->{document}->create_comment ($token->{data});
3303 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3304 wakaba 1.1 !!!next-token;
3305     redo B;
3306     } elsif ($token->{type} eq 'start tag') {
3307     if ({
3308     caption => 1,
3309     colgroup => 1,
3310     tbody => 1, tfoot => 1, thead => 1,
3311     }->{$token->{tag_name}}) {
3312     ## Clear back to table context
3313 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3314     $self->{open_elements}->[-1]->[1] ne 'html') {
3315     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3316     pop @{$self->{open_elements}};
3317 wakaba 1.1 }
3318    
3319     push @$active_formatting_elements, ['#marker', '']
3320     if $token->{tag_name} eq 'caption';
3321    
3322     !!!insert-element ($token->{tag_name}, $token->{attributes});
3323 wakaba 1.3 $self->{insertion_mode} = {
3324 wakaba 1.1 caption => 'in caption',
3325     colgroup => 'in column group',
3326     tbody => 'in table body',
3327     tfoot => 'in table body',
3328     thead => 'in table body',
3329     }->{$token->{tag_name}};
3330     !!!next-token;
3331     redo B;
3332     } elsif ({
3333     col => 1,
3334     td => 1, th => 1, tr => 1,
3335     }->{$token->{tag_name}}) {
3336     ## Clear back to table context
3337 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3338     $self->{open_elements}->[-1]->[1] ne 'html') {
3339     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3340     pop @{$self->{open_elements}};
3341 wakaba 1.1 }
3342    
3343     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3344 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3345 wakaba 1.1 ? 'in column group' : 'in table body';
3346     ## reprocess
3347     redo B;
3348     } elsif ($token->{tag_name} eq 'table') {
3349     ## NOTE: There are code clones for this "table in table"
3350 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3351 wakaba 1.1
3352     ## As if </table>
3353     ## have a table element in table scope
3354     my $i;
3355 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3356     my $node = $self->{open_elements}->[$_];
3357 wakaba 1.1 if ($node->[1] eq 'table') {
3358     $i = $_;
3359     last INSCOPE;
3360     } elsif ({
3361     table => 1, html => 1,
3362     }->{$node->[1]}) {
3363     last INSCOPE;
3364     }
3365     } # INSCOPE
3366     unless (defined $i) {
3367 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3368 wakaba 1.1 ## Ignore tokens </table><table>
3369     !!!next-token;
3370     redo B;
3371     }
3372    
3373     ## generate implied end tags
3374     if ({
3375     dd => 1, dt => 1, li => 1, p => 1,
3376     td => 1, th => 1, tr => 1,
3377 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3378 wakaba 1.1 !!!back-token; # <table>
3379     $token = {type => 'end tag', tag_name => 'table'};
3380     !!!back-token;
3381     $token = {type => 'end tag',
3382 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3383 wakaba 1.1 redo B;
3384     }
3385    
3386 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3387     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3388 wakaba 1.1 }
3389    
3390 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3391 wakaba 1.1
3392 wakaba 1.3 $self->_reset_insertion_mode;
3393 wakaba 1.1
3394     ## reprocess
3395     redo B;
3396     } else {
3397     #
3398     }
3399     } elsif ($token->{type} eq 'end tag') {
3400     if ($token->{tag_name} eq 'table') {
3401     ## have a table element in table scope
3402     my $i;
3403 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3404     my $node = $self->{open_elements}->[$_];
3405 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3406     $i = $_;
3407     last INSCOPE;
3408     } elsif ({
3409     table => 1, html => 1,
3410     }->{$node->[1]}) {
3411     last INSCOPE;
3412     }
3413     } # INSCOPE
3414     unless (defined $i) {
3415 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3416 wakaba 1.1 ## Ignore the token
3417     !!!next-token;
3418     redo B;
3419     }
3420    
3421     ## generate implied end tags
3422     if ({
3423     dd => 1, dt => 1, li => 1, p => 1,
3424     td => 1, th => 1, tr => 1,
3425 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3426 wakaba 1.1 !!!back-token;
3427     $token = {type => 'end tag',
3428 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3429 wakaba 1.1 redo B;
3430     }
3431    
3432 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3433     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3434 wakaba 1.1 }
3435    
3436 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3437 wakaba 1.1
3438 wakaba 1.3 $self->_reset_insertion_mode;
3439 wakaba 1.1
3440     !!!next-token;
3441     redo B;
3442     } elsif ({
3443     body => 1, caption => 1, col => 1, colgroup => 1,
3444     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3445     thead => 1, tr => 1,
3446     }->{$token->{tag_name}}) {
3447 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3448 wakaba 1.1 ## Ignore the token
3449     !!!next-token;
3450     redo B;
3451     } else {
3452     #
3453     }
3454     } else {
3455     #
3456     }
3457    
3458 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3459 wakaba 1.1 $in_body->($insert_to_foster);
3460     redo B;
3461 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3462 wakaba 1.1 if ($token->{type} eq 'character') {
3463     ## NOTE: This is a code clone of "character in body".
3464     $reconstruct_active_formatting_elements->($insert_to_current);
3465    
3466 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3467 wakaba 1.1
3468     !!!next-token;
3469     redo B;
3470     } elsif ($token->{type} eq 'comment') {
3471     ## NOTE: This is a code clone of "comment in body".
3472     my $comment = $self->{document}->create_comment ($token->{data});
3473 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3474 wakaba 1.1 !!!next-token;
3475     redo B;
3476     } elsif ($token->{type} eq 'start tag') {
3477     if ({
3478     caption => 1, col => 1, colgroup => 1, tbody => 1,
3479     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3480     }->{$token->{tag_name}}) {
3481 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3482 wakaba 1.1
3483     ## As if </caption>
3484     ## have a table element in table scope
3485     my $i;
3486 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3487     my $node = $self->{open_elements}->[$_];
3488 wakaba 1.1 if ($node->[1] eq 'caption') {
3489     $i = $_;
3490     last INSCOPE;
3491     } elsif ({
3492     table => 1, html => 1,
3493     }->{$node->[1]}) {
3494     last INSCOPE;
3495     }
3496     } # INSCOPE
3497     unless (defined $i) {
3498 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3499 wakaba 1.1 ## Ignore the token
3500     !!!next-token;
3501     redo B;
3502     }
3503    
3504     ## generate implied end tags
3505     if ({
3506     dd => 1, dt => 1, li => 1, p => 1,
3507     td => 1, th => 1, tr => 1,
3508 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3509 wakaba 1.1 !!!back-token; # <?>
3510     $token = {type => 'end tag', tag_name => 'caption'};
3511     !!!back-token;
3512     $token = {type => 'end tag',
3513 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3514 wakaba 1.1 redo B;
3515     }
3516    
3517 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3518     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3519 wakaba 1.1 }
3520    
3521 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3522 wakaba 1.1
3523     $clear_up_to_marker->();
3524    
3525 wakaba 1.3 $self->{insertion_mode} = 'in table';
3526 wakaba 1.1
3527     ## reprocess
3528     redo B;
3529     } else {
3530     #
3531     }
3532     } elsif ($token->{type} eq 'end tag') {
3533     if ($token->{tag_name} eq 'caption') {
3534     ## have a table element in table scope
3535     my $i;
3536 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3537     my $node = $self->{open_elements}->[$_];
3538 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3539     $i = $_;
3540     last INSCOPE;
3541     } elsif ({
3542     table => 1, html => 1,
3543     }->{$node->[1]}) {
3544     last INSCOPE;
3545     }
3546     } # INSCOPE
3547     unless (defined $i) {
3548 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3549 wakaba 1.1 ## Ignore the token
3550     !!!next-token;
3551     redo B;
3552     }
3553    
3554     ## generate implied end tags
3555     if ({
3556     dd => 1, dt => 1, li => 1, p => 1,
3557     td => 1, th => 1, tr => 1,
3558 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3559 wakaba 1.1 !!!back-token;
3560     $token = {type => 'end tag',
3561 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3562 wakaba 1.1 redo B;
3563     }
3564    
3565 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3566     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3567 wakaba 1.1 }
3568    
3569 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3570 wakaba 1.1
3571     $clear_up_to_marker->();
3572    
3573 wakaba 1.3 $self->{insertion_mode} = 'in table';
3574 wakaba 1.1
3575     !!!next-token;
3576     redo B;
3577     } elsif ($token->{tag_name} eq 'table') {
3578 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3579 wakaba 1.1
3580     ## As if </caption>
3581     ## have a table element in table scope
3582     my $i;
3583 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3584     my $node = $self->{open_elements}->[$_];
3585 wakaba 1.1 if ($node->[1] eq 'caption') {
3586     $i = $_;
3587     last INSCOPE;
3588     } elsif ({
3589     table => 1, html => 1,
3590     }->{$node->[1]}) {
3591     last INSCOPE;
3592     }
3593     } # INSCOPE
3594     unless (defined $i) {
3595 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3596 wakaba 1.1 ## Ignore the token
3597     !!!next-token;
3598     redo B;
3599     }
3600    
3601     ## generate implied end tags
3602     if ({
3603     dd => 1, dt => 1, li => 1, p => 1,
3604     td => 1, th => 1, tr => 1,
3605 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3606 wakaba 1.1 !!!back-token; # </table>
3607     $token = {type => 'end tag', tag_name => 'caption'};
3608     !!!back-token;
3609     $token = {type => 'end tag',
3610 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3611 wakaba 1.1 redo B;
3612     }
3613    
3614 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3615     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3616 wakaba 1.1 }
3617    
3618 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3619 wakaba 1.1
3620     $clear_up_to_marker->();
3621    
3622 wakaba 1.3 $self->{insertion_mode} = 'in table';
3623 wakaba 1.1
3624     ## reprocess
3625     redo B;
3626     } elsif ({
3627     body => 1, col => 1, colgroup => 1,
3628     html => 1, tbody => 1, td => 1, tfoot => 1,
3629     th => 1, thead => 1, tr => 1,
3630     }->{$token->{tag_name}}) {
## An end tag named in the list above is reported as a parse error
## and dropped while the insertion mode is "in caption".
!!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
## Ignore the token.  The next token must be fetched before |redo B|:
## neither the token nor the insertion mode changes here, so without
## it the same end tag would be dispatched to this branch again and
## the loop would never advance.  Every other "Ignore the token" path
## in this dispatch does the same.
!!!next-token;
redo B;
3634     } else {
3635     #
3636     }
3637     } else {
3638     #
3639     }
3640    
3641     $in_body->($insert_to_current);
3642     redo B;
3643 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3644 wakaba 1.1 if ($token->{type} eq 'character') {
3645     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3646 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3647 wakaba 1.1 unless (length $token->{data}) {
3648     !!!next-token;
3649     redo B;
3650     }
3651     }
3652    
3653     #
3654     } elsif ($token->{type} eq 'comment') {
3655     my $comment = $self->{document}->create_comment ($token->{data});
3656 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3657 wakaba 1.1 !!!next-token;
3658     redo B;
3659     } elsif ($token->{type} eq 'start tag') {
3660     if ($token->{tag_name} eq 'col') {
3661     !!!insert-element ($token->{tag_name}, $token->{attributes});
3662 wakaba 1.3 pop @{$self->{open_elements}};
3663 wakaba 1.1 !!!next-token;
3664     redo B;
3665     } else {
3666     #
3667     }
3668     } elsif ($token->{type} eq 'end tag') {
3669     if ($token->{tag_name} eq 'colgroup') {
3670 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3671     !!!parse-error (type => 'unmatched end tag:colgroup');
3672 wakaba 1.1 ## Ignore the token
3673     !!!next-token;
3674     redo B;
3675     } else {
3676 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3677     $self->{insertion_mode} = 'in table';
3678 wakaba 1.1 !!!next-token;
3679     redo B;
3680     }
3681     } elsif ($token->{tag_name} eq 'col') {
3682 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3683 wakaba 1.1 ## Ignore the token
3684     !!!next-token;
3685     redo B;
3686     } else {
3687     #
3688     }
3689     } else {
3690     #
3691     }
3692    
3693     ## As if </colgroup>
3694 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3695     !!!parse-error (type => 'unmatched end tag:colgroup');
3696 wakaba 1.1 ## Ignore the token
3697     !!!next-token;
3698     redo B;
3699     } else {
3700 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3701     $self->{insertion_mode} = 'in table';
3702 wakaba 1.1 ## reprocess
3703     redo B;
3704     }
3705 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3706 wakaba 1.1 if ($token->{type} eq 'character') {
3707     ## NOTE: This is a "character in table" code clone.
3708     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3709 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3710 wakaba 1.1
3711     unless (length $token->{data}) {
3712     !!!next-token;
3713     redo B;
3714     }
3715     }
3716    
3717 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3718    
3719 wakaba 1.1 ## As if in body, but insert into foster parent element
3720     ## ISSUE: Spec says that "whenever a node would be inserted
3721     ## into the current node" while characters might not be
3722     ## result in a new Text node.
3723     $reconstruct_active_formatting_elements->($insert_to_foster);
3724    
3725     if ({
3726     table => 1, tbody => 1, tfoot => 1,
3727     thead => 1, tr => 1,
3728 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3729 wakaba 1.1 # MUST
3730     my $foster_parent_element;
3731     my $next_sibling;
3732     my $prev_sibling;
3733 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3734     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3735     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3736 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3737     $foster_parent_element = $parent;
3738 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3739 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3740     } else {
3741 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3742 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3743     }
3744     last OE;
3745     }
3746     } # OE
3747 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3748 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3749     unless defined $foster_parent_element;
3750     if (defined $prev_sibling and
3751     $prev_sibling->node_type == 3) {
3752     $prev_sibling->manakai_append_text ($token->{data});
3753     } else {
3754     $foster_parent_element->insert_before
3755     ($self->{document}->create_text_node ($token->{data}),
3756     $next_sibling);
3757     }
3758     } else {
3759 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3760 wakaba 1.1 }
3761    
3762     !!!next-token;
3763     redo B;
3764     } elsif ($token->{type} eq 'comment') {
3765     ## Copied from 'in table'
3766     my $comment = $self->{document}->create_comment ($token->{data});
3767 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3768 wakaba 1.1 !!!next-token;
3769     redo B;
3770     } elsif ($token->{type} eq 'start tag') {
3771     if ({
3772     tr => 1,
3773     th => 1, td => 1,
3774     }->{$token->{tag_name}}) {
3775 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3776     !!!parse-error (type => 'missing start tag:tr');
3777     }
3778    
3779 wakaba 1.1 ## Clear back to table body context
3780     while (not {
3781     tbody => 1, tfoot => 1, thead => 1, html => 1,
3782 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3783     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3784     pop @{$self->{open_elements}};
3785 wakaba 1.1 }
3786    
3787 wakaba 1.3 $self->{insertion_mode} = 'in row';
3788 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3789     !!!insert-element ($token->{tag_name}, $token->{attributes});
3790     !!!next-token;
3791     } else {
3792     !!!insert-element ('tr');
3793     ## reprocess
3794     }
3795     redo B;
3796     } elsif ({
3797     caption => 1, col => 1, colgroup => 1,
3798     tbody => 1, tfoot => 1, thead => 1,
3799     }->{$token->{tag_name}}) {
3800     ## have an element in table scope
3801     my $i;
3802 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3803     my $node = $self->{open_elements}->[$_];
3804 wakaba 1.1 if ({
3805     tbody => 1, thead => 1, tfoot => 1,
3806     }->{$node->[1]}) {
3807     $i = $_;
3808     last INSCOPE;
3809     } elsif ({
3810     table => 1, html => 1,
3811     }->{$node->[1]}) {
3812     last INSCOPE;
3813     }
3814     } # INSCOPE
3815     unless (defined $i) {
3816 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3817 wakaba 1.1 ## Ignore the token
3818     !!!next-token;
3819     redo B;
3820     }
3821    
3822     ## Clear back to table body context
3823     while (not {
3824     tbody => 1, tfoot => 1, thead => 1, html => 1,
3825 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3826     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3827     pop @{$self->{open_elements}};
3828 wakaba 1.1 }
3829    
3830     ## As if <{current node}>
3831     ## have an element in table scope
3832     ## true by definition
3833    
3834     ## Clear back to table body context
3835     ## nop by definition
3836    
3837 wakaba 1.3 pop @{$self->{open_elements}};
3838     $self->{insertion_mode} = 'in table';
3839 wakaba 1.1 ## reprocess
3840     redo B;
3841     } elsif ($token->{tag_name} eq 'table') {
3842     ## NOTE: This is a code clone of "table in table"
3843 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3844 wakaba 1.1
3845     ## As if </table>
3846     ## have a table element in table scope
3847     my $i;
3848 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3849     my $node = $self->{open_elements}->[$_];
3850 wakaba 1.1 if ($node->[1] eq 'table') {
3851     $i = $_;
3852     last INSCOPE;
3853     } elsif ({
3854     table => 1, html => 1,
3855     }->{$node->[1]}) {
3856     last INSCOPE;
3857     }
3858     } # INSCOPE
3859     unless (defined $i) {
3860 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3861 wakaba 1.1 ## Ignore tokens </table><table>
3862     !!!next-token;
3863     redo B;
3864     }
3865    
3866     ## generate implied end tags
3867     if ({
3868     dd => 1, dt => 1, li => 1, p => 1,
3869     td => 1, th => 1, tr => 1,
3870 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3871 wakaba 1.1 !!!back-token; # <table>
3872     $token = {type => 'end tag', tag_name => 'table'};
3873     !!!back-token;
3874     $token = {type => 'end tag',
3875 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3876 wakaba 1.1 redo B;
3877     }
3878    
3879 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3880     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3881 wakaba 1.1 }
3882    
3883 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3884 wakaba 1.1
3885 wakaba 1.3 $self->_reset_insertion_mode;
3886 wakaba 1.1
3887     ## reprocess
3888     redo B;
3889     } else {
3890     #
3891     }
3892     } elsif ($token->{type} eq 'end tag') {
3893     if ({
3894     tbody => 1, tfoot => 1, thead => 1,
3895     }->{$token->{tag_name}}) {
3896     ## have an element in table scope
3897     my $i;
3898 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3899     my $node = $self->{open_elements}->[$_];
3900 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3901     $i = $_;
3902     last INSCOPE;
3903     } elsif ({
3904     table => 1, html => 1,
3905     }->{$node->[1]}) {
3906     last INSCOPE;
3907     }
3908     } # INSCOPE
3909     unless (defined $i) {
3910 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3911 wakaba 1.1 ## Ignore the token
3912     !!!next-token;
3913     redo B;
3914     }
3915    
3916     ## Clear back to table body context
3917     while (not {
3918     tbody => 1, tfoot => 1, thead => 1, html => 1,
3919 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3920     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3921     pop @{$self->{open_elements}};
3922 wakaba 1.1 }
3923    
3924 wakaba 1.3 pop @{$self->{open_elements}};
3925     $self->{insertion_mode} = 'in table';
3926 wakaba 1.1 !!!next-token;
3927     redo B;
3928     } elsif ($token->{tag_name} eq 'table') {
3929     ## have an element in table scope
3930     my $i;
3931 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3932     my $node = $self->{open_elements}->[$_];
3933 wakaba 1.1 if ({
3934     tbody => 1, thead => 1, tfoot => 1,
3935     }->{$node->[1]}) {
3936     $i = $_;
3937     last INSCOPE;
3938     } elsif ({
3939     table => 1, html => 1,
3940     }->{$node->[1]}) {
3941     last INSCOPE;
3942     }
3943     } # INSCOPE
3944     unless (defined $i) {
3945 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3946 wakaba 1.1 ## Ignore the token
3947     !!!next-token;
3948     redo B;
3949     }
3950    
3951     ## Clear back to table body context
3952     while (not {
3953     tbody => 1, tfoot => 1, thead => 1, html => 1,
3954 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3955     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3956     pop @{$self->{open_elements}};
3957 wakaba 1.1 }
3958    
3959     ## As if <{current node}>
3960     ## have an element in table scope
3961     ## true by definition
3962    
3963     ## Clear back to table body context
3964     ## nop by definition
3965    
3966 wakaba 1.3 pop @{$self->{open_elements}};
3967     $self->{insertion_mode} = 'in table';
3968 wakaba 1.1 ## reprocess
3969     redo B;
3970     } elsif ({
3971     body => 1, caption => 1, col => 1, colgroup => 1,
3972     html => 1, td => 1, th => 1, tr => 1,
3973     }->{$token->{tag_name}}) {
3974 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3975 wakaba 1.1 ## Ignore the token
3976     !!!next-token;
3977     redo B;
3978     } else {
3979     #
3980     }
3981     } else {
3982     #
3983     }
3984    
3985     ## As if in table
3986 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3987 wakaba 1.1 $in_body->($insert_to_foster);
3988     redo B;
3989 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
3990 wakaba 1.1 if ($token->{type} eq 'character') {
3991     ## NOTE: This is a "character in table" code clone.
3992     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3993 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3994 wakaba 1.1
3995     unless (length $token->{data}) {
3996     !!!next-token;
3997     redo B;
3998     }
3999     }
4000    
4001 wakaba 1.3 !!!parse-error (type => 'in table:#character');
4002    
4003 wakaba 1.1 ## As if in body, but insert into foster parent element
4004     ## ISSUE: Spec says that "whenever a node would be inserted
4005     ## into the current node" while characters might not be
4006     ## result in a new Text node.
4007     $reconstruct_active_formatting_elements->($insert_to_foster);
4008    
4009     if ({
4010     table => 1, tbody => 1, tfoot => 1,
4011     thead => 1, tr => 1,
4012 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4013 wakaba 1.1 # MUST
4014     my $foster_parent_element;
4015     my $next_sibling;
4016     my $prev_sibling;
4017 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4018     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4019     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4020 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4021     $foster_parent_element = $parent;
4022 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4023 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4024     } else {
4025 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4026 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4027     }
4028     last OE;
4029     }
4030     } # OE
4031 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4032 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4033     unless defined $foster_parent_element;
4034     if (defined $prev_sibling and
4035     $prev_sibling->node_type == 3) {
4036     $prev_sibling->manakai_append_text ($token->{data});
4037     } else {
4038     $foster_parent_element->insert_before
4039     ($self->{document}->create_text_node ($token->{data}),
4040     $next_sibling);
4041     }
4042     } else {
4043 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4044 wakaba 1.1 }
4045    
4046     !!!next-token;
4047     redo B;
4048     } elsif ($token->{type} eq 'comment') {
4049     ## Copied from 'in table'
4050     my $comment = $self->{document}->create_comment ($token->{data});
4051 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4052 wakaba 1.1 !!!next-token;
4053     redo B;
4054     } elsif ($token->{type} eq 'start tag') {
4055     if ($token->{tag_name} eq 'th' or
4056     $token->{tag_name} eq 'td') {
4057     ## Clear back to table row context
4058     while (not {
4059     tr => 1, html => 1,
4060 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4061     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4062     pop @{$self->{open_elements}};
4063 wakaba 1.1 }
4064    
4065     !!!insert-element ($token->{tag_name}, $token->{attributes});
4066 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4067 wakaba 1.1
4068     push @$active_formatting_elements, ['#marker', ''];
4069    
4070     !!!next-token;
4071     redo B;
4072     } elsif ({
4073     caption => 1, col => 1, colgroup => 1,
4074     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4075     }->{$token->{tag_name}}) {
4076     ## As if </tr>
4077     ## have an element in table scope
4078     my $i;
4079 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4080     my $node = $self->{open_elements}->[$_];
4081 wakaba 1.1 if ($node->[1] eq 'tr') {
4082     $i = $_;
4083     last INSCOPE;
4084     } elsif ({
4085     table => 1, html => 1,
4086     }->{$node->[1]}) {
4087     last INSCOPE;
4088     }
4089     } # INSCOPE
unless (defined $i) {
  ## The scan above found no |tr| element in table scope, so the
  ## implied </tr> has nothing to close.  The error type uses the same
  ## 'unmatched end tag:' spelling as every other site, so that code
  ## matching on the type string sees this case too.
  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
  ## Ignore the token
  !!!next-token;
  redo B;
}
4096    
4097     ## Clear back to table row context
4098     while (not {
4099     tr => 1, html => 1,
4100 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4101     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4102     pop @{$self->{open_elements}};
4103 wakaba 1.1 }
4104    
4105 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4106     $self->{insertion_mode} = 'in table body';
4107 wakaba 1.1 ## reprocess
4108     redo B;
4109     } elsif ($token->{tag_name} eq 'table') {
4110     ## NOTE: This is a code clone of "table in table"
4111 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4112 wakaba 1.1
4113     ## As if </table>
4114     ## have a table element in table scope
4115     my $i;
4116 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4117     my $node = $self->{open_elements}->[$_];
4118 wakaba 1.1 if ($node->[1] eq 'table') {
4119     $i = $_;
4120     last INSCOPE;
4121     } elsif ({
4122     table => 1, html => 1,
4123     }->{$node->[1]}) {
4124     last INSCOPE;
4125     }
4126     } # INSCOPE
4127     unless (defined $i) {
4128 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4129 wakaba 1.1 ## Ignore tokens </table><table>
4130     !!!next-token;
4131     redo B;
4132     }
4133    
4134     ## generate implied end tags
4135     if ({
4136     dd => 1, dt => 1, li => 1, p => 1,
4137     td => 1, th => 1, tr => 1,
4138 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4139 wakaba 1.1 !!!back-token; # <table>
4140     $token = {type => 'end tag', tag_name => 'table'};
4141     !!!back-token;
4142     $token = {type => 'end tag',
4143 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4144 wakaba 1.1 redo B;
4145     }
4146    
4147 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4148     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4149 wakaba 1.1 }
4150    
4151 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4152 wakaba 1.1
4153 wakaba 1.3 $self->_reset_insertion_mode;
4154 wakaba 1.1
4155     ## reprocess
4156     redo B;
4157     } else {
4158     #
4159     }
4160     } elsif ($token->{type} eq 'end tag') {
4161     if ($token->{tag_name} eq 'tr') {
4162     ## have an element in table scope
4163     my $i;
4164 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4165     my $node = $self->{open_elements}->[$_];
4166 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4167     $i = $_;
4168     last INSCOPE;
4169     } elsif ({
4170     table => 1, html => 1,
4171     }->{$node->[1]}) {
4172     last INSCOPE;
4173     }
4174     } # INSCOPE
4175     unless (defined $i) {
4176 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4177 wakaba 1.1 ## Ignore the token
4178     !!!next-token;
4179     redo B;
4180     }
4181    
4182     ## Clear back to table row context
4183     while (not {
4184     tr => 1, html => 1,
4185 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4186     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4187     pop @{$self->{open_elements}};
4188 wakaba 1.1 }
4189    
4190 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4191     $self->{insertion_mode} = 'in table body';
4192 wakaba 1.1 !!!next-token;
4193     redo B;
4194     } elsif ($token->{tag_name} eq 'table') {
4195     ## As if </tr>
4196     ## have an element in table scope
4197     my $i;
4198 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4199     my $node = $self->{open_elements}->[$_];
4200 wakaba 1.1 if ($node->[1] eq 'tr') {
4201     $i = $_;
4202     last INSCOPE;
4203     } elsif ({
4204     table => 1, html => 1,
4205     }->{$node->[1]}) {
4206     last INSCOPE;
4207     }
4208     } # INSCOPE
unless (defined $i) {
  ## The scan above found no |tr| element in table scope, so the
  ## implied </tr> has nothing to close.  Report the tag name of the
  ## offending token (|table| in this branch), not its token type,
  ## which would read 'unmatched end tag:end tag'.
  !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
  ## Ignore the token
  !!!next-token;
  redo B;
}
4215    
4216     ## Clear back to table row context
4217     while (not {
4218     tr => 1, html => 1,
4219 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4220     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4221     pop @{$self->{open_elements}};
4222 wakaba 1.1 }
4223    
4224 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4225     $self->{insertion_mode} = 'in table body';
4226 wakaba 1.1 ## reprocess
4227     redo B;
4228     } elsif ({
4229     tbody => 1, tfoot => 1, thead => 1,
4230     }->{$token->{tag_name}}) {
4231     ## have an element in table scope
4232     my $i;
4233 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4234     my $node = $self->{open_elements}->[$_];
4235 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4236     $i = $_;
4237     last INSCOPE;
4238     } elsif ({
4239     table => 1, html => 1,
4240     }->{$node->[1]}) {
4241     last INSCOPE;
4242     }
4243     } # INSCOPE
4244     unless (defined $i) {
4245 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4246 wakaba 1.1 ## Ignore the token
4247     !!!next-token;
4248     redo B;
4249     }
4250    
## As if </tr>
## have an element in table scope
## NOTE: A separate variable is used for this scan.  Declaring |my $i|
## a second time in this scope would mask the declaration made for the
## scan above; the index is only needed for the |defined| test below.
my $tr_index;
INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
  my $node = $self->{open_elements}->[$_];
  if ($node->[1] eq 'tr') {
    $tr_index = $_;
    last INSCOPE;
  } elsif ({
    table => 1, html => 1,
  }->{$node->[1]}) {
    ## The search stops at |table| or |html| without a match.
    last INSCOPE;
  }
} # INSCOPE
unless (defined $tr_index) {
  !!!parse-error (type => 'unmatched end tag:tr');
  ## Ignore the token
  !!!next-token;
  redo B;
}
4271    
4272     ## Clear back to table row context
4273     while (not {
4274     tr => 1, html => 1,
4275 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4276     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4277     pop @{$self->{open_elements}};
4278 wakaba 1.1 }
4279    
4280 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4281     $self->{insertion_mode} = 'in table body';
4282 wakaba 1.1 ## reprocess
4283     redo B;
4284     } elsif ({
4285     body => 1, caption => 1, col => 1,
4286     colgroup => 1, html => 1, td => 1, th => 1,
4287     }->{$token->{tag_name}}) {
4288 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4289 wakaba 1.1 ## Ignore the token
4290     !!!next-token;
4291     redo B;
4292     } else {
4293     #
4294     }
4295     } else {
4296     #
4297     }
4298    
4299     ## As if in table
4300 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4301 wakaba 1.1 $in_body->($insert_to_foster);
4302     redo B;
4303 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4304 wakaba 1.1 if ($token->{type} eq 'character') {
4305     ## NOTE: This is a code clone of "character in body".
4306     $reconstruct_active_formatting_elements->($insert_to_current);
4307    
4308 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4309 wakaba 1.1
4310     !!!next-token;
4311     redo B;
4312     } elsif ($token->{type} eq 'comment') {
4313     ## NOTE: This is a code clone of "comment in body".
4314     my $comment = $self->{document}->create_comment ($token->{data});
4315 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4316 wakaba 1.1 !!!next-token;
4317     redo B;
4318     } elsif ($token->{type} eq 'start tag') {
4319     if ({
4320     caption => 1, col => 1, colgroup => 1,
4321     tbody => 1, td => 1, tfoot => 1, th => 1,
4322     thead => 1, tr => 1,
4323     }->{$token->{tag_name}}) {
4324     ## have an element in table scope
4325     my $tn;
4326 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4327     my $node = $self->{open_elements}->[$_];
4328 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4329     $tn = $node->[1];
4330     last INSCOPE;
4331     } elsif ({
4332     table => 1, html => 1,
4333     }->{$node->[1]}) {
4334     last INSCOPE;
4335     }
4336     } # INSCOPE
4337     unless (defined $tn) {
4338 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4339 wakaba 1.1 ## Ignore the token
4340     !!!next-token;
4341     redo B;
4342     }
4343    
4344     ## Close the cell
4345     !!!back-token; # <?>
4346     $token = {type => 'end tag', tag_name => $tn};
4347     redo B;
4348     } else {
4349     #
4350     }
4351     } elsif ($token->{type} eq 'end tag') {
4352     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4353     ## have an element in table scope
4354     my $i;
4355 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4356     my $node = $self->{open_elements}->[$_];
4357 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4358     $i = $_;
4359     last INSCOPE;
4360     } elsif ({
4361     table => 1, html => 1,
4362     }->{$node->[1]}) {
4363     last INSCOPE;
4364     }
4365     } # INSCOPE
4366     unless (defined $i) {
4367 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4368 wakaba 1.1 ## Ignore the token
4369     !!!next-token;
4370     redo B;
4371     }
4372    
4373     ## generate implied end tags
4374     if ({
4375     dd => 1, dt => 1, li => 1, p => 1,
4376     td => ($token->{tag_name} eq 'th'),
4377     th => ($token->{tag_name} eq 'td'),
4378     tr => 1,
4379 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4380 wakaba 1.1 !!!back-token;
4381     $token = {type => 'end tag',
4382 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4383 wakaba 1.1 redo B;
4384     }
4385    
4386 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4387     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4388 wakaba 1.1 }
4389    
4390 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4391 wakaba 1.1
4392     $clear_up_to_marker->();
4393    
4394 wakaba 1.3 $self->{insertion_mode} = 'in row';
4395 wakaba 1.1
4396     !!!next-token;
4397     redo B;
4398     } elsif ({
4399     body => 1, caption => 1, col => 1,
4400     colgroup => 1, html => 1,
4401     }->{$token->{tag_name}}) {
4402 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4403 wakaba 1.1 ## Ignore the token
4404     !!!next-token;
4405     redo B;
4406     } elsif ({
4407     table => 1, tbody => 1, tfoot => 1,
4408     thead => 1, tr => 1,
4409     }->{$token->{tag_name}}) {
4410     ## have an element in table scope
4411     my $i;
4412     my $tn;
4413 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4414     my $node = $self->{open_elements}->[$_];
4415 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4416     $i = $_;
4417     last INSCOPE;
4418     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4419     $tn = $node->[1];
4420     ## NOTE: There is exactly one |td| or |th| element
4421     ## in scope in the stack of open elements by definition.
4422     } elsif ({
4423     table => 1, html => 1,
4424     }->{$node->[1]}) {
4425     last INSCOPE;
4426     }
4427     } # INSCOPE
4428     unless (defined $i) {
4429 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4430 wakaba 1.1 ## Ignore the token
4431     !!!next-token;
4432     redo B;
4433     }
4434    
4435     ## Close the cell
4436     !!!back-token; # </?>
4437     $token = {type => 'end tag', tag_name => $tn};
4438     redo B;
4439     } else {
4440     #
4441     }
4442     } else {
4443     #
4444     }
4445    
4446     $in_body->($insert_to_current);
4447     redo B;
4448 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4449 wakaba 1.1 if ($token->{type} eq 'character') {
4450 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4451 wakaba 1.1 !!!next-token;
4452     redo B;
4453     } elsif ($token->{type} eq 'comment') {
4454     my $comment = $self->{document}->create_comment ($token->{data});
4455 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4456 wakaba 1.1 !!!next-token;
4457     redo B;
4458     } elsif ($token->{type} eq 'start tag') {
4459     if ($token->{tag_name} eq 'option') {
4460 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4461 wakaba 1.1 ## As if </option>
4462 wakaba 1.3 pop @{$self->{open_elements}};
4463 wakaba 1.1 }
4464    
4465     !!!insert-element ($token->{tag_name}, $token->{attributes});
4466     !!!next-token;
4467     redo B;
4468     } elsif ($token->{tag_name} eq 'optgroup') {
4469 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4470 wakaba 1.1 ## As if </option>
4471 wakaba 1.3 pop @{$self->{open_elements}};
4472 wakaba 1.1 }
4473    
4474 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4475 wakaba 1.1 ## As if </optgroup>
4476 wakaba 1.3 pop @{$self->{open_elements}};
4477 wakaba 1.1 }
4478    
4479     !!!insert-element ($token->{tag_name}, $token->{attributes});
4480     !!!next-token;
4481     redo B;
4482     } elsif ($token->{tag_name} eq 'select') {
4483 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4484 wakaba 1.1 ## As if </select> instead
4485     ## have an element in table scope
4486     my $i;
4487 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4488     my $node = $self->{open_elements}->[$_];
4489 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4490     $i = $_;
4491     last INSCOPE;
4492     } elsif ({
4493     table => 1, html => 1,
4494     }->{$node->[1]}) {
4495     last INSCOPE;
4496     }
4497     } # INSCOPE
4498     unless (defined $i) {
4499 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4500 wakaba 1.1 ## Ignore the token
4501     !!!next-token;
4502     redo B;
4503     }
4504    
4505 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4506 wakaba 1.1
4507 wakaba 1.3 $self->_reset_insertion_mode;
4508 wakaba 1.1
4509     !!!next-token;
4510     redo B;
4511     } else {
4512     #
4513     }
4514     } elsif ($token->{type} eq 'end tag') {
4515     if ($token->{tag_name} eq 'optgroup') {
4516 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4517     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4518 wakaba 1.1 ## As if </option>
4519 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4520     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4521     pop @{$self->{open_elements}};
4522 wakaba 1.1 } else {
4523 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4524 wakaba 1.1 ## Ignore the token
4525     }
4526     !!!next-token;
4527     redo B;
4528     } elsif ($token->{tag_name} eq 'option') {
4529 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4530     pop @{$self->{open_elements}};
4531 wakaba 1.1 } else {
4532 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4533 wakaba 1.1 ## Ignore the token
4534     }
4535     !!!next-token;
4536     redo B;
4537     } elsif ($token->{tag_name} eq 'select') {
4538     ## have an element in table scope
4539     my $i;
4540 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4541     my $node = $self->{open_elements}->[$_];
4542 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4543     $i = $_;
4544     last INSCOPE;
4545     } elsif ({
4546     table => 1, html => 1,
4547     }->{$node->[1]}) {
4548     last INSCOPE;
4549     }
4550     } # INSCOPE
4551     unless (defined $i) {
4552 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4553 wakaba 1.1 ## Ignore the token
4554     !!!next-token;
4555     redo B;
4556     }
4557    
4558 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4559 wakaba 1.1
4560 wakaba 1.3 $self->_reset_insertion_mode;
4561 wakaba 1.1
4562     !!!next-token;
4563     redo B;
4564     } elsif ({
4565     caption => 1, table => 1, tbody => 1,
4566     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4567     }->{$token->{tag_name}}) {
4568 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4569 wakaba 1.1
4570     ## have an element in table scope
4571     my $i;
4572 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4573     my $node = $self->{open_elements}->[$_];
4574 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4575     $i = $_;
4576     last INSCOPE;
4577     } elsif ({
4578     table => 1, html => 1,
4579     }->{$node->[1]}) {
4580     last INSCOPE;
4581     }
4582     } # INSCOPE
4583     unless (defined $i) {
4584     ## Ignore the token
4585     !!!next-token;
4586     redo B;
4587     }
4588    
4589     ## As if </select>
4590     ## have an element in table scope
4591     undef $i;
4592 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4593     my $node = $self->{open_elements}->[$_];
4594 wakaba 1.1 if ($node->[1] eq 'select') {
4595     $i = $_;
4596     last INSCOPE;
4597     } elsif ({
4598     table => 1, html => 1,
4599     }->{$node->[1]}) {
4600     last INSCOPE;
4601     }
4602     } # INSCOPE
4603     unless (defined $i) {
4604 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4605 wakaba 1.1 ## Ignore the </select> token
4606     !!!next-token; ## TODO: ok?
4607     redo B;
4608     }
4609    
4610 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4611 wakaba 1.1
4612 wakaba 1.3 $self->_reset_insertion_mode;
4613 wakaba 1.1
4614     ## reprocess
4615     redo B;
4616     } else {
4617     #
4618     }
4619     } else {
4620     #
4621     }
4622    
4623 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4624 wakaba 1.1 ## Ignore the token
4625     !!!next-token;
4626     redo B;
4627 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4628 wakaba 1.1 if ($token->{type} eq 'character') {
4629     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4630     ## As if in body
4631     $reconstruct_active_formatting_elements->($insert_to_current);
4632    
4633 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4634 wakaba 1.1
4635     unless (length $token->{data}) {
4636     !!!next-token;
4637     redo B;
4638     }
4639     }
4640    
4641     #
4642 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4643 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4644     my $comment = $self->{document}->create_comment ($token->{data});
4645 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4646 wakaba 1.1 !!!next-token;
4647     redo B;
4648 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4649     !!!parse-error (type => 'after body:'.$token->{tag_name});
4650     #
4651 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4652     if ($token->{tag_name} eq 'html') {
4653 wakaba 1.3 if (defined $self->{inner_html_node}) {
4654     !!!parse-error (type => 'unmatched end tag:html');
4655     ## Ignore the token
4656     !!!next-token;
4657     redo B;
4658     } else {
4659     $phase = 'trailing end';
4660     !!!next-token;
4661     redo B;
4662     }
4663 wakaba 1.1 } else {
4664 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4665 wakaba 1.1 }
4666     } else {
4667 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4668 wakaba 1.1 }
4669    
4670 wakaba 1.3 $self->{insertion_mode} = 'in body';
4671 wakaba 1.1 ## reprocess
4672     redo B;
4673 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4674 wakaba 1.1 if ($token->{type} eq 'character') {
4675     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4676 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4677 wakaba 1.1
4678     unless (length $token->{data}) {
4679     !!!next-token;
4680     redo B;
4681     }
4682     }
4683    
4684     #
4685     } elsif ($token->{type} eq 'comment') {
4686     my $comment = $self->{document}->create_comment ($token->{data});
4687 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4688 wakaba 1.1 !!!next-token;
4689     redo B;
4690     } elsif ($token->{type} eq 'start tag') {
4691     if ($token->{tag_name} eq 'frameset') {
4692     !!!insert-element ($token->{tag_name}, $token->{attributes});
4693     !!!next-token;
4694     redo B;
4695     } elsif ($token->{tag_name} eq 'frame') {
4696     !!!insert-element ($token->{tag_name}, $token->{attributes});
4697 wakaba 1.3 pop @{$self->{open_elements}};
4698 wakaba 1.1 !!!next-token;
4699     redo B;
4700     } elsif ($token->{tag_name} eq 'noframes') {
4701     $in_body->($insert_to_current);
4702     redo B;
4703     } else {
4704     #
4705     }
4706     } elsif ($token->{type} eq 'end tag') {
4707     if ($token->{tag_name} eq 'frameset') {
4708 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4709     @{$self->{open_elements}} == 1) {
4710     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4711 wakaba 1.1 ## Ignore the token
4712     !!!next-token;
4713     } else {
4714 wakaba 1.3 pop @{$self->{open_elements}};
4715 wakaba 1.1 !!!next-token;
4716     }
4717    
4718     ## if not inner_html and
4719 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4720     $self->{insertion_mode} = 'after frameset';
4721 wakaba 1.1 }
4722     redo B;
4723     } else {
4724     #
4725     }
4726     } else {
4727     #
4728     }
4729    
4730 wakaba 1.3 if (defined $token->{tag_name}) {
4731     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4732     } else {
4733     !!!parse-error (type => 'in frameset:#'.$token->{type});
4734     }
4735 wakaba 1.1 ## Ignore the token
4736     !!!next-token;
4737     redo B;
4738 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4739 wakaba 1.1 if ($token->{type} eq 'character') {
4740     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4741 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4742 wakaba 1.1
4743     unless (length $token->{data}) {
4744     !!!next-token;
4745     redo B;
4746     }
4747     }
4748    
4749     #
4750     } elsif ($token->{type} eq 'comment') {
4751     my $comment = $self->{document}->create_comment ($token->{data});
4752 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4753 wakaba 1.1 !!!next-token;
4754     redo B;
4755     } elsif ($token->{type} eq 'start tag') {
4756     if ($token->{tag_name} eq 'noframes') {
4757     $in_body->($insert_to_current);
4758     redo B;
4759     } else {
4760     #
4761     }
4762     } elsif ($token->{type} eq 'end tag') {
4763     if ($token->{tag_name} eq 'html') {
4764     $phase = 'trailing end';
4765     !!!next-token;
4766     redo B;
4767     } else {
4768     #
4769     }
4770     } else {
4771     #
4772     }
4773    
4774 wakaba 1.3 if (defined $token->{tag_name}) {
4775     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4776     } else {
4777     !!!parse-error (type => 'after frameset:#'.$token->{type});
4778     }
4779 wakaba 1.1 ## Ignore the token
4780     !!!next-token;
4781     redo B;
4782    
4783     ## ISSUE: An issue in spec there
4784     } else {
4785 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4786 wakaba 1.1 }
4787     }
4788     } elsif ($phase eq 'trailing end') {
4789     ## states in the main stage is preserved yet # MUST
4790    
4791     if ($token->{type} eq 'DOCTYPE') {
4792 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4793 wakaba 1.1 ## Ignore the token
4794     !!!next-token;
4795     redo B;
4796     } elsif ($token->{type} eq 'comment') {
4797     my $comment = $self->{document}->create_comment ($token->{data});
4798     $self->{document}->append_child ($comment);
4799     !!!next-token;
4800     redo B;
4801     } elsif ($token->{type} eq 'character') {
4802     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4803     my $data = $1;
4804     ## As if in the main phase.
4805     ## NOTE: The insertion mode in the main phase
4806     ## just before the phase has been changed to the trailing
4807     ## end phase is either "after body" or "after frameset".
4808     $reconstruct_active_formatting_elements->($insert_to_current)
4809     if $phase eq 'main';
4810    
4811 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4812 wakaba 1.1
4813     unless (length $token->{data}) {
4814     !!!next-token;
4815     redo B;
4816     }
4817     }
4818    
4819 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4820 wakaba 1.1 $phase = 'main';
4821     ## reprocess
4822     redo B;
4823     } elsif ($token->{type} eq 'start tag' or
4824     $token->{type} eq 'end tag') {
4825 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4826 wakaba 1.1 $phase = 'main';
4827     ## reprocess
4828     redo B;
4829     } elsif ($token->{type} eq 'end-of-file') {
4830     ## Stop parsing
4831     last B;
4832     } else {
4833     die "$0: $token->{type}: Unknown token";
4834     }
4835     }
4836     } # B
4837    
4838     ## Stop parsing # MUST
4839    
4840     ## TODO: script stuffs
4841 wakaba 1.3 } # _tree_construct_main
4842    
sub set_inner_html ($$$) {
  ## Replace the children of |$node| with the result of parsing an HTML
  ## string.
  ##
  ## Arguments (after the invocant class): the target node, the source
  ## string, and an optional parse error callback.  When no callback is
  ## given, parse errors in the element case are reported with |warn|.
  ##
  ## A document node (node type 9) is emptied and handed to
  ## |parse_string|.  An element node (node type 1) is parsed in a
  ## scratch document whose root |html| element's children are then
  ## moved into |$node|.  Any other node type is fatal.
  my ($class, $node) = (shift, shift);
  my $input = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  unless ($nt == 9 or $nt == 1) {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }

  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Iterate over a snapshot, since the child list is being modified.
    my @old_children = @{$node->child_nodes};
    $node->remove_child ($_) for @old_children;

    ## Step 3, 4, 5 # MUST
    return $class->parse_string ($$input => $node, $onerror);
  }

  ## From here on |$node| is an element.
  ## TODO: If non-html element

  ## NOTE: Most of this code is copied from |parse_string|

  ## Step 1 # MUST
  my $doc = $node->owner_document->implementation->create_document;
  ## TODO: Mark as HTML document
  my $p = $class->new;
  $p->{document} = $doc;

  ## Step 9 # MUST
  my $pos = 0;
  my $line = 1;
  my $column = 0;
  $p->{set_next_input_character} = sub {
    my $self = shift;

    ## End of input is signalled by -1.
    if ($pos >= length $$input) {
      $self->{next_input_character} = -1;
      return;
    }

    $self->{next_input_character} = ord substr $$input, $pos++, 1;
    $column++;

    my $code = $self->{next_input_character};
    if ($code == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($code == 0x000D) { # CR
      ## Look one character ahead: a following LF is swallowed, any
      ## other character is queued in |$self->{char}|.
      if ($pos < length $$input) {
        my $lookahead = ord substr $$input, $pos++, 1;
        push @{$self->{char}}, $lookahead unless $lookahead == 0x000A; # LF
      }
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($code > 0x10FFFF or $code == 0x0000) { # out of range or NULL
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  ## Error reporting: the caller's callback if there is one, otherwise
  ## a warning.  The current line and column are appended either way.
  my $ponerror = $onerror || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  $p->{parse_error} = sub {
    $ponerror->(@_, line => $line, column => $column);
  };

  $p->_initialize_tokenizer;
  $p->_initialize_tree_constructor;

  ## Step 2
  my %content_model_for = (
    title     => 'RCDATA',
    textarea  => 'RCDATA',
    style     => 'CDATA',
    script    => 'CDATA',
    xmp       => 'CDATA',
    iframe    => 'CDATA',
    noembed   => 'CDATA',
    noframes  => 'CDATA',
    noscript  => 'CDATA',
    plaintext => 'PLAINTEXT',
  );
  my $node_ln = $node->local_name;
  $p->{content_model_flag} = $content_model_for{$node_ln} || 'PCDATA';
  ## ISSUE: What is "the name of the element"? local name?

  $p->{inner_html_node} = [$node, $node_ln];

  ## Step 4
  my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

  ## Step 5 # MUST
  $doc->append_child ($root);

  ## Step 6 # MUST
  push @{$p->{open_elements}}, [$root, 'html'];

  undef $p->{head_element};

  ## Step 7 # MUST
  $p->_reset_insertion_mode;

  ## Step 8 # MUST
  ## Walk from |$node| up through its ancestors and remember the
  ## nearest XHTML |form| element, if any.
  AN: for (my $anode = $node; defined $anode; $anode = $anode->parent_node) {
    next AN unless $anode->node_type == 1;
    my $nsuri = $anode->namespace_uri;
    next AN unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    if ($anode->local_name eq 'form') { ## TODO: case?
      $p->{form_element} = $anode;
      last AN;
    }
  } # AN

  ## Step 3 # MUST
  ## Step 10 # MUST
  {
    ## The token macro below is written in terms of |$self|.
    my $self = $p;
    !!!next-token;
  }
  $p->_tree_construction_main;

  ## Step 11 # MUST
  my @stale_children = @{$node->child_nodes};
  $node->remove_child ($_) for @stale_children;
  ## ISSUE: mutation events? read-only?

  ## Step 12 # MUST
  my @parsed_children = @{$root->child_nodes};
  $node->append_child ($_) for @parsed_children;
  ## ISSUE: adopt_node? mutation events?

  return $p->_terminate_tree_constructor;
} # set_inner_html
4996    
4997     } # tree construction stage
4998 wakaba 1.1
sub get_inner_html ($$$) {
  ## Serialize the children of |$node| as an HTML string.
  ##
  ## Arguments (after the ignored invocant): the node whose children
  ## are serialized, and an optional callback that is invoked with any
  ## child node whose type this code cannot serialize.
  ##
  ## Returns a reference to the resulting string.
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted literally, without escaping.
  my %is_cdata_element = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );

  ## Elements that are serialized as a start tag only.
  my %is_void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  ## Escaping shared by attribute values and ordinary text.
  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## Text is literal if |$node| itself or any of its ancestors is an
  ## XHTML-namespace CDATA element.
  my $in_cdata;
  ANCESTOR: for (my $parent = $node; defined $parent;
                 $parent = $parent->parent_node) {
    next ANCESTOR unless $parent->node_type == 1;
    ## A node in no namespace reports an undefined namespace URI.
    my $nsuri = $parent->namespace_uri;
    next ANCESTOR
        unless defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml';
    if ($is_cdata_element{$parent->local_name}) { ## TODO: case thingy
      $in_cdata = 1;
      last ANCESTOR; # one match is enough
    }
  } # ANCESTOR

  ## Step 2
  ## |@node| is a work queue in document order.  Besides node objects
  ## it holds plain strings: pending end tags, and the marker
  ## 'cdata-out', which ends the literal-text region opened by a CDATA
  ## element.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        my $attr_value = $escape->($attr->value);
        $s .= ' ' . $attr_name . '="' . $attr_value . '"';
      }
      $s .= '>';

      next C if $is_void_element{$tag_name};

      if (not $in_cdata and $is_cdata_element{$tag_name}) {
        ## Queued first so that it ends up after this element's end tag.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text, CDATA section
      $s .= $in_cdata ? $child->data : $escape->($child->data);
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # Document type
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children go to the FRONT of the queue so that they are
      ## serialized at the position of the entity reference.  Appending
      ## them to the end would emit them after all following siblings
      ## and after any pending end tags.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5095    
5096     1;
5097 wakaba 1.10 # $Date: 2007/06/23 02:41:51 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24