/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (hide annotations) (download) (as text)
Sat Jun 23 06:38:12 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.13: +14 -4 lines
File MIME type: application/x-wais-source
++ whatpm/t/ChangeLog	23 Jun 2007 06:37:09 -0000
	* tokenizer-test-1.test: |™| test added.  (HTML5 revision 889.)

	* HTML-tree.t: Output test file names.  Escaped
	new line at the end of test data was removed.

	* tokenizer-test-2.dat: Tests for newlines, NULL, and
	escape flag stuff in |set_inner_html|.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/ChangeLog	23 Jun 2007 06:35:23 -0000
	* HTML.pm.src (set_inner_html): HTML5 revision 892 (adopt
	nodes before appended).  Parser was not ready for NULL
	parse error and escape flag.

	* NanoDOM.pm (adopt_node): New.

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.14 our $VERSION=do{my @r=(q$Revision: 1.13 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Start tag names that may be written with a trailing "/" directly
## before ">" (e.g. |<br/>|).  The tokenizer reports a 'nestc' parse
## error for a "/" in any tag whose name is not listed here.
my $permitted_slash_tag_name = {
  map { $_ => 1 } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Named character references: maps an entity name (a case-sensitive
## hash key, without the leading "&" and trailing ";") to the single
## character it stands for.  The upper-case names |AMP|, |COPY|, |GT|,
## |LT|, |QUOT| and |REG| are aliases that map to the same characters
## as their lower-case counterparts.
## NOTE(review): the code that looks names up in this table is outside
## this excerpt.
21     my $entity_char = {
22     AElig => "\x{00C6}",
23     Aacute => "\x{00C1}",
24     Acirc => "\x{00C2}",
25     Agrave => "\x{00C0}",
26     Alpha => "\x{0391}",
27     Aring => "\x{00C5}",
28     Atilde => "\x{00C3}",
29     Auml => "\x{00C4}",
30     Beta => "\x{0392}",
31     Ccedil => "\x{00C7}",
32     Chi => "\x{03A7}",
33     Dagger => "\x{2021}",
34     Delta => "\x{0394}",
35     ETH => "\x{00D0}",
36     Eacute => "\x{00C9}",
37     Ecirc => "\x{00CA}",
38     Egrave => "\x{00C8}",
39     Epsilon => "\x{0395}",
40     Eta => "\x{0397}",
41     Euml => "\x{00CB}",
42     Gamma => "\x{0393}",
43     Iacute => "\x{00CD}",
44     Icirc => "\x{00CE}",
45     Igrave => "\x{00CC}",
46     Iota => "\x{0399}",
47     Iuml => "\x{00CF}",
48     Kappa => "\x{039A}",
49     Lambda => "\x{039B}",
50     Mu => "\x{039C}",
51     Ntilde => "\x{00D1}",
52     Nu => "\x{039D}",
53     OElig => "\x{0152}",
54     Oacute => "\x{00D3}",
55     Ocirc => "\x{00D4}",
56     Ograve => "\x{00D2}",
57     Omega => "\x{03A9}",
58     Omicron => "\x{039F}",
59     Oslash => "\x{00D8}",
60     Otilde => "\x{00D5}",
61     Ouml => "\x{00D6}",
62     Phi => "\x{03A6}",
63     Pi => "\x{03A0}",
64     Prime => "\x{2033}",
65     Psi => "\x{03A8}",
66     Rho => "\x{03A1}",
67     Scaron => "\x{0160}",
68     Sigma => "\x{03A3}",
69     THORN => "\x{00DE}",
70     Tau => "\x{03A4}",
71     Theta => "\x{0398}",
72     Uacute => "\x{00DA}",
73     Ucirc => "\x{00DB}",
74     Ugrave => "\x{00D9}",
75     Upsilon => "\x{03A5}",
76     Uuml => "\x{00DC}",
77     Xi => "\x{039E}",
78     Yacute => "\x{00DD}",
79     Yuml => "\x{0178}",
80     Zeta => "\x{0396}",
81     aacute => "\x{00E1}",
82     acirc => "\x{00E2}",
83     acute => "\x{00B4}",
84     aelig => "\x{00E6}",
85     agrave => "\x{00E0}",
86     alefsym => "\x{2135}",
87     alpha => "\x{03B1}",
88     amp => "\x{0026}",
89     AMP => "\x{0026}",
90     and => "\x{2227}",
91     ang => "\x{2220}",
92     apos => "\x{0027}",
93     aring => "\x{00E5}",
94     asymp => "\x{2248}",
95     atilde => "\x{00E3}",
96     auml => "\x{00E4}",
97     bdquo => "\x{201E}",
98     beta => "\x{03B2}",
99     brvbar => "\x{00A6}",
100     bull => "\x{2022}",
101     cap => "\x{2229}",
102     ccedil => "\x{00E7}",
103     cedil => "\x{00B8}",
104     cent => "\x{00A2}",
105     chi => "\x{03C7}",
106     circ => "\x{02C6}",
107     clubs => "\x{2663}",
108     cong => "\x{2245}",
109     copy => "\x{00A9}",
110     COPY => "\x{00A9}",
111     crarr => "\x{21B5}",
112     cup => "\x{222A}",
113     curren => "\x{00A4}",
114     dArr => "\x{21D3}",
115     dagger => "\x{2020}",
116     darr => "\x{2193}",
117     deg => "\x{00B0}",
118     delta => "\x{03B4}",
119     diams => "\x{2666}",
120     divide => "\x{00F7}",
121     eacute => "\x{00E9}",
122     ecirc => "\x{00EA}",
123     egrave => "\x{00E8}",
124     empty => "\x{2205}",
125     emsp => "\x{2003}",
126     ensp => "\x{2002}",
127     epsilon => "\x{03B5}",
128     equiv => "\x{2261}",
129     eta => "\x{03B7}",
130     eth => "\x{00F0}",
131     euml => "\x{00EB}",
132     euro => "\x{20AC}",
133     exist => "\x{2203}",
134     fnof => "\x{0192}",
135     forall => "\x{2200}",
136     frac12 => "\x{00BD}",
137     frac14 => "\x{00BC}",
138     frac34 => "\x{00BE}",
139     frasl => "\x{2044}",
140     gamma => "\x{03B3}",
141     ge => "\x{2265}",
142     gt => "\x{003E}",
143     GT => "\x{003E}",
144     hArr => "\x{21D4}",
145     harr => "\x{2194}",
146     hearts => "\x{2665}",
147     hellip => "\x{2026}",
148     iacute => "\x{00ED}",
149     icirc => "\x{00EE}",
150     iexcl => "\x{00A1}",
151     igrave => "\x{00EC}",
152     image => "\x{2111}",
153     infin => "\x{221E}",
154     int => "\x{222B}",
155     iota => "\x{03B9}",
156     iquest => "\x{00BF}",
157     isin => "\x{2208}",
158     iuml => "\x{00EF}",
159     kappa => "\x{03BA}",
160     lArr => "\x{21D0}",
161     lambda => "\x{03BB}",
162     lang => "\x{2329}",
163     laquo => "\x{00AB}",
164     larr => "\x{2190}",
165     lceil => "\x{2308}",
166     ldquo => "\x{201C}",
167     le => "\x{2264}",
168     lfloor => "\x{230A}",
169     lowast => "\x{2217}",
170     loz => "\x{25CA}",
171     lrm => "\x{200E}",
172     lsaquo => "\x{2039}",
173     lsquo => "\x{2018}",
174     lt => "\x{003C}",
175     LT => "\x{003C}",
176     macr => "\x{00AF}",
177     mdash => "\x{2014}",
178     micro => "\x{00B5}",
179     middot => "\x{00B7}",
180     minus => "\x{2212}",
181     mu => "\x{03BC}",
182     nabla => "\x{2207}",
183     nbsp => "\x{00A0}",
184     ndash => "\x{2013}",
185     ne => "\x{2260}",
186     ni => "\x{220B}",
187     not => "\x{00AC}",
188     notin => "\x{2209}",
189     nsub => "\x{2284}",
190     ntilde => "\x{00F1}",
191     nu => "\x{03BD}",
192     oacute => "\x{00F3}",
193     ocirc => "\x{00F4}",
194     oelig => "\x{0153}",
195     ograve => "\x{00F2}",
196     oline => "\x{203E}",
197     omega => "\x{03C9}",
198     omicron => "\x{03BF}",
199     oplus => "\x{2295}",
200     or => "\x{2228}",
201     ordf => "\x{00AA}",
202     ordm => "\x{00BA}",
203     oslash => "\x{00F8}",
204     otilde => "\x{00F5}",
205     otimes => "\x{2297}",
206     ouml => "\x{00F6}",
207     para => "\x{00B6}",
208     part => "\x{2202}",
209     permil => "\x{2030}",
210     perp => "\x{22A5}",
211     phi => "\x{03C6}",
212     pi => "\x{03C0}",
213     piv => "\x{03D6}",
214     plusmn => "\x{00B1}",
215     pound => "\x{00A3}",
216     prime => "\x{2032}",
217     prod => "\x{220F}",
218     prop => "\x{221D}",
219     psi => "\x{03C8}",
220     quot => "\x{0022}",
221     QUOT => "\x{0022}",
222     rArr => "\x{21D2}",
223     radic => "\x{221A}",
224     rang => "\x{232A}",
225     raquo => "\x{00BB}",
226     rarr => "\x{2192}",
227     rceil => "\x{2309}",
228     rdquo => "\x{201D}",
229     real => "\x{211C}",
230     reg => "\x{00AE}",
231     REG => "\x{00AE}",
232     rfloor => "\x{230B}",
233     rho => "\x{03C1}",
234     rlm => "\x{200F}",
235     rsaquo => "\x{203A}",
236     rsquo => "\x{2019}",
237     sbquo => "\x{201A}",
238     scaron => "\x{0161}",
239     sdot => "\x{22C5}",
240     sect => "\x{00A7}",
241     shy => "\x{00AD}",
242     sigma => "\x{03C3}",
243     sigmaf => "\x{03C2}",
244     sim => "\x{223C}",
245     spades => "\x{2660}",
246     sub => "\x{2282}",
247     sube => "\x{2286}",
248     sum => "\x{2211}",
249     sup => "\x{2283}",
250     sup1 => "\x{00B9}",
251     sup2 => "\x{00B2}",
252     sup3 => "\x{00B3}",
253     supe => "\x{2287}",
254     szlig => "\x{00DF}",
255     tau => "\x{03C4}",
256     there4 => "\x{2234}",
257     theta => "\x{03B8}",
258     thetasym => "\x{03D1}",
259     thinsp => "\x{2009}",
260     thorn => "\x{00FE}",
261     tilde => "\x{02DC}",
262     times => "\x{00D7}",
263     trade => "\x{2122}",
264     uArr => "\x{21D1}",
265     uacute => "\x{00FA}",
266     uarr => "\x{2191}",
267     ucirc => "\x{00FB}",
268     ugrave => "\x{00F9}",
269     uml => "\x{00A8}",
270     upsih => "\x{03D2}",
271     upsilon => "\x{03C5}",
272     uuml => "\x{00FC}",
273     weierp => "\x{2118}",
274     xi => "\x{03BE}",
275     yacute => "\x{00FD}",
276     yen => "\x{00A5}",
277     yuml => "\x{00FF}",
278     zeta => "\x{03B6}",
279     zwj => "\x{200D}",
280     zwnj => "\x{200C}",
281 wakaba 1.4 }; # $entity_char
282    
## Replacement code points for the code points 0x80..0x9F.  Keys are
## the original code points (stored, like every Perl hash key, as
## decimal strings such as "128"); values are the code points to use
## instead, as numbers.  0xFFFD (REPLACEMENT CHARACTER) fills the
## positions that have no dedicated replacement.
## NOTE(review): presumably consulted when a numeric character
## reference names one of these code points -- the consumer is outside
## this excerpt.
my $c1_entity_char = do {
  my @replacement = (
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, # 0x80..0x87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD, # 0x88..0x8F
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, # 0x90..0x97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178, # 0x98..0x9F
  );
  my %replacement_of;
  @replacement_of{0x80 .. 0x9F} = @replacement;
  \%replacement_of;
}; # $c1_entity_char
317 wakaba 1.1
## Element names (local names, lower case) that belong to the
## "special" category.  Each listed name maps to 1.
## NOTE(review): the consumers of this table are outside this excerpt.
my $special_category = {
  map { $_ => 1 } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Element names (lower case) that belong to the "scoping" category.
## Each listed name maps to 1.
## NOTE(review): the consumers of this table are outside this excerpt.
my $scoping_category = {
  map { $_ => 1 } qw(button caption html marquee object table td th)
};
## Element names (lower case) that belong to the "formatting"
## category.  Each listed name maps to 1.
## The |strike| key used to be misspelled |strile|, which meant that
## |strike| elements were never recognised as formatting elements.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
338     # $phrasing_category: all other elements
339    
## Parses a string as an HTML document.
##
##   $document = Whatpm::HTML->parse_string ($string, $document, $onerror);
##
## Arguments:
##   $string   - The input; read through a reference, never modified.
##   $document - Stored as |$self->{document}| and returned at the end.
##   $onerror  - Optional code reference.  It is called with the
##               key/value pairs given at the point of the error
##               followed by |line => $line, column => $column|.  If
##               omitted, errors are reported with |warn|.
##
## A fresh parser object is created via |new| on the invocant, so this
## has to be called as a class method.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $i = 0;      # index of the next unread character in |$$s|
  my $line = 1;   # current line, for error reporting
  my $column = 0; # current column, reset to 0 after every newline

  ## Reads one character of input into |$self->{next_input_character}|
  ## (as a code point, or -1 at the end of the input) and shifts the
  ## previous value into the three-entry |prev_input_character| history.
  $self->{set_next_input_character} = sub {
    my $self = shift;

    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($i >= length $$s) {
      $self->{next_input_character} = -1;
      return;
    }
    $self->{next_input_character} = ord substr $$s, $i++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## CR LF and a lone CR are both reported as a single LF.  Only a
      ## directly following LF is skipped; any other character is left
      ## unread so that the next call gives it the normal treatment
      ## (CR folding, NULL replacement, line/column counting and the
      ## |prev_input_character| history).  Earlier code consumed that
      ## character here and pushed its raw code point onto
      ## |$self->{char}|, which bypassed all of the above.
      $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Adds the current position to whatever the error site reports.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
402    
## Creates a parser object with placeholder callbacks: an input
## reader that always reports end of input (-1) and an error handler
## that does nothing.  |parse_string| replaces both.
##
## The reader works on the parser it is handed as its first argument,
## the same calling convention the reader installed by |parse_string|
## relies on.  It used to capture |$self| instead; storing such a
## closure inside |$self| made a reference cycle, so a parser whose
## reader was never replaced could not be freed.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    my $parser = shift;
    $parser->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
414    
415     ## Implementations MUST act as if they used the state machine described in the spec.
416    
## Resets the tokenizer part of the parser state and reads the first
## input character.
sub _initialize_tokenizer ($) {
  my $self = shift;

  ## Initial state and content model.
  $self->{state} = 'data'; # MUST
  $self->{content_model_flag} = 'PCDATA'; # be

  ## Nothing is under construction yet.  The keys are kept, with
  ## undefined values.  |current_token| will be a start tag, end tag,
  ## comment, or DOCTYPE token.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  ## |char| holds code points handed back to the input; it has to be
  ## empty before the first character is requested.
  $self->{char} = [];
  # $self->{next_input_character}
  !!!next-input-character;

  ## Tokens that are ready but have not been returned by
  ## |_get_next_token| yet.
  $self->{token} = [];
} # _initialize_tokenizer
430    
431     ## A token has:
432     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
433     ## 'character', or 'end-of-file'
434     ## ->{name} (DOCTYPE), ->{tag_name} (start tag, end tag; "tagname" in the spec)
435     ## ISSUE: the spec needs s/tagname/tag name/
436     ## ->{error} == 1 or 0 (DOCTYPE)
437     ## ->{attributes} isa HASH (start tag, end tag)
438     ## ->{data} (comment, character)
439    
440     ## Macros
441     ## Macros MUST be preceded by three EXCLAMATION MARKs.
442     ## emit ($token)
443     ## Emits the specified token.
444    
445     ## Emitted token MUST immediately be handled by the tree construction state.
446    
447     ## Before each step, UA MAY check to see if either one of the scripts in
448     ## "list of scripts that will execute as soon as possible" or the first
449     ## script in the "list of scripts that will execute asynchronously",
450     ## has completed loading. If one has, then it MUST be executed
451     ## and removed from the list.
452    
453     sub _get_next_token ($) {
454     my $self = shift;
455     if (@{$self->{token}}) {
456     return shift @{$self->{token}};
457     }
458    
459     A: {
460     if ($self->{state} eq 'data') {
461     if ($self->{next_input_character} == 0x0026) { # &
462     if ($self->{content_model_flag} eq 'PCDATA' or
463     $self->{content_model_flag} eq 'RCDATA') {
464     $self->{state} = 'entity data';
465     !!!next-input-character;
466     redo A;
467     } else {
468     #
469     }
470 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
471     if ($self->{content_model_flag} eq 'RCDATA' or
472     $self->{content_model_flag} eq 'CDATA') {
473     unless ($self->{escape}) {
474     if ($self->{prev_input_character}->[0] == 0x002D and # -
475     $self->{prev_input_character}->[1] == 0x0021 and # !
476     $self->{prev_input_character}->[2] == 0x003C) { # <
477     $self->{escape} = 1;
478     }
479     }
480     }
481    
482     #
483 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
484 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
485     (($self->{content_model_flag} eq 'CDATA' or
486     $self->{content_model_flag} eq 'RCDATA') and
487     not $self->{escape})) {
488 wakaba 1.1 $self->{state} = 'tag open';
489     !!!next-input-character;
490     redo A;
491     } else {
492     #
493     }
494 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
495     if ($self->{escape} and
496     ($self->{content_model_flag} eq 'RCDATA' or
497     $self->{content_model_flag} eq 'CDATA')) {
498     if ($self->{prev_input_character}->[0] == 0x002D and # -
499     $self->{prev_input_character}->[1] == 0x002D) { # -
500     delete $self->{escape};
501     }
502     }
503    
504     #
505 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
506     !!!emit ({type => 'end-of-file'});
507     last A; ## TODO: ok?
508     }
509     # Anything else
510     my $token = {type => 'character',
511     data => chr $self->{next_input_character}};
512     ## Stay in the data state
513     !!!next-input-character;
514    
515     !!!emit ($token);
516    
517     redo A;
518     } elsif ($self->{state} eq 'entity data') {
519     ## (cannot happen in CDATA state)
520    
521     my $token = $self->_tokenize_attempt_to_consume_an_entity;
522    
523     $self->{state} = 'data';
524     # next-input-character is already done
525    
526     unless (defined $token) {
527     !!!emit ({type => 'character', data => '&'});
528     } else {
529     !!!emit ($token);
530     }
531    
532     redo A;
533     } elsif ($self->{state} eq 'tag open') {
534     if ($self->{content_model_flag} eq 'RCDATA' or
535     $self->{content_model_flag} eq 'CDATA') {
536     if ($self->{next_input_character} == 0x002F) { # /
537     !!!next-input-character;
538     $self->{state} = 'close tag open';
539     redo A;
540     } else {
541     ## reconsume
542     $self->{state} = 'data';
543    
544     !!!emit ({type => 'character', data => '<'});
545    
546     redo A;
547     }
548     } elsif ($self->{content_model_flag} eq 'PCDATA') {
549     if ($self->{next_input_character} == 0x0021) { # !
550     $self->{state} = 'markup declaration open';
551     !!!next-input-character;
552     redo A;
553     } elsif ($self->{next_input_character} == 0x002F) { # /
554     $self->{state} = 'close tag open';
555     !!!next-input-character;
556     redo A;
557     } elsif (0x0041 <= $self->{next_input_character} and
558     $self->{next_input_character} <= 0x005A) { # A..Z
559     $self->{current_token}
560     = {type => 'start tag',
561     tag_name => chr ($self->{next_input_character} + 0x0020)};
562     $self->{state} = 'tag name';
563     !!!next-input-character;
564     redo A;
565     } elsif (0x0061 <= $self->{next_input_character} and
566     $self->{next_input_character} <= 0x007A) { # a..z
567     $self->{current_token} = {type => 'start tag',
568     tag_name => chr ($self->{next_input_character})};
569     $self->{state} = 'tag name';
570     !!!next-input-character;
571     redo A;
572     } elsif ($self->{next_input_character} == 0x003E) { # >
573 wakaba 1.3 !!!parse-error (type => 'empty start tag');
574 wakaba 1.1 $self->{state} = 'data';
575     !!!next-input-character;
576    
577     !!!emit ({type => 'character', data => '<>'});
578    
579     redo A;
580     } elsif ($self->{next_input_character} == 0x003F) { # ?
581 wakaba 1.3 !!!parse-error (type => 'pio');
582 wakaba 1.1 $self->{state} = 'bogus comment';
583     ## $self->{next_input_character} is intentionally left as is
584     redo A;
585     } else {
586 wakaba 1.3 !!!parse-error (type => 'bare stago');
587 wakaba 1.1 $self->{state} = 'data';
588     ## reconsume
589    
590     !!!emit ({type => 'character', data => '<'});
591    
592     redo A;
593     }
594     } else {
595     die "$0: $self->{content_model_flag}: Unknown content model flag";
596     }
597     } elsif ($self->{state} eq 'close tag open') {
598     if ($self->{content_model_flag} eq 'RCDATA' or
599     $self->{content_model_flag} eq 'CDATA') {
600     my @next_char;
601     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
602     push @next_char, $self->{next_input_character};
603     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
604     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
605     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
606     !!!next-input-character;
607     next TAGNAME;
608     } else {
609 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
610 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
611     !!!back-next-input-character (@next_char);
612     $self->{state} = 'data';
613    
614     !!!emit ({type => 'character', data => '</'});
615    
616     redo A;
617     }
618     }
619     push @next_char, $self->{next_input_character};
620    
621     unless ($self->{next_input_character} == 0x0009 or # HT
622     $self->{next_input_character} == 0x000A or # LF
623     $self->{next_input_character} == 0x000B or # VT
624     $self->{next_input_character} == 0x000C or # FF
625     $self->{next_input_character} == 0x0020 or # SP
626     $self->{next_input_character} == 0x003E or # >
627     $self->{next_input_character} == 0x002F or # /
628     $self->{next_input_character} == 0x003C or # <
629     $self->{next_input_character} == -1) {
630 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
631 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
632     !!!back-next-input-character (@next_char);
633     $self->{state} = 'data';
634    
635     !!!emit ({type => 'character', data => '</'});
636    
637     redo A;
638     } else {
639     $self->{next_input_character} = shift @next_char;
640     !!!back-next-input-character (@next_char);
641     # and consume...
642     }
643     }
644    
645     if (0x0041 <= $self->{next_input_character} and
646     $self->{next_input_character} <= 0x005A) { # A..Z
647     $self->{current_token} = {type => 'end tag',
648     tag_name => chr ($self->{next_input_character} + 0x0020)};
649     $self->{state} = 'tag name';
650     !!!next-input-character;
651     redo A;
652     } elsif (0x0061 <= $self->{next_input_character} and
653     $self->{next_input_character} <= 0x007A) { # a..z
654     $self->{current_token} = {type => 'end tag',
655     tag_name => chr ($self->{next_input_character})};
656     $self->{state} = 'tag name';
657     !!!next-input-character;
658     redo A;
659     } elsif ($self->{next_input_character} == 0x003E) { # >
660 wakaba 1.3 !!!parse-error (type => 'empty end tag');
661 wakaba 1.1 $self->{state} = 'data';
662     !!!next-input-character;
663     redo A;
664     } elsif ($self->{next_input_character} == -1) {
665 wakaba 1.3 !!!parse-error (type => 'bare etago');
666 wakaba 1.1 $self->{state} = 'data';
667     # reconsume
668    
669     !!!emit ({type => 'character', data => '</'});
670    
671     redo A;
672     } else {
673 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
674 wakaba 1.1 $self->{state} = 'bogus comment';
675     ## $self->{next_input_character} is intentionally left as is
676     redo A;
677     }
678     } elsif ($self->{state} eq 'tag name') {
679     if ($self->{next_input_character} == 0x0009 or # HT
680     $self->{next_input_character} == 0x000A or # LF
681     $self->{next_input_character} == 0x000B or # VT
682     $self->{next_input_character} == 0x000C or # FF
683     $self->{next_input_character} == 0x0020) { # SP
684     $self->{state} = 'before attribute name';
685     !!!next-input-character;
686     redo A;
687     } elsif ($self->{next_input_character} == 0x003E) { # >
688     if ($self->{current_token}->{type} eq 'start tag') {
689     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
690     } elsif ($self->{current_token}->{type} eq 'end tag') {
691     $self->{content_model_flag} = 'PCDATA'; # MUST
692     if ($self->{current_token}->{attributes}) {
693 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
694 wakaba 1.1 }
695     } else {
696     die "$0: $self->{current_token}->{type}: Unknown token type";
697     }
698     $self->{state} = 'data';
699     !!!next-input-character;
700    
701     !!!emit ($self->{current_token}); # start tag or end tag
702     undef $self->{current_token};
703    
704     redo A;
705     } elsif (0x0041 <= $self->{next_input_character} and
706     $self->{next_input_character} <= 0x005A) { # A..Z
707     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
708     # start tag or end tag
709     ## Stay in this state
710     !!!next-input-character;
711     redo A;
712     } elsif ($self->{next_input_character} == 0x003C or # <
713     $self->{next_input_character} == -1) {
714 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
715 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
716     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
717     } elsif ($self->{current_token}->{type} eq 'end tag') {
718     $self->{content_model_flag} = 'PCDATA'; # MUST
719     if ($self->{current_token}->{attributes}) {
720 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
721 wakaba 1.1 }
722     } else {
723     die "$0: $self->{current_token}->{type}: Unknown token type";
724     }
725     $self->{state} = 'data';
726     # reconsume
727    
728     !!!emit ($self->{current_token}); # start tag or end tag
729     undef $self->{current_token};
730    
731     redo A;
732     } elsif ($self->{next_input_character} == 0x002F) { # /
733     !!!next-input-character;
734     if ($self->{next_input_character} == 0x003E and # >
735     $self->{current_token}->{type} eq 'start tag' and
736     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
737     # permitted slash
738     #
739     } else {
740 wakaba 1.3 !!!parse-error (type => 'nestc');
741 wakaba 1.1 }
742     $self->{state} = 'before attribute name';
743     # next-input-character is already done
744     redo A;
745     } else {
746     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
747     # start tag or end tag
748     ## Stay in the state
749     !!!next-input-character;
750     redo A;
751     }
752     } elsif ($self->{state} eq 'before attribute name') {
753     if ($self->{next_input_character} == 0x0009 or # HT
754     $self->{next_input_character} == 0x000A or # LF
755     $self->{next_input_character} == 0x000B or # VT
756     $self->{next_input_character} == 0x000C or # FF
757     $self->{next_input_character} == 0x0020) { # SP
758     ## Stay in the state
759     !!!next-input-character;
760     redo A;
761     } elsif ($self->{next_input_character} == 0x003E) { # >
762     if ($self->{current_token}->{type} eq 'start tag') {
763     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
764     } elsif ($self->{current_token}->{type} eq 'end tag') {
765     $self->{content_model_flag} = 'PCDATA'; # MUST
766     if ($self->{current_token}->{attributes}) {
767 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
768 wakaba 1.1 }
769     } else {
770     die "$0: $self->{current_token}->{type}: Unknown token type";
771     }
772     $self->{state} = 'data';
773     !!!next-input-character;
774    
775     !!!emit ($self->{current_token}); # start tag or end tag
776     undef $self->{current_token};
777    
778     redo A;
779     } elsif (0x0041 <= $self->{next_input_character} and
780     $self->{next_input_character} <= 0x005A) { # A..Z
781     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
782     value => ''};
783     $self->{state} = 'attribute name';
784     !!!next-input-character;
785     redo A;
786     } elsif ($self->{next_input_character} == 0x002F) { # /
787     !!!next-input-character;
788     if ($self->{next_input_character} == 0x003E and # >
789     $self->{current_token}->{type} eq 'start tag' and
790     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
791     # permitted slash
792     #
793     } else {
794 wakaba 1.3 !!!parse-error (type => 'nestc');
795 wakaba 1.1 }
796     ## Stay in the state
797     # next-input-character is already done
798     redo A;
799     } elsif ($self->{next_input_character} == 0x003C or # <
800     $self->{next_input_character} == -1) {
801 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
802 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
803     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
804     } elsif ($self->{current_token}->{type} eq 'end tag') {
805     $self->{content_model_flag} = 'PCDATA'; # MUST
806     if ($self->{current_token}->{attributes}) {
807 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
808 wakaba 1.1 }
809     } else {
810     die "$0: $self->{current_token}->{type}: Unknown token type";
811     }
812     $self->{state} = 'data';
813     # reconsume
814    
815     !!!emit ($self->{current_token}); # start tag or end tag
816     undef $self->{current_token};
817    
818     redo A;
819     } else {
820     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
821     value => ''};
822     $self->{state} = 'attribute name';
823     !!!next-input-character;
824     redo A;
825     }
826     } elsif ($self->{state} eq 'attribute name') {
827     my $before_leave = sub {
828     if (exists $self->{current_token}->{attributes} # start tag or end tag
829     ->{$self->{current_attribute}->{name}}) { # MUST
830 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
831 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
832     } else {
833     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
834     = $self->{current_attribute};
835     }
836     }; # $before_leave
837    
838     if ($self->{next_input_character} == 0x0009 or # HT
839     $self->{next_input_character} == 0x000A or # LF
840     $self->{next_input_character} == 0x000B or # VT
841     $self->{next_input_character} == 0x000C or # FF
842     $self->{next_input_character} == 0x0020) { # SP
843     $before_leave->();
844     $self->{state} = 'after attribute name';
845     !!!next-input-character;
846     redo A;
847     } elsif ($self->{next_input_character} == 0x003D) { # =
848     $before_leave->();
849     $self->{state} = 'before attribute value';
850     !!!next-input-character;
851     redo A;
852     } elsif ($self->{next_input_character} == 0x003E) { # >
853     $before_leave->();
854     if ($self->{current_token}->{type} eq 'start tag') {
855     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
856     } elsif ($self->{current_token}->{type} eq 'end tag') {
857     $self->{content_model_flag} = 'PCDATA'; # MUST
858     if ($self->{current_token}->{attributes}) {
859 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
860 wakaba 1.1 }
861     } else {
862     die "$0: $self->{current_token}->{type}: Unknown token type";
863     }
864     $self->{state} = 'data';
865     !!!next-input-character;
866    
867     !!!emit ($self->{current_token}); # start tag or end tag
868     undef $self->{current_token};
869    
870     redo A;
871     } elsif (0x0041 <= $self->{next_input_character} and
872     $self->{next_input_character} <= 0x005A) { # A..Z
873     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
874     ## Stay in the state
875     !!!next-input-character;
876     redo A;
877     } elsif ($self->{next_input_character} == 0x002F) { # /
878     $before_leave->();
879     !!!next-input-character;
880     if ($self->{next_input_character} == 0x003E and # >
881     $self->{current_token}->{type} eq 'start tag' and
882     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
883     # permitted slash
884     #
885     } else {
886 wakaba 1.3 !!!parse-error (type => 'nestc');
887 wakaba 1.1 }
888     $self->{state} = 'before attribute name';
889     # next-input-character is already done
890     redo A;
891     } elsif ($self->{next_input_character} == 0x003C or # <
892     $self->{next_input_character} == -1) {
893 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
894 wakaba 1.1 $before_leave->();
895     if ($self->{current_token}->{type} eq 'start tag') {
896     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
897     } elsif ($self->{current_token}->{type} eq 'end tag') {
898     $self->{content_model_flag} = 'PCDATA'; # MUST
899     if ($self->{current_token}->{attributes}) {
900 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
901 wakaba 1.1 }
902     } else {
903     die "$0: $self->{current_token}->{type}: Unknown token type";
904     }
905     $self->{state} = 'data';
906     # reconsume
907    
908     !!!emit ($self->{current_token}); # start tag or end tag
909     undef $self->{current_token};
910    
911     redo A;
912     } else {
913     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
914     ## Stay in the state
915     !!!next-input-character;
916     redo A;
917     }
918     } elsif ($self->{state} eq 'after attribute name') {
919     if ($self->{next_input_character} == 0x0009 or # HT
920     $self->{next_input_character} == 0x000A or # LF
921     $self->{next_input_character} == 0x000B or # VT
922     $self->{next_input_character} == 0x000C or # FF
923     $self->{next_input_character} == 0x0020) { # SP
924     ## Stay in the state
925     !!!next-input-character;
926     redo A;
927     } elsif ($self->{next_input_character} == 0x003D) { # =
928     $self->{state} = 'before attribute value';
929     !!!next-input-character;
930     redo A;
931     } elsif ($self->{next_input_character} == 0x003E) { # >
932     if ($self->{current_token}->{type} eq 'start tag') {
933     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
934     } elsif ($self->{current_token}->{type} eq 'end tag') {
935     $self->{content_model_flag} = 'PCDATA'; # MUST
936     if ($self->{current_token}->{attributes}) {
937 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
938 wakaba 1.1 }
939     } else {
940     die "$0: $self->{current_token}->{type}: Unknown token type";
941     }
942     $self->{state} = 'data';
943     !!!next-input-character;
944    
945     !!!emit ($self->{current_token}); # start tag or end tag
946     undef $self->{current_token};
947    
948     redo A;
949     } elsif (0x0041 <= $self->{next_input_character} and
950     $self->{next_input_character} <= 0x005A) { # A..Z
951     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
952     value => ''};
953     $self->{state} = 'attribute name';
954     !!!next-input-character;
955     redo A;
956     } elsif ($self->{next_input_character} == 0x002F) { # /
957     !!!next-input-character;
958     if ($self->{next_input_character} == 0x003E and # >
959     $self->{current_token}->{type} eq 'start tag' and
960     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
961     # permitted slash
962     #
963     } else {
964 wakaba 1.3 !!!parse-error (type => 'nestc');
965 wakaba 1.1 }
966     $self->{state} = 'before attribute name';
967     # next-input-character is already done
968     redo A;
969     } elsif ($self->{next_input_character} == 0x003C or # <
970     $self->{next_input_character} == -1) {
971 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
972 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
973     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
974     } elsif ($self->{current_token}->{type} eq 'end tag') {
975     $self->{content_model_flag} = 'PCDATA'; # MUST
976     if ($self->{current_token}->{attributes}) {
977 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
978 wakaba 1.1 }
979     } else {
980     die "$0: $self->{current_token}->{type}: Unknown token type";
981     }
982     $self->{state} = 'data';
983     # reconsume
984    
985     !!!emit ($self->{current_token}); # start tag or end tag
986     undef $self->{current_token};
987    
988     redo A;
989     } else {
990     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
991     value => ''};
992     $self->{state} = 'attribute name';
993     !!!next-input-character;
994     redo A;
995     }
996     } elsif ($self->{state} eq 'before attribute value') {
997     if ($self->{next_input_character} == 0x0009 or # HT
998     $self->{next_input_character} == 0x000A or # LF
999     $self->{next_input_character} == 0x000B or # VT
1000     $self->{next_input_character} == 0x000C or # FF
1001     $self->{next_input_character} == 0x0020) { # SP
1002     ## Stay in the state
1003     !!!next-input-character;
1004     redo A;
1005     } elsif ($self->{next_input_character} == 0x0022) { # "
1006     $self->{state} = 'attribute value (double-quoted)';
1007     !!!next-input-character;
1008     redo A;
1009     } elsif ($self->{next_input_character} == 0x0026) { # &
1010     $self->{state} = 'attribute value (unquoted)';
1011     ## reconsume
1012     redo A;
1013     } elsif ($self->{next_input_character} == 0x0027) { # '
1014     $self->{state} = 'attribute value (single-quoted)';
1015     !!!next-input-character;
1016     redo A;
1017     } elsif ($self->{next_input_character} == 0x003E) { # >
1018     if ($self->{current_token}->{type} eq 'start tag') {
1019     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1020     } elsif ($self->{current_token}->{type} eq 'end tag') {
1021     $self->{content_model_flag} = 'PCDATA'; # MUST
1022     if ($self->{current_token}->{attributes}) {
1023 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1024 wakaba 1.1 }
1025     } else {
1026     die "$0: $self->{current_token}->{type}: Unknown token type";
1027     }
1028     $self->{state} = 'data';
1029     !!!next-input-character;
1030    
1031     !!!emit ($self->{current_token}); # start tag or end tag
1032     undef $self->{current_token};
1033    
1034     redo A;
1035     } elsif ($self->{next_input_character} == 0x003C or # <
1036     $self->{next_input_character} == -1) {
1037 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1038 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1039     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1040     } elsif ($self->{current_token}->{type} eq 'end tag') {
1041     $self->{content_model_flag} = 'PCDATA'; # MUST
1042     if ($self->{current_token}->{attributes}) {
1043 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1044 wakaba 1.1 }
1045     } else {
1046     die "$0: $self->{current_token}->{type}: Unknown token type";
1047     }
1048     $self->{state} = 'data';
1049     ## reconsume
1050    
1051     !!!emit ($self->{current_token}); # start tag or end tag
1052     undef $self->{current_token};
1053    
1054     redo A;
1055     } else {
1056     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1057     $self->{state} = 'attribute value (unquoted)';
1058     !!!next-input-character;
1059     redo A;
1060     }
1061     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
1062     if ($self->{next_input_character} == 0x0022) { # "
1063     $self->{state} = 'before attribute name';
1064     !!!next-input-character;
1065     redo A;
1066     } elsif ($self->{next_input_character} == 0x0026) { # &
1067     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
1068     $self->{state} = 'entity in attribute value';
1069     !!!next-input-character;
1070     redo A;
1071     } elsif ($self->{next_input_character} == -1) {
1072 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1073 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1074     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1075     } elsif ($self->{current_token}->{type} eq 'end tag') {
1076     $self->{content_model_flag} = 'PCDATA'; # MUST
1077     if ($self->{current_token}->{attributes}) {
1078 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1079 wakaba 1.1 }
1080     } else {
1081     die "$0: $self->{current_token}->{type}: Unknown token type";
1082     }
1083     $self->{state} = 'data';
1084     ## reconsume
1085    
1086     !!!emit ($self->{current_token}); # start tag or end tag
1087     undef $self->{current_token};
1088    
1089     redo A;
1090     } else {
1091     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1092     ## Stay in the state
1093     !!!next-input-character;
1094     redo A;
1095     }
1096     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1097     if ($self->{next_input_character} == 0x0027) { # '
1098     $self->{state} = 'before attribute name';
1099     !!!next-input-character;
1100     redo A;
1101     } elsif ($self->{next_input_character} == 0x0026) { # &
1102     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1103     $self->{state} = 'entity in attribute value';
1104     !!!next-input-character;
1105     redo A;
1106     } elsif ($self->{next_input_character} == -1) {
1107 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1108 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1109     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1110     } elsif ($self->{current_token}->{type} eq 'end tag') {
1111     $self->{content_model_flag} = 'PCDATA'; # MUST
1112     if ($self->{current_token}->{attributes}) {
1113 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1114 wakaba 1.1 }
1115     } else {
1116     die "$0: $self->{current_token}->{type}: Unknown token type";
1117     }
1118     $self->{state} = 'data';
1119     ## reconsume
1120    
1121     !!!emit ($self->{current_token}); # start tag or end tag
1122     undef $self->{current_token};
1123    
1124     redo A;
1125     } else {
1126     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1127     ## Stay in the state
1128     !!!next-input-character;
1129     redo A;
1130     }
1131     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1132     if ($self->{next_input_character} == 0x0009 or # HT
1133     $self->{next_input_character} == 0x000A or # LF
1134     $self->{next_input_character} == 0x000B or # VT
1135     $self->{next_input_character} == 0x000C or # FF
1136     $self->{next_input_character} == 0x0020) { # SP
1137     $self->{state} = 'before attribute name';
1138     !!!next-input-character;
1139     redo A;
1140     } elsif ($self->{next_input_character} == 0x0026) { # &
1141     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1142     $self->{state} = 'entity in attribute value';
1143     !!!next-input-character;
1144     redo A;
1145     } elsif ($self->{next_input_character} == 0x003E) { # >
1146     if ($self->{current_token}->{type} eq 'start tag') {
1147     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1148     } elsif ($self->{current_token}->{type} eq 'end tag') {
1149     $self->{content_model_flag} = 'PCDATA'; # MUST
1150     if ($self->{current_token}->{attributes}) {
1151 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1152 wakaba 1.1 }
1153     } else {
1154     die "$0: $self->{current_token}->{type}: Unknown token type";
1155     }
1156     $self->{state} = 'data';
1157     !!!next-input-character;
1158    
1159     !!!emit ($self->{current_token}); # start tag or end tag
1160     undef $self->{current_token};
1161    
1162     redo A;
1163     } elsif ($self->{next_input_character} == 0x003C or # <
1164     $self->{next_input_character} == -1) {
1165 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1166 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1167     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1168     } elsif ($self->{current_token}->{type} eq 'end tag') {
1169     $self->{content_model_flag} = 'PCDATA'; # MUST
1170     if ($self->{current_token}->{attributes}) {
1171 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1172 wakaba 1.1 }
1173     } else {
1174     die "$0: $self->{current_token}->{type}: Unknown token type";
1175     }
1176     $self->{state} = 'data';
1177     ## reconsume
1178    
1179     !!!emit ($self->{current_token}); # start tag or end tag
1180     undef $self->{current_token};
1181    
1182     redo A;
1183     } else {
1184     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1185     ## Stay in the state
1186     !!!next-input-character;
1187     redo A;
1188     }
1189     } elsif ($self->{state} eq 'entity in attribute value') {
1190     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1191    
1192     unless (defined $token) {
1193     $self->{current_attribute}->{value} .= '&';
1194     } else {
1195     $self->{current_attribute}->{value} .= $token->{data};
1196     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1197     }
1198    
1199     $self->{state} = $self->{last_attribute_value_state};
1200     # next-input-character is already done
1201     redo A;
1202     } elsif ($self->{state} eq 'bogus comment') {
1203     ## (only happen if PCDATA state)
1204    
1205     my $token = {type => 'comment', data => ''};
1206    
1207     BC: {
1208     if ($self->{next_input_character} == 0x003E) { # >
1209     $self->{state} = 'data';
1210     !!!next-input-character;
1211    
1212     !!!emit ($token);
1213    
1214     redo A;
1215     } elsif ($self->{next_input_character} == -1) {
1216     $self->{state} = 'data';
1217     ## reconsume
1218    
1219     !!!emit ($token);
1220    
1221     redo A;
1222     } else {
1223     $token->{data} .= chr ($self->{next_input_character});
1224     !!!next-input-character;
1225     redo BC;
1226     }
1227     } # BC
1228     } elsif ($self->{state} eq 'markup declaration open') {
1229     ## (only happen if PCDATA state)
1230    
1231     my @next_char;
1232     push @next_char, $self->{next_input_character};
1233    
1234     if ($self->{next_input_character} == 0x002D) { # -
1235     !!!next-input-character;
1236     push @next_char, $self->{next_input_character};
1237     if ($self->{next_input_character} == 0x002D) { # -
1238     $self->{current_token} = {type => 'comment', data => ''};
1239     $self->{state} = 'comment';
1240     !!!next-input-character;
1241     redo A;
1242     }
1243     } elsif ($self->{next_input_character} == 0x0044 or # D
1244     $self->{next_input_character} == 0x0064) { # d
1245     !!!next-input-character;
1246     push @next_char, $self->{next_input_character};
1247     if ($self->{next_input_character} == 0x004F or # O
1248     $self->{next_input_character} == 0x006F) { # o
1249     !!!next-input-character;
1250     push @next_char, $self->{next_input_character};
1251     if ($self->{next_input_character} == 0x0043 or # C
1252     $self->{next_input_character} == 0x0063) { # c
1253     !!!next-input-character;
1254     push @next_char, $self->{next_input_character};
1255     if ($self->{next_input_character} == 0x0054 or # T
1256     $self->{next_input_character} == 0x0074) { # t
1257     !!!next-input-character;
1258     push @next_char, $self->{next_input_character};
1259     if ($self->{next_input_character} == 0x0059 or # Y
1260     $self->{next_input_character} == 0x0079) { # y
1261     !!!next-input-character;
1262     push @next_char, $self->{next_input_character};
1263     if ($self->{next_input_character} == 0x0050 or # P
1264     $self->{next_input_character} == 0x0070) { # p
1265     !!!next-input-character;
1266     push @next_char, $self->{next_input_character};
1267     if ($self->{next_input_character} == 0x0045 or # E
1268     $self->{next_input_character} == 0x0065) { # e
1269     ## ISSUE: What a stupid code this is!
1270     $self->{state} = 'DOCTYPE';
1271     !!!next-input-character;
1272     redo A;
1273     }
1274     }
1275     }
1276     }
1277     }
1278     }
1279     }
1280    
1281 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1282 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1283     !!!back-next-input-character (@next_char);
1284     $self->{state} = 'bogus comment';
1285     redo A;
1286    
1287     ## ISSUE: typos in spec: chacacters, is is a parse error
1288     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1289     } elsif ($self->{state} eq 'comment') {
1290     if ($self->{next_input_character} == 0x002D) { # -
1291     $self->{state} = 'comment dash';
1292     !!!next-input-character;
1293     redo A;
1294     } elsif ($self->{next_input_character} == -1) {
1295 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1296 wakaba 1.1 $self->{state} = 'data';
1297     ## reconsume
1298    
1299     !!!emit ($self->{current_token}); # comment
1300     undef $self->{current_token};
1301    
1302     redo A;
1303     } else {
1304     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1305     ## Stay in the state
1306     !!!next-input-character;
1307     redo A;
1308     }
1309     } elsif ($self->{state} eq 'comment dash') {
1310     if ($self->{next_input_character} == 0x002D) { # -
1311     $self->{state} = 'comment end';
1312     !!!next-input-character;
1313     redo A;
1314     } elsif ($self->{next_input_character} == -1) {
1315 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1316 wakaba 1.1 $self->{state} = 'data';
1317     ## reconsume
1318    
1319     !!!emit ($self->{current_token}); # comment
1320     undef $self->{current_token};
1321    
1322     redo A;
1323     } else {
1324     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1325     $self->{state} = 'comment';
1326     !!!next-input-character;
1327     redo A;
1328     }
1329     } elsif ($self->{state} eq 'comment end') {
1330     if ($self->{next_input_character} == 0x003E) { # >
1331     $self->{state} = 'data';
1332     !!!next-input-character;
1333    
1334     !!!emit ($self->{current_token}); # comment
1335     undef $self->{current_token};
1336    
1337     redo A;
1338     } elsif ($self->{next_input_character} == 0x002D) { # -
1339 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1340 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1341     ## Stay in the state
1342     !!!next-input-character;
1343     redo A;
1344     } elsif ($self->{next_input_character} == -1) {
1345 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1346 wakaba 1.1 $self->{state} = 'data';
1347     ## reconsume
1348    
1349     !!!emit ($self->{current_token}); # comment
1350     undef $self->{current_token};
1351    
1352     redo A;
1353     } else {
1354 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1355 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1356     $self->{state} = 'comment';
1357     !!!next-input-character;
1358     redo A;
1359     }
1360     } elsif ($self->{state} eq 'DOCTYPE') {
1361     if ($self->{next_input_character} == 0x0009 or # HT
1362     $self->{next_input_character} == 0x000A or # LF
1363     $self->{next_input_character} == 0x000B or # VT
1364     $self->{next_input_character} == 0x000C or # FF
1365     $self->{next_input_character} == 0x0020) { # SP
1366     $self->{state} = 'before DOCTYPE name';
1367     !!!next-input-character;
1368     redo A;
1369     } else {
1370 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1371 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1372     ## reconsume
1373     redo A;
1374     }
1375     } elsif ($self->{state} eq 'before DOCTYPE name') {
1376     if ($self->{next_input_character} == 0x0009 or # HT
1377     $self->{next_input_character} == 0x000A or # LF
1378     $self->{next_input_character} == 0x000B or # VT
1379     $self->{next_input_character} == 0x000C or # FF
1380     $self->{next_input_character} == 0x0020) { # SP
1381     ## Stay in the state
1382     !!!next-input-character;
1383     redo A;
1384     } elsif (0x0061 <= $self->{next_input_character} and
1385     $self->{next_input_character} <= 0x007A) { # a..z
1386 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1387 wakaba 1.1 $self->{current_token} = {type => 'DOCTYPE',
1388     name => chr ($self->{next_input_character} - 0x0020),
1389     error => 1};
1390     $self->{state} = 'DOCTYPE name';
1391     !!!next-input-character;
1392     redo A;
1393     } elsif ($self->{next_input_character} == 0x003E) { # >
1394 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1395 wakaba 1.1 $self->{state} = 'data';
1396     !!!next-input-character;
1397    
1398     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1399    
1400     redo A;
1401     } elsif ($self->{next_input_character} == -1) {
1402 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1403 wakaba 1.1 $self->{state} = 'data';
1404     ## reconsume
1405    
1406     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1407    
1408     redo A;
1409     } else {
1410     $self->{current_token} = {type => 'DOCTYPE',
1411     name => chr ($self->{next_input_character}),
1412     error => 1};
1413 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1414 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1415     !!!next-input-character;
1416     redo A;
1417     }
1418     } elsif ($self->{state} eq 'DOCTYPE name') {
1419     if ($self->{next_input_character} == 0x0009 or # HT
1420     $self->{next_input_character} == 0x000A or # LF
1421     $self->{next_input_character} == 0x000B or # VT
1422     $self->{next_input_character} == 0x000C or # FF
1423     $self->{next_input_character} == 0x0020) { # SP
1424     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1425     $self->{state} = 'after DOCTYPE name';
1426     !!!next-input-character;
1427     redo A;
1428     } elsif ($self->{next_input_character} == 0x003E) { # >
1429     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1430     $self->{state} = 'data';
1431     !!!next-input-character;
1432    
1433     !!!emit ($self->{current_token}); # DOCTYPE
1434     undef $self->{current_token};
1435    
1436     redo A;
1437     } elsif (0x0061 <= $self->{next_input_character} and
1438     $self->{next_input_character} <= 0x007A) { # a..z
1439     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1440     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1441     ## Stay in the state
1442     !!!next-input-character;
1443     redo A;
1444     } elsif ($self->{next_input_character} == -1) {
1445 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1446 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1447     $self->{state} = 'data';
1448     ## reconsume
1449    
1450     !!!emit ($self->{current_token});
1451     undef $self->{current_token};
1452    
1453     redo A;
1454     } else {
1455     $self->{current_token}->{name}
1456     .= chr ($self->{next_input_character}); # DOCTYPE
1457     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1458     ## Stay in the state
1459     !!!next-input-character;
1460     redo A;
1461     }
1462     } elsif ($self->{state} eq 'after DOCTYPE name') {
1463     if ($self->{next_input_character} == 0x0009 or # HT
1464     $self->{next_input_character} == 0x000A or # LF
1465     $self->{next_input_character} == 0x000B or # VT
1466     $self->{next_input_character} == 0x000C or # FF
1467     $self->{next_input_character} == 0x0020) { # SP
1468     ## Stay in the state
1469     !!!next-input-character;
1470     redo A;
1471     } elsif ($self->{next_input_character} == 0x003E) { # >
1472     $self->{state} = 'data';
1473     !!!next-input-character;
1474    
1475     !!!emit ($self->{current_token}); # DOCTYPE
1476     undef $self->{current_token};
1477    
1478     redo A;
1479     } elsif ($self->{next_input_character} == -1) {
1480 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1481 wakaba 1.1 $self->{state} = 'data';
1482     ## reconsume
1483    
1484     !!!emit ($self->{current_token}); # DOCTYPE
1485     undef $self->{current_token};
1486    
1487     redo A;
1488     } else {
1489 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1490 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1491     $self->{state} = 'bogus DOCTYPE';
1492     !!!next-input-character;
1493     redo A;
1494     }
1495     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1496     if ($self->{next_input_character} == 0x003E) { # >
1497     $self->{state} = 'data';
1498     !!!next-input-character;
1499    
1500     !!!emit ($self->{current_token}); # DOCTYPE
1501     undef $self->{current_token};
1502    
1503     redo A;
1504     } elsif ($self->{next_input_character} == -1) {
1505 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1506 wakaba 1.1 $self->{state} = 'data';
1507     ## reconsume
1508    
1509     !!!emit ($self->{current_token}); # DOCTYPE
1510     undef $self->{current_token};
1511    
1512     redo A;
1513     } else {
1514     ## Stay in the state
1515     !!!next-input-character;
1516     redo A;
1517     }
1518     } else {
1519     die "$0: $self->{state}: Unknown state";
1520     }
1521     } # A
1522    
1523     die "$0: _get_next_token: unexpected case";
1524     } # _get_next_token
1525    
## Attempts to consume a character reference ("entity") beginning at
## |$self->{next_input_character}|.  The caller visible in this file
## invokes it once the |&| itself has already been consumed.
##
## Three forms are recognized:
##
##   - |#x| or |#X| followed by hexadecimal digits;
##   - |#| followed by decimal digits;
##   - an ASCII alphanumeric name (starting with a letter) that is
##     looked up in |$entity_char|.
##
## Returns a hash reference |{type => 'character', data => $string}|
## when a reference has been recognized, or |undef| otherwise.  A
## recognized reference that lacks the terminating |;| is still
## accepted, but a parse error is reported.
##
## When |undef| is returned a parse error has been reported and the
## characters read ahead have been given back: for the numeric forms
## they are pushed back onto the input and |#| becomes the next input
## character again; for an unknown name the characters read are
## re-queued as a character token.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      my $num;
      X: {
        ## NOTE: |$x_char| is only used while no digit has been seen,
        ## i.e. on the first pass, where it holds the |x| or |X|.
        my $x_char = $self->{next_input_character};
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error (type => 'bare hcro');
          ## Give back both the |x|/|X| and the character read after
          ## it, in input order, before |#| is made the next input
          ## character again.  Pushing back only |$x_char| would
          ## silently drop the character that followed it.
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error (type => 'no refc');
        }

        ## TODO: check the definition for |a valid Unicode character|.
        ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        } elsif (0x80 <= $num and $num <= 0x9F) {
          !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
          ## NOTE(review): assumes |$c1_entity_char| (defined elsewhere
          ## in this file) has an entry for every code point in
          ## 0x80..0x9F - TODO confirm.
          $num = $c1_entity_char->{$num};
        }

        return {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error (type => 'no refc');
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      } elsif (0x80 <= $code and $code <= 0x9F) {
        !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
        ## NOTE(review): same assumption on |$c1_entity_char| as in
        ## the hexadecimal branch - TODO confirm.
        $code = $c1_entity_char->{$code};
      }

      return {type => 'character', data => chr $code};
    } else {
      !!!parse-error (type => 'bare nero');
      ## Give back the character read after |#|, then make |#| the
      ## next input character again.
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the replacement text of the most recent match in
    ## |$entity_char|, followed by any characters read after that
    ## match; before any match it is simply the characters read so far.
    my $value = $entity_name;
    my $match;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x005A) or
            (0x0061 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x007A) or
            (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039))) {
      $entity_name .= chr $self->{next_input_character};
      if (defined $entity_char->{$entity_name}) {
        $value = $entity_char->{$entity_name};
        $match = 1;
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match) {
      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        ## NOTE(review): the numeric branches report a missing |;| as
        ## 'no refc'; confirm whether 'refc' is intended here.
        !!!parse-error (type => 'refc');
      }

      return {type => 'character', data => $value};
    } else {
      !!!parse-error (type => 'bare ero');
      ## NOTE: No characters are consumed in the spec.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error (type => 'bare ero');
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1663    
## Called before tree construction starts: switches the document's
## |strict_error_checking| off by passing it a false value.
##
## NOTE: |$self->{document}| MUST be specified before this method is
## called.
##
## Returns whatever |strict_error_checking| returns.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  ## TODO: Mark the Document as an HTML document # MUST

  return $document->strict_error_checking (0);
} # _initialize_tree_constructor
1672    
## Finishes tree construction by switching the document's strict error
## checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  my $doc = $self->{document};
  ## TODO: Turn mutation events on
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
1678    
1679     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1680    
1681 wakaba 1.3 { # tree construction stage
1682     my $token;
1683    
## Entry point of the tree construction stage: fetches the first token,
## resets the per-parse state kept on $self and then runs the initial,
## root element and main phases in that order.
sub _construct_tree ($) {
  my $self = shift;

  ## NOTE: When an interactive UA renders $self->{document} to the user,
  ## or when it begins accepting user input, is not defined.

  ## NOTE: "Append a character" means: collect it and all subsequent
  ## consecutive characters and insert a single Text node whose data is
  ## the concatenation of all those characters. # MUST

  !!!next-token;

  ## Per-parse state
  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  undef $self->{$_} for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  $self->_tree_construction_main;
} # _construct_tree
1707    
## Initial phase of tree construction.
##
## A DOCTYPE token is turned into a document type node appended to the
## document and then consumed.  Leading white space characters are
## appended to the document as text.  Any other token is reported as
## 'missing DOCTYPE' and left in $token to be reprocessed by the next
## phase.  Dies on an unknown token type.
sub _tree_construction_initial ($) {
  my $self = shift;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      if ($token->{error}) {
        ## ISSUE: Spec currently left this case undefined.
        !!!parse-error (type => 'bogus DOCTYPE');
      }
      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name});
      $self->{document}->append_child ($doctype);
      !!!next-token;
      return;
    }

    if ($type eq 'comment' or $type eq 'start tag' or
        $type eq 'end tag' or $type eq 'end-of-file') {
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## reprocess in the next phase
      return;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## Only white space: stay in this phase
          !!!next-token;
          next TOKEN;
        }
      }
      ## ISSUE: Spec currently left this case undefined.
      !!!parse-error (type => 'missing DOCTYPE');
      ## reprocess in the next phase
      return;
    }

    die "$0: $token->{type}: Unknown token";
  } # TOKEN
} # _tree_construction_initial
1756    
## Root element phase of tree construction.
##
## DOCTYPE tokens are reported and ignored, comments are appended to
## the document and leading white space is appended as text.  On the
## first other token (left in $token for reprocessing) an |html|
## element is created, appended to the document and pushed onto the
## stack of open elements.  Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## Only white space: stay in the phase
          !!!next-token;
          next TOKEN;
        }
      }
      last TOKEN;
    }

    ## ISSUE: There is an issue in the spec
    last TOKEN if $type eq 'start tag' or $type eq 'end tag' or
        $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # TOKEN

  ## Create the root element; the current token is reprocessed later
  my $root_element; !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];
  return;
} # _tree_construction_root_element
1803    
## Resets $self->{insertion_mode} from the stack of open elements
## ($self->{open_elements}, a list of [node, local name] pairs).
##
## The stack is walked from the current node towards the root until an
## element that determines the mode is found.  When the bottom of the
## stack is reached and an innerHTML context element
## ($self->{inner_html_node}) other than |td| or |th| is set, that
## context element is examined instead of the bottom node.
##
## NOTE: The context element must only replace the node once the
## bottom of the stack has been reached.  Replacing it on every pass
## would make the elements actually on the stack irrelevant in the
## innerHTML case.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Insertion mode selected by element name (Steps 4..13)
  my $mode_for = {
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  };

  S3: {
    ## Step 3
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      my $context = $self->{inner_html_node};
      if (defined $context and
          $context->[1] ne 'td' and $context->[1] ne 'th') {
        $node = $context;
      }
    }

    ## Step 4..13
    my $new_mode = $mode_for->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
1865    
1866     sub _tree_construction_main ($) {
1867     my $self = shift;
1868    
1869     my $phase = 'main';
1870 wakaba 1.1
1871     my $active_formatting_elements = [];
1872    
## Reconstructs the active formatting elements.
##
## Starting from the last entry of the list of active formatting
## elements, walks back to the entry following the closest marker or
## entry still on the stack of open elements (or to the first entry of
## the list).  From there to the end of the list every entry is
## replaced by a shallow clone, which is inserted by calling $insert
## with the clone node and pushed onto the stack of open elements.
my $reconstruct_active_formatting_elements = sub { # MUST
  my $insert = shift;

  ## Step 1
  return unless @$active_formatting_elements;

  ## True if the node is on the stack of open elements
  my $is_open = sub {
    my $el = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $el eq $open->[0];
    }
    return 0;
  };

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  ## Step 4: stop rewinding at the first entry of the list
  REWIND: until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6
    if ($entry->[0] eq '#marker' or $is_open->($entry->[0])) {
      ## Step 7
      $i++;
      $entry = $active_formatting_elements->[$i];
      last REWIND;
    }
  } # REWIND

  CREATE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $self->{open_elements}->[-1];

    ## Step 11
    last CREATE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # CREATE
}; # $reconstruct_active_formatting_elements
1943    
## Removes the last '#marker' entry of the list of active formatting
## elements together with every entry after it.  The list is left
## untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
1952    
## Processes a |style| start tag held in $token.
##
## The element is created and appended to the |head| element (when the
## insertion mode is 'in head' and a |head| element is known) or to
## the current node.  The following character tokens are read with
## the CDATA content model and appended as text; then the content
## model is reset to PCDATA and the next token, which is expected to
## be the |style| end tag, is consumed.
my $style_start_tag = sub {
  my $style_el; !!!create-element ($style_el, 'style', $token->{attributes});

  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  my $parent;
  if ($self->{insertion_mode} eq 'in head' and
      defined $self->{head_element}) {
    $parent = $self->{head_element};
  } else {
    $parent = $self->{open_elements}->[-1]->[0];
  }
  $parent->append_child ($style_el);

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Collect character tokens; stops at a non-character token or
  ## when the tokenizer stops tokenising
  my @chunk;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunk;
  $style_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  ## The token (normally the end tag) is ignored
  !!!next-token;
}; # $style_start_tag
1982    
## Processes a |script| start tag held in $token.
##
## The element is created and the following character tokens are read
## with the CDATA content model and appended to it as text.  Unless an
## innerHTML context element is set, the element is then appended to
## the |head| element (when the insertion mode is 'in head' and a
## |head| element is known) or to the current node.  Finally the next
## token, which is expected to be the |script| end tag, is consumed.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';
  delete $self->{escape}; # MUST

  ## Collect character tokens; stops at a non-character token or
  ## when the tokenizer stops tokenising
  my @chunk;
  !!!next-token;
  until ($token->{type} ne 'character') {
    push @chunk, $token->{data};
    !!!next-token;
  }
  my $text = join '', @chunk;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }

  if (defined $self->{inner_html_node}) {
    ## TODO: mark as "already executed"
  } else {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent;
    if ($self->{insertion_mode} eq 'in head' and
        defined $self->{head_element}) {
      $parent = $self->{head_element};
    } else {
      $parent = $self->{open_elements}->[-1]->[0];
    }
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
2028    
## Processes an end tag for a formatting element (the "adoption
## agency" steps).
##
## $tag_name is the local name of the end tag.  The steps are repeated
## (Step 14) until the formatting element is either not found in the
## list of active formatting elements, not in scope, or has no
## furthest block; in each of those cases the current token is
## consumed and the closure returns.
##
## Entries of both $active_formatting_elements and
## $self->{open_elements} are [node, local name] pairs.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    ## Find the last entry with this tag name after the last marker
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
      ## Remove the formatting element itself from the list.  It is
      ## not necessarily the last entry, so |pop| would be wrong.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
    }

    ## Step 2
    ## Scanning from the current node down to the formatting element
    ## leaves the matching node closest to the formatting element
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): when the formatting element is the first entry
    ## of the list, index -1 selects the last entry here - confirm
    ## that this is the intended bookmark.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not an active formatting element is dropped
      ## from the stack and the next node is examined
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## $i tracks the bookmark position; it is decremented when the
    ## formatting element is removed from in front of it
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## Insert (length 0) the clone immediately after the furthest
    ## block; no other entry may be removed from the stack here.
    splice @{$self->{open_elements}}, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2211    
## Appends the given node to the current node, i.e. the node of the
## last entry of the stack of open elements.
my $insert_to_current = sub {
  my $child = shift;
  my $current = $self->{open_elements}->[-1];
  $current->[0]->append_child ($child);
}; # $insert_to_current
2215    
## Inserts the given node, foster parenting it when the current node
## is a |table|, |tbody|, |tfoot|, |thead| or |tr| element.
##
## In that case the last |table| on the stack of open elements is
## looked up: if its parent is an element node (node type 1), the
## child is inserted into that parent just before the table;
## otherwise it is appended to the element preceding the table on the
## stack.  Without any |table| on the stack the first element of the
## stack is used.  For any other current node the child is simply
## appended to the current node.
my $insert_to_foster = sub {
  my $child = shift;
  my $open = $self->{open_elements};

  unless ($open->[-1]->[1] =~ /\A(?:table|tbody|tfoot|thead|tr)\z/) {
    return $open->[-1]->[0]->append_child ($child);
  }

  # MUST
  my $foster_parent_element;
  my $next_sibling;
  OE: for my $idx (reverse 0..$#$open) {
    next OE unless $open->[$idx]->[1] eq 'table';

    my $table_el = $open->[$idx]->[0];
    my $parent = $table_el->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      $foster_parent_element = $parent;
      $next_sibling = $table_el;
    } else {
      $foster_parent_element = $open->[$idx - 1]->[0];
    }
    last OE;
  } # OE

  $foster_parent_element = $open->[0]->[0]
      unless defined $foster_parent_element;
  $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2246    
2247     my $in_body = sub {
2248     my $insert = shift;
2249     if ($token->{type} eq 'start tag') {
2250     if ($token->{tag_name} eq 'script') {
2251     $script_start_tag->();
2252     return;
2253     } elsif ($token->{tag_name} eq 'style') {
2254     $style_start_tag->();
2255     return;
2256     } elsif ({
2257     base => 1, link => 1, meta => 1,
2258     }->{$token->{tag_name}}) {
2259 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2260 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2261     my $el;
2262     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2263 wakaba 1.3 if (defined $self->{head_element}) {
2264     $self->{head_element}->append_child ($el);
2265 wakaba 1.1 } else {
2266     $insert->($el);
2267     }
2268    
2269     !!!next-token;
2270     return;
2271     } elsif ($token->{tag_name} eq 'title') {
2272 wakaba 1.3 !!!parse-error (type => 'in body:title');
2273 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2274     my $title_el;
2275     !!!create-element ($title_el, 'title', $token->{attributes});
2276 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2277 wakaba 1.1 ->append_child ($title_el);
2278     $self->{content_model_flag} = 'RCDATA';
2279 wakaba 1.13 delete $self->{escape}; # MUST
2280 wakaba 1.1
2281     my $text = '';
2282     !!!next-token;
2283     while ($token->{type} eq 'character') {
2284     $text .= $token->{data};
2285     !!!next-token;
2286     }
2287     if (length $text) {
2288     $title_el->manakai_append_text ($text);
2289     }
2290    
2291     $self->{content_model_flag} = 'PCDATA';
2292    
2293     if ($token->{type} eq 'end tag' and
2294     $token->{tag_name} eq 'title') {
2295     ## Ignore the token
2296     } else {
2297 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2298 wakaba 1.1 ## ISSUE: And ignore?
2299     }
2300     !!!next-token;
2301     return;
2302     } elsif ($token->{tag_name} eq 'body') {
2303 wakaba 1.3 !!!parse-error (type => 'in body:body');
2304 wakaba 1.1
2305 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2306     $self->{open_elements}->[1]->[1] ne 'body') {
2307 wakaba 1.1 ## Ignore the token
2308     } else {
2309 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2310 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2311     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2312     $body_el->set_attribute_ns
2313     (undef, [undef, $attr_name],
2314     $token->{attributes}->{$attr_name}->{value});
2315     }
2316     }
2317     }
2318     !!!next-token;
2319     return;
2320     } elsif ({
2321     address => 1, blockquote => 1, center => 1, dir => 1,
2322     div => 1, dl => 1, fieldset => 1, listing => 1,
2323     menu => 1, ol => 1, p => 1, ul => 1,
2324     pre => 1,
2325     }->{$token->{tag_name}}) {
2326     ## has a p element in scope
2327 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2328 wakaba 1.1 if ($_->[1] eq 'p') {
2329     !!!back-token;
2330     $token = {type => 'end tag', tag_name => 'p'};
2331     return;
2332     } elsif ({
2333     table => 1, caption => 1, td => 1, th => 1,
2334     button => 1, marquee => 1, object => 1, html => 1,
2335     }->{$_->[1]}) {
2336     last INSCOPE;
2337     }
2338     } # INSCOPE
2339    
2340     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2341     if ($token->{tag_name} eq 'pre') {
2342     !!!next-token;
2343     if ($token->{type} eq 'character') {
2344     $token->{data} =~ s/^\x0A//;
2345     unless (length $token->{data}) {
2346     !!!next-token;
2347     }
2348     }
2349     } else {
2350     !!!next-token;
2351     }
2352     return;
2353     } elsif ($token->{tag_name} eq 'form') {
2354 wakaba 1.3 if (defined $self->{form_element}) {
2355     !!!parse-error (type => 'in form:form');
2356 wakaba 1.1 ## Ignore the token
2357 wakaba 1.7 !!!next-token;
2358     return;
2359 wakaba 1.1 } else {
2360     ## has a p element in scope
2361 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2362 wakaba 1.1 if ($_->[1] eq 'p') {
2363     !!!back-token;
2364     $token = {type => 'end tag', tag_name => 'p'};
2365     return;
2366     } elsif ({
2367     table => 1, caption => 1, td => 1, th => 1,
2368     button => 1, marquee => 1, object => 1, html => 1,
2369     }->{$_->[1]}) {
2370     last INSCOPE;
2371     }
2372     } # INSCOPE
2373    
2374     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2375 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2376 wakaba 1.1 !!!next-token;
2377     return;
2378     }
2379     } elsif ($token->{tag_name} eq 'li') {
2380     ## has a p element in scope
2381 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2382 wakaba 1.1 if ($_->[1] eq 'p') {
2383     !!!back-token;
2384     $token = {type => 'end tag', tag_name => 'p'};
2385     return;
2386     } elsif ({
2387     table => 1, caption => 1, td => 1, th => 1,
2388     button => 1, marquee => 1, object => 1, html => 1,
2389     }->{$_->[1]}) {
2390     last INSCOPE;
2391     }
2392     } # INSCOPE
2393    
2394     ## Step 1
2395     my $i = -1;
2396 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2397 wakaba 1.1 LI: {
2398     ## Step 2
2399     if ($node->[1] eq 'li') {
2400 wakaba 1.8 if ($i != -1) {
2401     !!!parse-error (type => 'end tag missing:'.
2402     $self->{open_elements}->[-1]->[1]);
2403     ## TODO: test
2404     }
2405 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2406 wakaba 1.1 last LI;
2407     }
2408    
2409     ## Step 3
2410     if (not $formatting_category->{$node->[1]} and
2411     #not $phrasing_category->{$node->[1]} and
2412     ($special_category->{$node->[1]} or
2413     $scoping_category->{$node->[1]}) and
2414     $node->[1] ne 'address' and $node->[1] ne 'div') {
2415     last LI;
2416     }
2417    
2418     ## Step 4
2419     $i--;
2420 wakaba 1.3 $node = $self->{open_elements}->[$i];
2421 wakaba 1.1 redo LI;
2422     } # LI
2423    
2424     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2425     !!!next-token;
2426     return;
2427     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2428     ## has a p element in scope
2429 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2430 wakaba 1.1 if ($_->[1] eq 'p') {
2431     !!!back-token;
2432     $token = {type => 'end tag', tag_name => 'p'};
2433     return;
2434     } elsif ({
2435     table => 1, caption => 1, td => 1, th => 1,
2436     button => 1, marquee => 1, object => 1, html => 1,
2437     }->{$_->[1]}) {
2438     last INSCOPE;
2439     }
2440     } # INSCOPE
2441    
2442     ## Step 1
2443     my $i = -1;
2444 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2445 wakaba 1.1 LI: {
2446     ## Step 2
2447     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2448 wakaba 1.8 if ($i != -1) {
2449     !!!parse-error (type => 'end tag missing:'.
2450     $self->{open_elements}->[-1]->[1]);
2451     ## TODO: test
2452     }
2453 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2454 wakaba 1.1 last LI;
2455     }
2456    
2457     ## Step 3
2458     if (not $formatting_category->{$node->[1]} and
2459     #not $phrasing_category->{$node->[1]} and
2460     ($special_category->{$node->[1]} or
2461     $scoping_category->{$node->[1]}) and
2462     $node->[1] ne 'address' and $node->[1] ne 'div') {
2463     last LI;
2464     }
2465    
2466     ## Step 4
2467     $i--;
2468 wakaba 1.3 $node = $self->{open_elements}->[$i];
2469 wakaba 1.1 redo LI;
2470     } # LI
2471    
2472     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2473     !!!next-token;
2474     return;
2475     } elsif ($token->{tag_name} eq 'plaintext') {
2476     ## has a p element in scope
2477 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2478 wakaba 1.1 if ($_->[1] eq 'p') {
2479     !!!back-token;
2480     $token = {type => 'end tag', tag_name => 'p'};
2481     return;
2482     } elsif ({
2483     table => 1, caption => 1, td => 1, th => 1,
2484     button => 1, marquee => 1, object => 1, html => 1,
2485     }->{$_->[1]}) {
2486     last INSCOPE;
2487     }
2488     } # INSCOPE
2489    
2490     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2491    
2492     $self->{content_model_flag} = 'PLAINTEXT';
2493    
2494     !!!next-token;
2495     return;
2496     } elsif ({
2497     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2498     }->{$token->{tag_name}}) {
2499     ## has a p element in scope
2500 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2501     my $node = $self->{open_elements}->[$_];
2502 wakaba 1.1 if ($node->[1] eq 'p') {
2503     !!!back-token;
2504     $token = {type => 'end tag', tag_name => 'p'};
2505     return;
2506     } elsif ({
2507     table => 1, caption => 1, td => 1, th => 1,
2508     button => 1, marquee => 1, object => 1, html => 1,
2509     }->{$node->[1]}) {
2510     last INSCOPE;
2511     }
2512     } # INSCOPE
2513    
2514     ## has an element in scope
2515     my $i;
2516 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2517     my $node = $self->{open_elements}->[$_];
2518 wakaba 1.1 if ({
2519     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2520     }->{$node->[1]}) {
2521     $i = $_;
2522     last INSCOPE;
2523     } elsif ({
2524     table => 1, caption => 1, td => 1, th => 1,
2525     button => 1, marquee => 1, object => 1, html => 1,
2526     }->{$node->[1]}) {
2527     last INSCOPE;
2528     }
2529     } # INSCOPE
2530    
2531     if (defined $i) {
2532 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2533     splice @{$self->{open_elements}}, $i;
2534 wakaba 1.1 }
2535    
2536     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2537    
2538     !!!next-token;
2539     return;
2540     } elsif ($token->{tag_name} eq 'a') {
2541     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2542     my $node = $active_formatting_elements->[$i];
2543     if ($node->[1] eq 'a') {
2544 wakaba 1.3 !!!parse-error (type => 'in a:a');
2545 wakaba 1.1
2546     !!!back-token;
2547     $token = {type => 'end tag', tag_name => 'a'};
2548     $formatting_end_tag->($token->{tag_name});
2549    
2550     AFE2: for (reverse 0..$#$active_formatting_elements) {
2551     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2552     splice @$active_formatting_elements, $_, 1;
2553     last AFE2;
2554     }
2555     } # AFE2
2556 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2557     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2558     splice @{$self->{open_elements}}, $_, 1;
2559 wakaba 1.1 last OE;
2560     }
2561     } # OE
2562     last AFE;
2563     } elsif ($node->[0] eq '#marker') {
2564     last AFE;
2565     }
2566     } # AFE
2567    
2568     $reconstruct_active_formatting_elements->($insert_to_current);
2569    
2570     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2571 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2572 wakaba 1.1
2573     !!!next-token;
2574     return;
2575     } elsif ({
2576     b => 1, big => 1, em => 1, font => 1, i => 1,
2577     nobr => 1, s => 1, small => 1, strile => 1,
2578     strong => 1, tt => 1, u => 1,
2579     }->{$token->{tag_name}}) {
2580     $reconstruct_active_formatting_elements->($insert_to_current);
2581    
2582     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2583 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2584 wakaba 1.1
2585     !!!next-token;
2586     return;
2587     } elsif ($token->{tag_name} eq 'button') {
2588     ## has a button element in scope
2589 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2590     my $node = $self->{open_elements}->[$_];
2591 wakaba 1.1 if ($node->[1] eq 'button') {
2592 wakaba 1.3 !!!parse-error (type => 'in button:button');
2593 wakaba 1.1 !!!back-token;
2594     $token = {type => 'end tag', tag_name => 'button'};
2595     return;
2596     } elsif ({
2597     table => 1, caption => 1, td => 1, th => 1,
2598     button => 1, marquee => 1, object => 1, html => 1,
2599     }->{$node->[1]}) {
2600     last INSCOPE;
2601     }
2602     } # INSCOPE
2603    
2604     $reconstruct_active_formatting_elements->($insert_to_current);
2605    
2606     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2607     push @$active_formatting_elements, ['#marker', ''];
2608    
2609     !!!next-token;
2610     return;
2611     } elsif ($token->{tag_name} eq 'marquee' or
2612     $token->{tag_name} eq 'object') {
2613     $reconstruct_active_formatting_elements->($insert_to_current);
2614    
2615     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2616     push @$active_formatting_elements, ['#marker', ''];
2617    
2618     !!!next-token;
2619     return;
2620     } elsif ($token->{tag_name} eq 'xmp') {
2621     $reconstruct_active_formatting_elements->($insert_to_current);
2622    
2623     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2624    
2625     $self->{content_model_flag} = 'CDATA';
2626 wakaba 1.13 delete $self->{escape}; # MUST
2627 wakaba 1.1
2628     !!!next-token;
2629     return;
2630     } elsif ($token->{tag_name} eq 'table') {
2631     ## has a p element in scope
2632 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2633 wakaba 1.1 if ($_->[1] eq 'p') {
2634     !!!back-token;
2635     $token = {type => 'end tag', tag_name => 'p'};
2636     return;
2637     } elsif ({
2638     table => 1, caption => 1, td => 1, th => 1,
2639     button => 1, marquee => 1, object => 1, html => 1,
2640     }->{$_->[1]}) {
2641     last INSCOPE;
2642     }
2643     } # INSCOPE
2644    
2645     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2646    
2647 wakaba 1.3 $self->{insertion_mode} = 'in table';
2648 wakaba 1.1
2649     !!!next-token;
2650     return;
2651     } elsif ({
2652     area => 1, basefont => 1, bgsound => 1, br => 1,
2653     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2654     image => 1,
2655     }->{$token->{tag_name}}) {
2656     if ($token->{tag_name} eq 'image') {
2657 wakaba 1.3 !!!parse-error (type => 'image');
2658 wakaba 1.1 $token->{tag_name} = 'img';
2659     }
2660    
2661     $reconstruct_active_formatting_elements->($insert_to_current);
2662    
2663     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2664 wakaba 1.3 pop @{$self->{open_elements}};
2665 wakaba 1.1
2666     !!!next-token;
2667     return;
2668     } elsif ($token->{tag_name} eq 'hr') {
2669     ## has a p element in scope
2670 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2671 wakaba 1.1 if ($_->[1] eq 'p') {
2672     !!!back-token;
2673     $token = {type => 'end tag', tag_name => 'p'};
2674     return;
2675     } elsif ({
2676     table => 1, caption => 1, td => 1, th => 1,
2677     button => 1, marquee => 1, object => 1, html => 1,
2678     }->{$_->[1]}) {
2679     last INSCOPE;
2680     }
2681     } # INSCOPE
2682    
2683     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2684 wakaba 1.3 pop @{$self->{open_elements}};
2685 wakaba 1.1
2686     !!!next-token;
2687     return;
2688     } elsif ($token->{tag_name} eq 'input') {
2689     $reconstruct_active_formatting_elements->($insert_to_current);
2690    
2691     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2692 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2693     pop @{$self->{open_elements}};
2694 wakaba 1.1
2695     !!!next-token;
2696     return;
2697     } elsif ($token->{tag_name} eq 'isindex') {
2698 wakaba 1.3 !!!parse-error (type => 'isindex');
2699 wakaba 1.1
2700 wakaba 1.3 if (defined $self->{form_element}) {
2701 wakaba 1.1 ## Ignore the token
2702     !!!next-token;
2703     return;
2704     } else {
2705     my $at = $token->{attributes};
2706     $at->{name} = {name => 'name', value => 'isindex'};
2707     my @tokens = (
2708     {type => 'start tag', tag_name => 'form'},
2709     {type => 'start tag', tag_name => 'hr'},
2710     {type => 'start tag', tag_name => 'p'},
2711     {type => 'start tag', tag_name => 'label'},
2712     {type => 'character',
2713     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2714     ## TODO: make this configurable
2715     {type => 'start tag', tag_name => 'input', attributes => $at},
2716     #{type => 'character', data => ''}, # SHOULD
2717     {type => 'end tag', tag_name => 'label'},
2718     {type => 'end tag', tag_name => 'p'},
2719     {type => 'start tag', tag_name => 'hr'},
2720     {type => 'end tag', tag_name => 'form'},
2721     );
2722     $token = shift @tokens;
2723     !!!back-token (@tokens);
2724     return;
2725     }
2726     } elsif ({
2727     textarea => 1,
2728 wakaba 1.5 iframe => 1,
2729 wakaba 1.1 noembed => 1,
2730     noframes => 1,
2731     noscript => 0, ## TODO: 1 if scripting is enabled
2732     }->{$token->{tag_name}}) {
2733     my $tag_name = $token->{tag_name};
2734     my $el;
2735     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2736    
2737     if ($token->{tag_name} eq 'textarea') {
2738 wakaba 1.3 ## TODO: $self->{form_element} if defined
2739 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2740     } else {
2741     $self->{content_model_flag} = 'CDATA';
2742     }
2743 wakaba 1.13 delete $self->{escape}; # MUST
2744 wakaba 1.1
2745     $insert->($el);
2746    
2747     my $text = '';
2748 wakaba 1.9 if ($token->{tag_name} eq 'textarea') {
2749     !!!next-token;
2750     if ($token->{type} eq 'character') {
2751     $token->{data} =~ s/^\x0A//;
2752     unless (length $token->{data}) {
2753     !!!next-token;
2754     }
2755     }
2756     } else {
2757     !!!next-token;
2758     }
2759 wakaba 1.1 while ($token->{type} eq 'character') {
2760     $text .= $token->{data};
2761     !!!next-token;
2762     }
2763     if (length $text) {
2764     $el->manakai_append_text ($text);
2765     }
2766    
2767     $self->{content_model_flag} = 'PCDATA';
2768    
2769     if ($token->{type} eq 'end tag' and
2770     $token->{tag_name} eq $tag_name) {
2771     ## Ignore the token
2772     } else {
2773 wakaba 1.10 if ($token->{tag_name} eq 'textarea') {
2774     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2775     } else {
2776 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2777     }
2778 wakaba 1.1 ## ISSUE: And ignore?
2779     }
2780     !!!next-token;
2781     return;
2782     } elsif ($token->{tag_name} eq 'select') {
2783     $reconstruct_active_formatting_elements->($insert_to_current);
2784    
2785     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2786    
2787 wakaba 1.3 $self->{insertion_mode} = 'in select';
2788 wakaba 1.1 !!!next-token;
2789     return;
2790     } elsif ({
2791     caption => 1, col => 1, colgroup => 1, frame => 1,
2792     frameset => 1, head => 1, option => 1, optgroup => 1,
2793     tbody => 1, td => 1, tfoot => 1, th => 1,
2794     thead => 1, tr => 1,
2795     }->{$token->{tag_name}}) {
2796 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2797 wakaba 1.1 ## Ignore the token
2798     !!!next-token;
2799     return;
2800    
2801     ## ISSUE: An issue on HTML5 new elements in the spec.
2802     } else {
2803     $reconstruct_active_formatting_elements->($insert_to_current);
2804    
2805     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2806    
2807     !!!next-token;
2808     return;
2809     }
2810     } elsif ($token->{type} eq 'end tag') {
2811     if ($token->{tag_name} eq 'body') {
2812 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2813 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2814 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2815     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2816 wakaba 1.1 }
2817 wakaba 1.3 $self->{insertion_mode} = 'after body';
2818 wakaba 1.1 !!!next-token;
2819     return;
2820     } else {
2821 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2822 wakaba 1.1 ## Ignore the token
2823     !!!next-token;
2824     return;
2825     }
2826     } elsif ($token->{tag_name} eq 'html') {
2827 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2828 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2829 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2830     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
2831 wakaba 1.1 }
2832 wakaba 1.3 $self->{insertion_mode} = 'after body';
2833 wakaba 1.1 ## reprocess
2834     return;
2835     } else {
2836 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2837 wakaba 1.1 ## Ignore the token
2838     !!!next-token;
2839     return;
2840     }
2841     } elsif ({
2842     address => 1, blockquote => 1, center => 1, dir => 1,
2843     div => 1, dl => 1, fieldset => 1, listing => 1,
2844     menu => 1, ol => 1, pre => 1, ul => 1,
2845     p => 1,
2846     dd => 1, dt => 1, li => 1,
2847     button => 1, marquee => 1, object => 1,
2848     }->{$token->{tag_name}}) {
2849     ## has an element in scope
2850     my $i;
2851 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2852     my $node = $self->{open_elements}->[$_];
2853 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2854     ## generate implied end tags
2855     if ({
2856     dd => ($token->{tag_name} ne 'dd'),
2857     dt => ($token->{tag_name} ne 'dt'),
2858     li => ($token->{tag_name} ne 'li'),
2859     p => ($token->{tag_name} ne 'p'),
2860     td => 1, th => 1, tr => 1,
2861 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2862 wakaba 1.1 !!!back-token;
2863     $token = {type => 'end tag',
2864 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2865 wakaba 1.1 return;
2866     }
2867     $i = $_;
2868     last INSCOPE unless $token->{tag_name} eq 'p';
2869     } elsif ({
2870     table => 1, caption => 1, td => 1, th => 1,
2871     button => 1, marquee => 1, object => 1, html => 1,
2872     }->{$node->[1]}) {
2873     last INSCOPE;
2874     }
2875     } # INSCOPE
2876    
2877 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2878     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2879 wakaba 1.1 }
2880    
2881 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2882 wakaba 1.1 $clear_up_to_marker->()
2883     if {
2884     button => 1, marquee => 1, object => 1,
2885     }->{$token->{tag_name}};
2886     !!!next-token;
2887     return;
2888 wakaba 1.12 } elsif ($token->{tag_name} eq 'form') {
2889     ## has an element in scope
2890     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2891     my $node = $self->{open_elements}->[$_];
2892     if ($node->[1] eq $token->{tag_name}) {
2893     ## generate implied end tags
2894     if ({
2895     dd => 1, dt => 1, li => 1, p => 1,
2896     td => 1, th => 1, tr => 1,
2897     }->{$self->{open_elements}->[-1]->[1]}) {
2898     !!!back-token;
2899     $token = {type => 'end tag',
2900     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2901     return;
2902     }
2903     last INSCOPE;
2904     } elsif ({
2905     table => 1, caption => 1, td => 1, th => 1,
2906     button => 1, marquee => 1, object => 1, html => 1,
2907     }->{$node->[1]}) {
2908     last INSCOPE;
2909     }
2910     } # INSCOPE
2911    
2912     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
2913     pop @{$self->{open_elements}};
2914     } else {
2915     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2916     }
2917    
2918     undef $self->{form_element};
2919     !!!next-token;
2920     return;
2921 wakaba 1.1 } elsif ({
2922     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2923     }->{$token->{tag_name}}) {
2924     ## has an element in scope
2925     my $i;
2926 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2927     my $node = $self->{open_elements}->[$_];
2928 wakaba 1.1 if ({
2929     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2930     }->{$node->[1]}) {
2931     ## generate implied end tags
2932     if ({
2933     dd => 1, dt => 1, li => 1, p => 1,
2934     td => 1, th => 1, tr => 1,
2935 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2936 wakaba 1.1 !!!back-token;
2937     $token = {type => 'end tag',
2938 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2939 wakaba 1.1 return;
2940     }
2941     $i = $_;
2942     last INSCOPE;
2943     } elsif ({
2944     table => 1, caption => 1, td => 1, th => 1,
2945     button => 1, marquee => 1, object => 1, html => 1,
2946     }->{$node->[1]}) {
2947     last INSCOPE;
2948     }
2949     } # INSCOPE
2950    
2951 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2952     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2953 wakaba 1.1 }
2954    
2955 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2956 wakaba 1.1 !!!next-token;
2957     return;
2958     } elsif ({
2959     a => 1,
2960     b => 1, big => 1, em => 1, font => 1, i => 1,
2961     nobr => 1, s => 1, small => 1, strile => 1,
2962     strong => 1, tt => 1, u => 1,
2963     }->{$token->{tag_name}}) {
2964     $formatting_end_tag->($token->{tag_name});
2965 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
2966 wakaba 1.1 return;
2967     } elsif ({
2968     caption => 1, col => 1, colgroup => 1, frame => 1,
2969     frameset => 1, head => 1, option => 1, optgroup => 1,
2970     tbody => 1, td => 1, tfoot => 1, th => 1,
2971     thead => 1, tr => 1,
2972     area => 1, basefont => 1, bgsound => 1, br => 1,
2973     embed => 1, hr => 1, iframe => 1, image => 1,
2974 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
2975 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
2976     table => 1, textarea => 1, wbr => 1,
2977     noscript => 0, ## TODO: if scripting is enabled
2978     }->{$token->{tag_name}}) {
2979 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2980 wakaba 1.1 ## Ignore the token
2981     !!!next-token;
2982     return;
2983    
2984     ## ISSUE: Issue on HTML5 new elements in spec
2985    
2986     } else {
2987     ## Step 1
2988     my $node_i = -1;
2989 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2990 wakaba 1.1
2991     ## Step 2
2992     S2: {
2993     if ($node->[1] eq $token->{tag_name}) {
2994     ## Step 1
2995     ## generate implied end tags
2996     if ({
2997     dd => 1, dt => 1, li => 1, p => 1,
2998     td => 1, th => 1, tr => 1,
2999 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3000 wakaba 1.1 !!!back-token;
3001     $token = {type => 'end tag',
3002 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3003 wakaba 1.1 return;
3004     }
3005    
3006     ## Step 2
3007 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
3008     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3009 wakaba 1.1 }
3010    
3011     ## Step 3
3012 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
3013    
3014     !!!next-token;
3015 wakaba 1.1 last S2;
3016     } else {
3017     ## Step 3
3018     if (not $formatting_category->{$node->[1]} and
3019     #not $phrasing_category->{$node->[1]} and
3020     ($special_category->{$node->[1]} or
3021     $scoping_category->{$node->[1]})) {
3022 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
3023 wakaba 1.1 ## Ignore the token
3024     !!!next-token;
3025     last S2;
3026     }
3027     }
3028    
3029     ## Step 4
3030     $node_i--;
3031 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
3032 wakaba 1.1
3033     ## Step 5;
3034     redo S2;
3035     } # S2
3036 wakaba 1.3 return;
3037 wakaba 1.1 }
3038     }
3039     }; # $in_body
3040    
3041     B: {
3042 wakaba 1.3 if ($phase eq 'main') {
3043 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
3044 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
3045 wakaba 1.1 ## Ignore the token
3046     ## Stay in the phase
3047     !!!next-token;
3048     redo B;
3049     } elsif ($token->{type} eq 'start tag' and
3050     $token->{tag_name} eq 'html') {
3051     ## TODO: unless it is the first start tag token, parse-error
3052 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
3053 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3054     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3055     $top_el->set_attribute_ns
3056     (undef, [undef, $attr_name],
3057     $token->{attributes}->{$attr_name}->{value});
3058     }
3059     }
3060     !!!next-token;
3061     redo B;
3062     } elsif ($token->{type} eq 'end-of-file') {
3063     ## Generate implied end tags
3064     if ({
3065     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3066 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3067 wakaba 1.1 !!!back-token;
3068 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3069 wakaba 1.1 redo B;
3070     }
3071    
3072 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
3073     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3074     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3075     } elsif (defined $self->{inner_html_node} and
3076     @{$self->{open_elements}} > 1 and
3077     $self->{open_elements}->[1]->[1] ne 'body') {
3078     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3079 wakaba 1.1 }
3080    
3081     ## Stop parsing
3082     last B;
3083    
3084     ## ISSUE: There is an issue in the spec.
3085     } else {
3086 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
3087 wakaba 1.1 if ($token->{type} eq 'character') {
3088     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3089 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3090 wakaba 1.1 unless (length $token->{data}) {
3091     !!!next-token;
3092     redo B;
3093     }
3094     }
3095     ## As if <head>
3096 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3097     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3098     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3099     $self->{insertion_mode} = 'in head';
3100 wakaba 1.1 ## reprocess
3101     redo B;
3102     } elsif ($token->{type} eq 'comment') {
3103     my $comment = $self->{document}->create_comment ($token->{data});
3104 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3105 wakaba 1.1 !!!next-token;
3106     redo B;
3107     } elsif ($token->{type} eq 'start tag') {
3108     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3109 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3110     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3111     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3112     $self->{insertion_mode} = 'in head';
3113 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3114     !!!next-token;
3115     #} elsif ({
3116     # base => 1, link => 1, meta => 1,
3117     # script => 1, style => 1, title => 1,
3118     # }->{$token->{tag_name}}) {
3119     # ## reprocess
3120     } else {
3121     ## reprocess
3122     }
3123     redo B;
3124     } elsif ($token->{type} eq 'end tag') {
3125     if ($token->{tag_name} eq 'html') {
3126     ## As if <head>
3127 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3128     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3129     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3130     $self->{insertion_mode} = 'in head';
3131 wakaba 1.1 ## reprocess
3132     redo B;
3133     } else {
3134 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3135 wakaba 1.1 ## Ignore the token
3136     !!!next-token;
3137     redo B;
3138     }
3139     } else {
3140     die "$0: $token->{type}: Unknown type";
3141     }
3142 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3143 wakaba 1.1 if ($token->{type} eq 'character') {
3144     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3145 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3146 wakaba 1.1 unless (length $token->{data}) {
3147     !!!next-token;
3148     redo B;
3149     }
3150     }
3151    
3152     #
3153     } elsif ($token->{type} eq 'comment') {
3154     my $comment = $self->{document}->create_comment ($token->{data});
3155 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3156 wakaba 1.1 !!!next-token;
3157     redo B;
3158     } elsif ($token->{type} eq 'start tag') {
3159     if ($token->{tag_name} eq 'title') {
3160     ## NOTE: There is an "as if in head" code clone
3161     my $title_el;
3162     !!!create-element ($title_el, 'title', $token->{attributes});
3163 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3164 wakaba 1.1 ->append_child ($title_el);
3165     $self->{content_model_flag} = 'RCDATA';
3166 wakaba 1.13 delete $self->{escape}; # MUST
3167 wakaba 1.1
3168     my $text = '';
3169     !!!next-token;
3170     while ($token->{type} eq 'character') {
3171     $text .= $token->{data};
3172     !!!next-token;
3173     }
3174     if (length $text) {
3175     $title_el->manakai_append_text ($text);
3176     }
3177    
3178     $self->{content_model_flag} = 'PCDATA';
3179    
3180     if ($token->{type} eq 'end tag' and
3181     $token->{tag_name} eq 'title') {
3182     ## Ignore the token
3183     } else {
3184 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3185 wakaba 1.1 ## ISSUE: And ignore?
3186     }
3187     !!!next-token;
3188     redo B;
3189     } elsif ($token->{tag_name} eq 'style') {
3190     $style_start_tag->();
3191     redo B;
3192     } elsif ($token->{tag_name} eq 'script') {
3193     $script_start_tag->();
3194     redo B;
3195     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3196     ## NOTE: There are "as if in head" code clones
3197     my $el;
3198     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3199 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3200 wakaba 1.1 ->append_child ($el);
3201    
3202     !!!next-token;
3203     redo B;
3204     } elsif ($token->{tag_name} eq 'head') {
3205 wakaba 1.3 !!!parse-error (type => 'in head:head');
3206 wakaba 1.1 ## Ignore the token
3207     !!!next-token;
3208     redo B;
3209     } else {
3210     #
3211     }
3212     } elsif ($token->{type} eq 'end tag') {
3213     if ($token->{tag_name} eq 'head') {
3214 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3215     pop @{$self->{open_elements}};
3216 wakaba 1.1 } else {
3217 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3218 wakaba 1.1 }
3219 wakaba 1.3 $self->{insertion_mode} = 'after head';
3220 wakaba 1.1 !!!next-token;
3221     redo B;
3222     } elsif ($token->{tag_name} eq 'html') {
3223     #
3224     } else {
3225 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3226 wakaba 1.1 ## Ignore the token
3227     !!!next-token;
3228     redo B;
3229     }
3230     } else {
3231     #
3232     }
3233    
3234 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3235 wakaba 1.1 ## As if </head>
3236 wakaba 1.3 pop @{$self->{open_elements}};
3237 wakaba 1.1 }
3238 wakaba 1.3 $self->{insertion_mode} = 'after head';
3239 wakaba 1.1 ## reprocess
3240     redo B;
3241    
3242     ## ISSUE: An issue in the spec.
3243 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3244 wakaba 1.1 if ($token->{type} eq 'character') {
3245     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3246 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3247 wakaba 1.1 unless (length $token->{data}) {
3248     !!!next-token;
3249     redo B;
3250     }
3251     }
3252    
3253     #
3254     } elsif ($token->{type} eq 'comment') {
3255     my $comment = $self->{document}->create_comment ($token->{data});
3256 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3257 wakaba 1.1 !!!next-token;
3258     redo B;
3259     } elsif ($token->{type} eq 'start tag') {
3260     if ($token->{tag_name} eq 'body') {
3261     !!!insert-element ('body', $token->{attributes});
3262 wakaba 1.3 $self->{insertion_mode} = 'in body';
3263 wakaba 1.1 !!!next-token;
3264     redo B;
3265     } elsif ($token->{tag_name} eq 'frameset') {
3266     !!!insert-element ('frameset', $token->{attributes});
3267 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3268 wakaba 1.1 !!!next-token;
3269     redo B;
3270     } elsif ({
3271     base => 1, link => 1, meta => 1,
3272 wakaba 1.3 script => 1, style => 1, title => 1,
3273 wakaba 1.1 }->{$token->{tag_name}}) {
3274 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3275     $self->{insertion_mode} = 'in head';
3276 wakaba 1.1 ## reprocess
3277     redo B;
3278     } else {
3279     #
3280     }
3281     } else {
3282     #
3283     }
3284    
3285     ## As if <body>
3286     !!!insert-element ('body');
3287 wakaba 1.3 $self->{insertion_mode} = 'in body';
3288 wakaba 1.1 ## reprocess
3289     redo B;
3290 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3291 wakaba 1.1 if ($token->{type} eq 'character') {
3292     ## NOTE: There is a code clone of "character in body".
3293     $reconstruct_active_formatting_elements->($insert_to_current);
3294    
3295 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3296 wakaba 1.1
3297     !!!next-token;
3298     redo B;
3299     } elsif ($token->{type} eq 'comment') {
3300     ## NOTE: There is a code clone of "comment in body".
3301     my $comment = $self->{document}->create_comment ($token->{data});
3302 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3303 wakaba 1.1 !!!next-token;
3304     redo B;
3305     } else {
3306     $in_body->($insert_to_current);
3307     redo B;
3308     }
3309 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3310 wakaba 1.1 if ($token->{type} eq 'character') {
3311     ## NOTE: There are "character in table" code clones.
3312     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3313 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3314 wakaba 1.1
3315     unless (length $token->{data}) {
3316     !!!next-token;
3317     redo B;
3318     }
3319     }
3320    
3321 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3322    
3323 wakaba 1.1 ## As if in body, but insert into foster parent element
3324     ## ISSUE: Spec says that "whenever a node would be inserted
3325     ## into the current node" while characters might not
3326     ## result in a new Text node.
3327     $reconstruct_active_formatting_elements->($insert_to_foster);
3328    
3329     if ({
3330     table => 1, tbody => 1, tfoot => 1,
3331     thead => 1, tr => 1,
3332 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3333 wakaba 1.1 # MUST
3334     my $foster_parent_element;
3335     my $next_sibling;
3336     my $prev_sibling;
3337 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3338     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3339     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3340 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3341     $foster_parent_element = $parent;
3342 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3343 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3344     } else {
3345 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3346 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3347     }
3348     last OE;
3349     }
3350     } # OE
3351 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3352 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3353     unless defined $foster_parent_element;
3354     if (defined $prev_sibling and
3355     $prev_sibling->node_type == 3) {
3356     $prev_sibling->manakai_append_text ($token->{data});
3357     } else {
3358     $foster_parent_element->insert_before
3359     ($self->{document}->create_text_node ($token->{data}),
3360     $next_sibling);
3361     }
3362     } else {
3363 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3364 wakaba 1.1 }
3365    
3366     !!!next-token;
3367     redo B;
3368     } elsif ($token->{type} eq 'comment') {
3369     my $comment = $self->{document}->create_comment ($token->{data});
3370 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3371 wakaba 1.1 !!!next-token;
3372     redo B;
3373     } elsif ($token->{type} eq 'start tag') {
3374     if ({
3375     caption => 1,
3376     colgroup => 1,
3377     tbody => 1, tfoot => 1, thead => 1,
3378     }->{$token->{tag_name}}) {
3379     ## Clear back to table context
3380 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3381     $self->{open_elements}->[-1]->[1] ne 'html') {
3382     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3383     pop @{$self->{open_elements}};
3384 wakaba 1.1 }
3385    
3386     push @$active_formatting_elements, ['#marker', '']
3387     if $token->{tag_name} eq 'caption';
3388    
3389     !!!insert-element ($token->{tag_name}, $token->{attributes});
3390 wakaba 1.3 $self->{insertion_mode} = {
3391 wakaba 1.1 caption => 'in caption',
3392     colgroup => 'in column group',
3393     tbody => 'in table body',
3394     tfoot => 'in table body',
3395     thead => 'in table body',
3396     }->{$token->{tag_name}};
3397     !!!next-token;
3398     redo B;
3399     } elsif ({
3400     col => 1,
3401     td => 1, th => 1, tr => 1,
3402     }->{$token->{tag_name}}) {
3403     ## Clear back to table context
3404 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3405     $self->{open_elements}->[-1]->[1] ne 'html') {
3406     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3407     pop @{$self->{open_elements}};
3408 wakaba 1.1 }
3409    
3410     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3411 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3412 wakaba 1.1 ? 'in column group' : 'in table body';
3413     ## reprocess
3414     redo B;
3415     } elsif ($token->{tag_name} eq 'table') {
3416     ## NOTE: There are code clones for this "table in table"
3417 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3418 wakaba 1.1
3419     ## As if </table>
3420     ## have a table element in table scope
3421     my $i;
3422 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3423     my $node = $self->{open_elements}->[$_];
3424 wakaba 1.1 if ($node->[1] eq 'table') {
3425     $i = $_;
3426     last INSCOPE;
3427     } elsif ({
3428     table => 1, html => 1,
3429     }->{$node->[1]}) {
3430     last INSCOPE;
3431     }
3432     } # INSCOPE
3433     unless (defined $i) {
3434 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3435 wakaba 1.1 ## Ignore tokens </table><table>
3436     !!!next-token;
3437     redo B;
3438     }
3439    
3440     ## generate implied end tags
3441     if ({
3442     dd => 1, dt => 1, li => 1, p => 1,
3443     td => 1, th => 1, tr => 1,
3444 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3445 wakaba 1.1 !!!back-token; # <table>
3446     $token = {type => 'end tag', tag_name => 'table'};
3447     !!!back-token;
3448     $token = {type => 'end tag',
3449 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3450 wakaba 1.1 redo B;
3451     }
3452    
3453 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3454     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3455 wakaba 1.1 }
3456    
3457 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3458 wakaba 1.1
3459 wakaba 1.3 $self->_reset_insertion_mode;
3460 wakaba 1.1
3461     ## reprocess
3462     redo B;
3463     } else {
3464     #
3465     }
3466     } elsif ($token->{type} eq 'end tag') {
3467     if ($token->{tag_name} eq 'table') {
3468     ## have a table element in table scope
3469     my $i;
3470 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3471     my $node = $self->{open_elements}->[$_];
3472 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3473     $i = $_;
3474     last INSCOPE;
3475     } elsif ({
3476     table => 1, html => 1,
3477     }->{$node->[1]}) {
3478     last INSCOPE;
3479     }
3480     } # INSCOPE
3481     unless (defined $i) {
3482 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3483 wakaba 1.1 ## Ignore the token
3484     !!!next-token;
3485     redo B;
3486     }
3487    
3488     ## generate implied end tags
3489     if ({
3490     dd => 1, dt => 1, li => 1, p => 1,
3491     td => 1, th => 1, tr => 1,
3492 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3493 wakaba 1.1 !!!back-token;
3494     $token = {type => 'end tag',
3495 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3496 wakaba 1.1 redo B;
3497     }
3498    
3499 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3500     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3501 wakaba 1.1 }
3502    
3503 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3504 wakaba 1.1
3505 wakaba 1.3 $self->_reset_insertion_mode;
3506 wakaba 1.1
3507     !!!next-token;
3508     redo B;
3509     } elsif ({
3510     body => 1, caption => 1, col => 1, colgroup => 1,
3511     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3512     thead => 1, tr => 1,
3513     }->{$token->{tag_name}}) {
3514 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3515 wakaba 1.1 ## Ignore the token
3516     !!!next-token;
3517     redo B;
3518     } else {
3519     #
3520     }
3521     } else {
3522     #
3523     }
3524    
3525 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3526 wakaba 1.1 $in_body->($insert_to_foster);
3527     redo B;
3528 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3529 wakaba 1.1 if ($token->{type} eq 'character') {
3530     ## NOTE: This is a code clone of "character in body".
3531     $reconstruct_active_formatting_elements->($insert_to_current);
3532    
3533 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3534 wakaba 1.1
3535     !!!next-token;
3536     redo B;
3537     } elsif ($token->{type} eq 'comment') {
3538     ## NOTE: This is a code clone of "comment in body".
3539     my $comment = $self->{document}->create_comment ($token->{data});
3540 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3541 wakaba 1.1 !!!next-token;
3542     redo B;
3543     } elsif ($token->{type} eq 'start tag') {
3544     if ({
3545     caption => 1, col => 1, colgroup => 1, tbody => 1,
3546     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3547     }->{$token->{tag_name}}) {
3548 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3549 wakaba 1.1
3550     ## As if </caption>
3551     ## have a table element in table scope
3552     my $i;
3553 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3554     my $node = $self->{open_elements}->[$_];
3555 wakaba 1.1 if ($node->[1] eq 'caption') {
3556     $i = $_;
3557     last INSCOPE;
3558     } elsif ({
3559     table => 1, html => 1,
3560     }->{$node->[1]}) {
3561     last INSCOPE;
3562     }
3563     } # INSCOPE
3564     unless (defined $i) {
3565 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3566 wakaba 1.1 ## Ignore the token
3567     !!!next-token;
3568     redo B;
3569     }
3570    
3571     ## generate implied end tags
3572     if ({
3573     dd => 1, dt => 1, li => 1, p => 1,
3574     td => 1, th => 1, tr => 1,
3575 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3576 wakaba 1.1 !!!back-token; # <?>
3577     $token = {type => 'end tag', tag_name => 'caption'};
3578     !!!back-token;
3579     $token = {type => 'end tag',
3580 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3581 wakaba 1.1 redo B;
3582     }
3583    
3584 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3585     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3586 wakaba 1.1 }
3587    
3588 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3589 wakaba 1.1
3590     $clear_up_to_marker->();
3591    
3592 wakaba 1.3 $self->{insertion_mode} = 'in table';
3593 wakaba 1.1
3594     ## reprocess
3595     redo B;
3596     } else {
3597     #
3598     }
3599     } elsif ($token->{type} eq 'end tag') {
3600     if ($token->{tag_name} eq 'caption') {
3601     ## have a table element in table scope
3602     my $i;
3603 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3604     my $node = $self->{open_elements}->[$_];
3605 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3606     $i = $_;
3607     last INSCOPE;
3608     } elsif ({
3609     table => 1, html => 1,
3610     }->{$node->[1]}) {
3611     last INSCOPE;
3612     }
3613     } # INSCOPE
3614     unless (defined $i) {
3615 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3616 wakaba 1.1 ## Ignore the token
3617     !!!next-token;
3618     redo B;
3619     }
3620    
3621     ## generate implied end tags
3622     if ({
3623     dd => 1, dt => 1, li => 1, p => 1,
3624     td => 1, th => 1, tr => 1,
3625 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3626 wakaba 1.1 !!!back-token;
3627     $token = {type => 'end tag',
3628 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3629 wakaba 1.1 redo B;
3630     }
3631    
3632 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3633     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3634 wakaba 1.1 }
3635    
3636 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3637 wakaba 1.1
3638     $clear_up_to_marker->();
3639    
3640 wakaba 1.3 $self->{insertion_mode} = 'in table';
3641 wakaba 1.1
3642     !!!next-token;
3643     redo B;
3644     } elsif ($token->{tag_name} eq 'table') {
3645 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3646 wakaba 1.1
3647     ## As if </caption>
3648     ## have a table element in table scope
3649     my $i;
3650 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3651     my $node = $self->{open_elements}->[$_];
3652 wakaba 1.1 if ($node->[1] eq 'caption') {
3653     $i = $_;
3654     last INSCOPE;
3655     } elsif ({
3656     table => 1, html => 1,
3657     }->{$node->[1]}) {
3658     last INSCOPE;
3659     }
3660     } # INSCOPE
3661     unless (defined $i) {
3662 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3663 wakaba 1.1 ## Ignore the token
3664     !!!next-token;
3665     redo B;
3666     }
3667    
3668     ## generate implied end tags
3669     if ({
3670     dd => 1, dt => 1, li => 1, p => 1,
3671     td => 1, th => 1, tr => 1,
3672 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3673 wakaba 1.1 !!!back-token; # </table>
3674     $token = {type => 'end tag', tag_name => 'caption'};
3675     !!!back-token;
3676     $token = {type => 'end tag',
3677 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3678 wakaba 1.1 redo B;
3679     }
3680    
3681 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3682     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3683 wakaba 1.1 }
3684    
3685 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3686 wakaba 1.1
3687     $clear_up_to_marker->();
3688    
3689 wakaba 1.3 $self->{insertion_mode} = 'in table';
3690 wakaba 1.1
3691     ## reprocess
3692     redo B;
3693     } elsif ({
3694     body => 1, col => 1, colgroup => 1,
3695     html => 1, tbody => 1, td => 1, tfoot => 1,
3696     th => 1, thead => 1, tr => 1,
3697     }->{$token->{tag_name}}) {
3698 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3699 wakaba 1.1 ## Ignore the token
     ## The tokenizer must be advanced before looping; |redo B| alone would
     ## reprocess this same end tag in the same insertion mode forever.
     !!!next-token;
3700     redo B;
3701     } else {
3702     #
3703     }
3704     } else {
3705     #
3706     }
3707    
3708     $in_body->($insert_to_current);
3709     redo B;
3710 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3711 wakaba 1.1 if ($token->{type} eq 'character') {
3712     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3713 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3714 wakaba 1.1 unless (length $token->{data}) {
3715     !!!next-token;
3716     redo B;
3717     }
3718     }
3719    
3720     #
3721     } elsif ($token->{type} eq 'comment') {
3722     my $comment = $self->{document}->create_comment ($token->{data});
3723 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3724 wakaba 1.1 !!!next-token;
3725     redo B;
3726     } elsif ($token->{type} eq 'start tag') {
3727     if ($token->{tag_name} eq 'col') {
3728     !!!insert-element ($token->{tag_name}, $token->{attributes});
3729 wakaba 1.3 pop @{$self->{open_elements}};
3730 wakaba 1.1 !!!next-token;
3731     redo B;
3732     } else {
3733     #
3734     }
3735     } elsif ($token->{type} eq 'end tag') {
3736     if ($token->{tag_name} eq 'colgroup') {
3737 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3738     !!!parse-error (type => 'unmatched end tag:colgroup');
3739 wakaba 1.1 ## Ignore the token
3740     !!!next-token;
3741     redo B;
3742     } else {
3743 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3744     $self->{insertion_mode} = 'in table';
3745 wakaba 1.1 !!!next-token;
3746     redo B;
3747     }
3748     } elsif ($token->{tag_name} eq 'col') {
3749 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3750 wakaba 1.1 ## Ignore the token
3751     !!!next-token;
3752     redo B;
3753     } else {
3754     #
3755     }
3756     } else {
3757     #
3758     }
3759    
3760     ## As if </colgroup>
3761 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3762     !!!parse-error (type => 'unmatched end tag:colgroup');
3763 wakaba 1.1 ## Ignore the token
3764     !!!next-token;
3765     redo B;
3766     } else {
3767 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3768     $self->{insertion_mode} = 'in table';
3769 wakaba 1.1 ## reprocess
3770     redo B;
3771     }
3772 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3773 wakaba 1.1 if ($token->{type} eq 'character') {
3774     ## NOTE: This is a "character in table" code clone.
3775     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3776 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3777 wakaba 1.1
3778     unless (length $token->{data}) {
3779     !!!next-token;
3780     redo B;
3781     }
3782     }
3783    
3784 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3785    
3786 wakaba 1.1 ## As if in body, but insert into foster parent element
3787     ## ISSUE: Spec says that "whenever a node would be inserted
3788     ## into the current node" while characters might not
3789     ## result in a new Text node.
3790     $reconstruct_active_formatting_elements->($insert_to_foster);
3791    
3792     if ({
3793     table => 1, tbody => 1, tfoot => 1,
3794     thead => 1, tr => 1,
3795 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3796 wakaba 1.1 # MUST
3797     my $foster_parent_element;
3798     my $next_sibling;
3799     my $prev_sibling;
3800 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3801     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3802     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3803 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3804     $foster_parent_element = $parent;
3805 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3806 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3807     } else {
3808 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3809 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3810     }
3811     last OE;
3812     }
3813     } # OE
3814 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3815 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3816     unless defined $foster_parent_element;
3817     if (defined $prev_sibling and
3818     $prev_sibling->node_type == 3) {
3819     $prev_sibling->manakai_append_text ($token->{data});
3820     } else {
3821     $foster_parent_element->insert_before
3822     ($self->{document}->create_text_node ($token->{data}),
3823     $next_sibling);
3824     }
3825     } else {
3826 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3827 wakaba 1.1 }
3828    
3829     !!!next-token;
3830     redo B;
3831     } elsif ($token->{type} eq 'comment') {
3832     ## Copied from 'in table'
3833     my $comment = $self->{document}->create_comment ($token->{data});
3834 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3835 wakaba 1.1 !!!next-token;
3836     redo B;
3837     } elsif ($token->{type} eq 'start tag') {
3838     if ({
3839     tr => 1,
3840     th => 1, td => 1,
3841     }->{$token->{tag_name}}) {
3842 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3843     !!!parse-error (type => 'missing start tag:tr');
3844     }
3845    
3846 wakaba 1.1 ## Clear back to table body context
3847     while (not {
3848     tbody => 1, tfoot => 1, thead => 1, html => 1,
3849 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3850     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3851     pop @{$self->{open_elements}};
3852 wakaba 1.1 }
3853    
3854 wakaba 1.3 $self->{insertion_mode} = 'in row';
3855 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3856     !!!insert-element ($token->{tag_name}, $token->{attributes});
3857     !!!next-token;
3858     } else {
3859     !!!insert-element ('tr');
3860     ## reprocess
3861     }
3862     redo B;
3863     } elsif ({
3864     caption => 1, col => 1, colgroup => 1,
3865     tbody => 1, tfoot => 1, thead => 1,
3866     }->{$token->{tag_name}}) {
3867     ## have an element in table scope
3868     my $i;
3869 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3870     my $node = $self->{open_elements}->[$_];
3871 wakaba 1.1 if ({
3872     tbody => 1, thead => 1, tfoot => 1,
3873     }->{$node->[1]}) {
3874     $i = $_;
3875     last INSCOPE;
3876     } elsif ({
3877     table => 1, html => 1,
3878     }->{$node->[1]}) {
3879     last INSCOPE;
3880     }
3881     } # INSCOPE
3882     unless (defined $i) {
3883 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3884 wakaba 1.1 ## Ignore the token
3885     !!!next-token;
3886     redo B;
3887     }
3888    
3889     ## Clear back to table body context
3890     while (not {
3891     tbody => 1, tfoot => 1, thead => 1, html => 1,
3892 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3893     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3894     pop @{$self->{open_elements}};
3895 wakaba 1.1 }
3896    
3897     ## As if <{current node}>
3898     ## have an element in table scope
3899     ## true by definition
3900    
3901     ## Clear back to table body context
3902     ## nop by definition
3903    
3904 wakaba 1.3 pop @{$self->{open_elements}};
3905     $self->{insertion_mode} = 'in table';
3906 wakaba 1.1 ## reprocess
3907     redo B;
3908     } elsif ($token->{tag_name} eq 'table') {
3909     ## NOTE: This is a code clone of "table in table"
3910 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3911 wakaba 1.1
3912     ## As if </table>
3913     ## have a table element in table scope
3914     my $i;
3915 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3916     my $node = $self->{open_elements}->[$_];
3917 wakaba 1.1 if ($node->[1] eq 'table') {
3918     $i = $_;
3919     last INSCOPE;
3920     } elsif ({
3921     table => 1, html => 1,
3922     }->{$node->[1]}) {
3923     last INSCOPE;
3924     }
3925     } # INSCOPE
3926     unless (defined $i) {
3927 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3928 wakaba 1.1 ## Ignore tokens </table><table>
3929     !!!next-token;
3930     redo B;
3931     }
3932    
3933     ## generate implied end tags
3934     if ({
3935     dd => 1, dt => 1, li => 1, p => 1,
3936     td => 1, th => 1, tr => 1,
3937 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3938 wakaba 1.1 !!!back-token; # <table>
3939     $token = {type => 'end tag', tag_name => 'table'};
3940     !!!back-token;
3941     $token = {type => 'end tag',
3942 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3943 wakaba 1.1 redo B;
3944     }
3945    
3946 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3947     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3948 wakaba 1.1 }
3949    
3950 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3951 wakaba 1.1
3952 wakaba 1.3 $self->_reset_insertion_mode;
3953 wakaba 1.1
3954     ## reprocess
3955     redo B;
3956     } else {
3957     #
3958     }
3959     } elsif ($token->{type} eq 'end tag') {
3960     if ({
3961     tbody => 1, tfoot => 1, thead => 1,
3962     }->{$token->{tag_name}}) {
3963     ## have an element in table scope
3964     my $i;
3965 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3966     my $node = $self->{open_elements}->[$_];
3967 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3968     $i = $_;
3969     last INSCOPE;
3970     } elsif ({
3971     table => 1, html => 1,
3972     }->{$node->[1]}) {
3973     last INSCOPE;
3974     }
3975     } # INSCOPE
3976     unless (defined $i) {
3977 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3978 wakaba 1.1 ## Ignore the token
3979     !!!next-token;
3980     redo B;
3981     }
3982    
3983     ## Clear back to table body context
3984     while (not {
3985     tbody => 1, tfoot => 1, thead => 1, html => 1,
3986 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3987     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3988     pop @{$self->{open_elements}};
3989 wakaba 1.1 }
3990    
3991 wakaba 1.3 pop @{$self->{open_elements}};
3992     $self->{insertion_mode} = 'in table';
3993 wakaba 1.1 !!!next-token;
3994     redo B;
3995     } elsif ($token->{tag_name} eq 'table') {
3996     ## have an element in table scope
3997     my $i;
3998 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3999     my $node = $self->{open_elements}->[$_];
4000 wakaba 1.1 if ({
4001     tbody => 1, thead => 1, tfoot => 1,
4002     }->{$node->[1]}) {
4003     $i = $_;
4004     last INSCOPE;
4005     } elsif ({
4006     table => 1, html => 1,
4007     }->{$node->[1]}) {
4008     last INSCOPE;
4009     }
4010     } # INSCOPE
4011     unless (defined $i) {
4012 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4013 wakaba 1.1 ## Ignore the token
4014     !!!next-token;
4015     redo B;
4016     }
4017    
4018     ## Clear back to table body context
4019     while (not {
4020     tbody => 1, tfoot => 1, thead => 1, html => 1,
4021 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4022     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4023     pop @{$self->{open_elements}};
4024 wakaba 1.1 }
4025    
4026     ## As if <{current node}>
4027     ## have an element in table scope
4028     ## true by definition
4029    
4030     ## Clear back to table body context
4031     ## nop by definition
4032    
4033 wakaba 1.3 pop @{$self->{open_elements}};
4034     $self->{insertion_mode} = 'in table';
4035 wakaba 1.1 ## reprocess
4036     redo B;
4037     } elsif ({
4038     body => 1, caption => 1, col => 1, colgroup => 1,
4039     html => 1, td => 1, th => 1, tr => 1,
4040     }->{$token->{tag_name}}) {
4041 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4042 wakaba 1.1 ## Ignore the token
4043     !!!next-token;
4044     redo B;
4045     } else {
4046     #
4047     }
4048     } else {
4049     #
4050     }
4051    
4052     ## As if in table
4053 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4054 wakaba 1.1 $in_body->($insert_to_foster);
4055     redo B;
4056 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
4057 wakaba 1.1 if ($token->{type} eq 'character') {
4058     ## NOTE: This is a "character in table" code clone.
4059     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4060 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4061 wakaba 1.1
4062     unless (length $token->{data}) {
4063     !!!next-token;
4064     redo B;
4065     }
4066     }
4067    
4068 wakaba 1.3 !!!parse-error (type => 'in table:#character');
4069    
4070 wakaba 1.1 ## As if in body, but insert into foster parent element
4071     ## ISSUE: Spec says that "whenever a node would be inserted
4072     ## into the current node" while characters might not
4073     ## result in a new Text node.
4074     $reconstruct_active_formatting_elements->($insert_to_foster);
4075    
4076     if ({
4077     table => 1, tbody => 1, tfoot => 1,
4078     thead => 1, tr => 1,
4079 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4080 wakaba 1.1 # MUST
4081     my $foster_parent_element;
4082     my $next_sibling;
4083     my $prev_sibling;
4084 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4085     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4086     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4087 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4088     $foster_parent_element = $parent;
4089 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4090 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4091     } else {
4092 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4093 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4094     }
4095     last OE;
4096     }
4097     } # OE
4098 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4099 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4100     unless defined $foster_parent_element;
4101     if (defined $prev_sibling and
4102     $prev_sibling->node_type == 3) {
4103     $prev_sibling->manakai_append_text ($token->{data});
4104     } else {
4105     $foster_parent_element->insert_before
4106     ($self->{document}->create_text_node ($token->{data}),
4107     $next_sibling);
4108     }
4109     } else {
4110 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4111 wakaba 1.1 }
4112    
4113     !!!next-token;
4114     redo B;
4115     } elsif ($token->{type} eq 'comment') {
4116     ## Copied from 'in table'
4117     my $comment = $self->{document}->create_comment ($token->{data});
4118 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4119 wakaba 1.1 !!!next-token;
4120     redo B;
4121     } elsif ($token->{type} eq 'start tag') {
4122     if ($token->{tag_name} eq 'th' or
4123     $token->{tag_name} eq 'td') {
4124     ## Clear back to table row context
4125     while (not {
4126     tr => 1, html => 1,
4127 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4128     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4129     pop @{$self->{open_elements}};
4130 wakaba 1.1 }
4131    
4132     !!!insert-element ($token->{tag_name}, $token->{attributes});
4133 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4134 wakaba 1.1
4135     push @$active_formatting_elements, ['#marker', ''];
4136    
4137     !!!next-token;
4138     redo B;
4139     } elsif ({
4140     caption => 1, col => 1, colgroup => 1,
4141     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4142     }->{$token->{tag_name}}) {
4143     ## As if </tr>
4144     ## have an element in table scope
4145     my $i;
4146 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4147     my $node = $self->{open_elements}->[$_];
4148 wakaba 1.1 if ($node->[1] eq 'tr') {
4149     $i = $_;
4150     last INSCOPE;
4151     } elsif ({
4152     table => 1, html => 1,
4153     }->{$node->[1]}) {
4154     last INSCOPE;
4155     }
4156     } # INSCOPE
4157     unless (defined $i) {
     ## No |tr| element in table scope, so the implied </tr> cannot be
     ## processed: report it with the same error type spelling used by
     ## every other "unmatched end tag" site, then drop the token.
4158 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4159 wakaba 1.1 ## Ignore the token
4160     !!!next-token;
4161     redo B;
4162     }
4163    
4164     ## Clear back to table row context
4165     while (not {
4166     tr => 1, html => 1,
4167 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4168     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4169     pop @{$self->{open_elements}};
4170 wakaba 1.1 }
4171    
4172 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4173     $self->{insertion_mode} = 'in table body';
4174 wakaba 1.1 ## reprocess
4175     redo B;
4176     } elsif ($token->{tag_name} eq 'table') {
4177     ## NOTE: This is a code clone of "table in table"
4178 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4179 wakaba 1.1
4180     ## As if </table>
4181     ## have a table element in table scope
4182     my $i;
4183 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4184     my $node = $self->{open_elements}->[$_];
4185 wakaba 1.1 if ($node->[1] eq 'table') {
4186     $i = $_;
4187     last INSCOPE;
4188     } elsif ({
4189     table => 1, html => 1,
4190     }->{$node->[1]}) {
4191     last INSCOPE;
4192     }
4193     } # INSCOPE
4194     unless (defined $i) {
4195 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4196 wakaba 1.1 ## Ignore tokens </table><table>
4197     !!!next-token;
4198     redo B;
4199     }
4200    
4201     ## generate implied end tags
4202     if ({
4203     dd => 1, dt => 1, li => 1, p => 1,
4204     td => 1, th => 1, tr => 1,
4205 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4206 wakaba 1.1 !!!back-token; # <table>
4207     $token = {type => 'end tag', tag_name => 'table'};
4208     !!!back-token;
4209     $token = {type => 'end tag',
4210 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4211 wakaba 1.1 redo B;
4212     }
4213    
4214 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4215     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4216 wakaba 1.1 }
4217    
4218 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4219 wakaba 1.1
4220 wakaba 1.3 $self->_reset_insertion_mode;
4221 wakaba 1.1
4222     ## reprocess
4223     redo B;
4224     } else {
4225     #
4226     }
4227     } elsif ($token->{type} eq 'end tag') {
4228     if ($token->{tag_name} eq 'tr') {
4229     ## have an element in table scope
4230     my $i;
4231 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4232     my $node = $self->{open_elements}->[$_];
4233 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4234     $i = $_;
4235     last INSCOPE;
4236     } elsif ({
4237     table => 1, html => 1,
4238     }->{$node->[1]}) {
4239     last INSCOPE;
4240     }
4241     } # INSCOPE
4242     unless (defined $i) {
4243 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4244 wakaba 1.1 ## Ignore the token
4245     !!!next-token;
4246     redo B;
4247     }
4248    
4249     ## Clear back to table row context
4250     while (not {
4251     tr => 1, html => 1,
4252 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4253     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4254     pop @{$self->{open_elements}};
4255 wakaba 1.1 }
4256    
4257 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4258     $self->{insertion_mode} = 'in table body';
4259 wakaba 1.1 !!!next-token;
4260     redo B;
4261     } elsif ($token->{tag_name} eq 'table') {
4262     ## As if </tr>
4263     ## have an element in table scope
4264     my $i;
4265 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4266     my $node = $self->{open_elements}->[$_];
4267 wakaba 1.1 if ($node->[1] eq 'tr') {
4268     $i = $_;
4269     last INSCOPE;
4270     } elsif ({
4271     table => 1, html => 1,
4272     }->{$node->[1]}) {
4273     last INSCOPE;
4274     }
4275     } # INSCOPE
4276     unless (defined $i) {
     ## No |tr| element in table scope.  The error type carries the tag
     ## name of the offending token (here "table"), not the token type,
     ## which would yield the meaningless "unmatched end tag:end tag".
4277 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4278 wakaba 1.1 ## Ignore the token
4279     !!!next-token;
4280     redo B;
4281     }
4282    
4283     ## Clear back to table row context
4284     while (not {
4285     tr => 1, html => 1,
4286 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4287     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4288     pop @{$self->{open_elements}};
4289 wakaba 1.1 }
4290    
4291 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4292     $self->{insertion_mode} = 'in table body';
4293 wakaba 1.1 ## reprocess
4294     redo B;
4295     } elsif ({
4296     tbody => 1, tfoot => 1, thead => 1,
4297     }->{$token->{tag_name}}) {
4298     ## have an element in table scope
4299     my $i;
4300 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4301     my $node = $self->{open_elements}->[$_];
4302 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4303     $i = $_;
4304     last INSCOPE;
4305     } elsif ({
4306     table => 1, html => 1,
4307     }->{$node->[1]}) {
4308     last INSCOPE;
4309     }
4310     } # INSCOPE
4311     unless (defined $i) {
4312 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4313 wakaba 1.1 ## Ignore the token
4314     !!!next-token;
4315     redo B;
4316     }
4317    
4318     ## As if </tr>
4319     ## have an element in table scope
     ## Reuse the |$i| already declared in this branch for the table section
     ## search instead of redeclaring it in the same scope; it has to be
     ## reset so that the |defined $i| check below reflects only the |tr|
     ## search.
4320     undef $i;
4321 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4322     my $node = $self->{open_elements}->[$_];
4323 wakaba 1.1 if ($node->[1] eq 'tr') {
4324     $i = $_;
4325     last INSCOPE;
4326     } elsif ({
4327     table => 1, html => 1,
4328     }->{$node->[1]}) {
4329     last INSCOPE;
4330     }
4331     } # INSCOPE
4332     unless (defined $i) {
4333 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4334 wakaba 1.1 ## Ignore the token
4335     !!!next-token;
4336     redo B;
4337     }
4338    
4339     ## Clear back to table row context
4340     while (not {
4341     tr => 1, html => 1,
4342 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4343     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4344     pop @{$self->{open_elements}};
4345 wakaba 1.1 }
4346    
4347 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4348     $self->{insertion_mode} = 'in table body';
4349 wakaba 1.1 ## reprocess
4350     redo B;
4351     } elsif ({
4352     body => 1, caption => 1, col => 1,
4353     colgroup => 1, html => 1, td => 1, th => 1,
4354     }->{$token->{tag_name}}) {
4355 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4356 wakaba 1.1 ## Ignore the token
4357     !!!next-token;
4358     redo B;
4359     } else {
4360     #
4361     }
4362     } else {
4363     #
4364     }
4365    
4366     ## As if in table
4367 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4368 wakaba 1.1 $in_body->($insert_to_foster);
4369     redo B;
4370 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4371 wakaba 1.1 if ($token->{type} eq 'character') {
4372     ## NOTE: This is a code clone of "character in body".
4373     $reconstruct_active_formatting_elements->($insert_to_current);
4374    
4375 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4376 wakaba 1.1
4377     !!!next-token;
4378     redo B;
4379     } elsif ($token->{type} eq 'comment') {
4380     ## NOTE: This is a code clone of "comment in body".
4381     my $comment = $self->{document}->create_comment ($token->{data});
4382 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4383 wakaba 1.1 !!!next-token;
4384     redo B;
4385     } elsif ($token->{type} eq 'start tag') {
4386     if ({
4387     caption => 1, col => 1, colgroup => 1,
4388     tbody => 1, td => 1, tfoot => 1, th => 1,
4389     thead => 1, tr => 1,
4390     }->{$token->{tag_name}}) {
4391     ## have an element in table scope
4392     my $tn;
4393 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4394     my $node = $self->{open_elements}->[$_];
4395 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4396     $tn = $node->[1];
4397     last INSCOPE;
4398     } elsif ({
4399     table => 1, html => 1,
4400     }->{$node->[1]}) {
4401     last INSCOPE;
4402     }
4403     } # INSCOPE
4404     unless (defined $tn) {
4405 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4406 wakaba 1.1 ## Ignore the token
4407     !!!next-token;
4408     redo B;
4409     }
4410    
4411     ## Close the cell
4412     !!!back-token; # <?>
4413     $token = {type => 'end tag', tag_name => $tn};
4414     redo B;
4415     } else {
4416     #
4417     }
4418     } elsif ($token->{type} eq 'end tag') {
4419     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4420     ## have an element in table scope
4421     my $i;
4422 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4423     my $node = $self->{open_elements}->[$_];
4424 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4425     $i = $_;
4426     last INSCOPE;
4427     } elsif ({
4428     table => 1, html => 1,
4429     }->{$node->[1]}) {
4430     last INSCOPE;
4431     }
4432     } # INSCOPE
4433     unless (defined $i) {
4434 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4435 wakaba 1.1 ## Ignore the token
4436     !!!next-token;
4437     redo B;
4438     }
4439    
4440     ## generate implied end tags
4441     if ({
4442     dd => 1, dt => 1, li => 1, p => 1,
4443     td => ($token->{tag_name} eq 'th'),
4444     th => ($token->{tag_name} eq 'td'),
4445     tr => 1,
4446 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4447 wakaba 1.1 !!!back-token;
4448     $token = {type => 'end tag',
4449 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4450 wakaba 1.1 redo B;
4451     }
4452    
4453 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4454     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4455 wakaba 1.1 }
4456    
4457 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4458 wakaba 1.1
4459     $clear_up_to_marker->();
4460    
4461 wakaba 1.3 $self->{insertion_mode} = 'in row';
4462 wakaba 1.1
4463     !!!next-token;
4464     redo B;
4465     } elsif ({
4466     body => 1, caption => 1, col => 1,
4467     colgroup => 1, html => 1,
4468     }->{$token->{tag_name}}) {
4469 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4470 wakaba 1.1 ## Ignore the token
4471     !!!next-token;
4472     redo B;
4473     } elsif ({
4474     table => 1, tbody => 1, tfoot => 1,
4475     thead => 1, tr => 1,
4476     }->{$token->{tag_name}}) {
4477     ## have an element in table scope
4478     my $i;
4479     my $tn;
4480 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4481     my $node = $self->{open_elements}->[$_];
4482 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4483     $i = $_;
4484     last INSCOPE;
4485     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4486     $tn = $node->[1];
4487     ## NOTE: There is exactly one |td| or |th| element
4488     ## in scope in the stack of open elements by definition.
4489     } elsif ({
4490     table => 1, html => 1,
4491     }->{$node->[1]}) {
4492     last INSCOPE;
4493     }
4494     } # INSCOPE
4495     unless (defined $i) {
4496 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4497 wakaba 1.1 ## Ignore the token
4498     !!!next-token;
4499     redo B;
4500     }
4501    
4502     ## Close the cell
4503     !!!back-token; # </?>
4504     $token = {type => 'end tag', tag_name => $tn};
4505     redo B;
4506     } else {
4507     #
4508     }
4509     } else {
4510     #
4511     }
4512    
4513     $in_body->($insert_to_current);
4514     redo B;
4515 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4516 wakaba 1.1 if ($token->{type} eq 'character') {
4517 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4518 wakaba 1.1 !!!next-token;
4519     redo B;
4520     } elsif ($token->{type} eq 'comment') {
4521     my $comment = $self->{document}->create_comment ($token->{data});
4522 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4523 wakaba 1.1 !!!next-token;
4524     redo B;
4525     } elsif ($token->{type} eq 'start tag') {
4526     if ($token->{tag_name} eq 'option') {
4527 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4528 wakaba 1.1 ## As if </option>
4529 wakaba 1.3 pop @{$self->{open_elements}};
4530 wakaba 1.1 }
4531    
4532     !!!insert-element ($token->{tag_name}, $token->{attributes});
4533     !!!next-token;
4534     redo B;
4535     } elsif ($token->{tag_name} eq 'optgroup') {
4536 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4537 wakaba 1.1 ## As if </option>
4538 wakaba 1.3 pop @{$self->{open_elements}};
4539 wakaba 1.1 }
4540    
4541 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4542 wakaba 1.1 ## As if </optgroup>
4543 wakaba 1.3 pop @{$self->{open_elements}};
4544 wakaba 1.1 }
4545    
4546     !!!insert-element ($token->{tag_name}, $token->{attributes});
4547     !!!next-token;
4548     redo B;
4549     } elsif ($token->{tag_name} eq 'select') {
4550 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4551 wakaba 1.1 ## As if </select> instead
4552     ## have an element in table scope
4553     my $i;
4554 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4555     my $node = $self->{open_elements}->[$_];
4556 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4557     $i = $_;
4558     last INSCOPE;
4559     } elsif ({
4560     table => 1, html => 1,
4561     }->{$node->[1]}) {
4562     last INSCOPE;
4563     }
4564     } # INSCOPE
4565     unless (defined $i) {
4566 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4567 wakaba 1.1 ## Ignore the token
4568     !!!next-token;
4569     redo B;
4570     }
4571    
4572 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4573 wakaba 1.1
4574 wakaba 1.3 $self->_reset_insertion_mode;
4575 wakaba 1.1
4576     !!!next-token;
4577     redo B;
4578     } else {
4579     #
4580     }
4581     } elsif ($token->{type} eq 'end tag') {
4582     if ($token->{tag_name} eq 'optgroup') {
4583 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4584     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4585 wakaba 1.1 ## As if </option>
4586 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4587     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4588     pop @{$self->{open_elements}};
4589 wakaba 1.1 } else {
4590 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4591 wakaba 1.1 ## Ignore the token
4592     }
4593     !!!next-token;
4594     redo B;
4595     } elsif ($token->{tag_name} eq 'option') {
4596 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4597     pop @{$self->{open_elements}};
4598 wakaba 1.1 } else {
4599 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4600 wakaba 1.1 ## Ignore the token
4601     }
4602     !!!next-token;
4603     redo B;
4604     } elsif ($token->{tag_name} eq 'select') {
4605     ## have an element in table scope
4606     my $i;
4607 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4608     my $node = $self->{open_elements}->[$_];
4609 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4610     $i = $_;
4611     last INSCOPE;
4612     } elsif ({
4613     table => 1, html => 1,
4614     }->{$node->[1]}) {
4615     last INSCOPE;
4616     }
4617     } # INSCOPE
4618     unless (defined $i) {
4619 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4620 wakaba 1.1 ## Ignore the token
4621     !!!next-token;
4622     redo B;
4623     }
4624    
4625 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4626 wakaba 1.1
4627 wakaba 1.3 $self->_reset_insertion_mode;
4628 wakaba 1.1
4629     !!!next-token;
4630     redo B;
4631     } elsif ({
4632     caption => 1, table => 1, tbody => 1,
4633     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4634     }->{$token->{tag_name}}) {
4635 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4636 wakaba 1.1
4637     ## have an element in table scope
4638     my $i;
4639 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4640     my $node = $self->{open_elements}->[$_];
4641 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4642     $i = $_;
4643     last INSCOPE;
4644     } elsif ({
4645     table => 1, html => 1,
4646     }->{$node->[1]}) {
4647     last INSCOPE;
4648     }
4649     } # INSCOPE
4650     unless (defined $i) {
4651     ## Ignore the token
4652     !!!next-token;
4653     redo B;
4654     }
4655    
4656     ## As if </select>
4657     ## have an element in table scope
4658     undef $i;
4659 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4660     my $node = $self->{open_elements}->[$_];
4661 wakaba 1.1 if ($node->[1] eq 'select') {
4662     $i = $_;
4663     last INSCOPE;
4664     } elsif ({
4665     table => 1, html => 1,
4666     }->{$node->[1]}) {
4667     last INSCOPE;
4668     }
4669     } # INSCOPE
4670     unless (defined $i) {
4671 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4672 wakaba 1.1 ## Ignore the </select> token
4673     !!!next-token; ## TODO: ok?
4674     redo B;
4675     }
4676    
4677 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4678 wakaba 1.1
4679 wakaba 1.3 $self->_reset_insertion_mode;
4680 wakaba 1.1
4681     ## reprocess
4682     redo B;
4683     } else {
4684     #
4685     }
4686     } else {
4687     #
4688     }
4689    
4690 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4691 wakaba 1.1 ## Ignore the token
4692     !!!next-token;
4693     redo B;
4694 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4695 wakaba 1.1 if ($token->{type} eq 'character') {
4696     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4697     ## As if in body
4698     $reconstruct_active_formatting_elements->($insert_to_current);
4699    
4700 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4701 wakaba 1.1
4702     unless (length $token->{data}) {
4703     !!!next-token;
4704     redo B;
4705     }
4706     }
4707    
4708     #
4709 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4710 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4711     my $comment = $self->{document}->create_comment ($token->{data});
4712 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4713 wakaba 1.1 !!!next-token;
4714     redo B;
4715 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4716     !!!parse-error (type => 'after body:'.$token->{tag_name});
4717     #
4718 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4719     if ($token->{tag_name} eq 'html') {
4720 wakaba 1.3 if (defined $self->{inner_html_node}) {
4721     !!!parse-error (type => 'unmatched end tag:html');
4722     ## Ignore the token
4723     !!!next-token;
4724     redo B;
4725     } else {
4726     $phase = 'trailing end';
4727     !!!next-token;
4728     redo B;
4729     }
4730 wakaba 1.1 } else {
4731 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4732 wakaba 1.1 }
4733     } else {
4734 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4735 wakaba 1.1 }
4736    
4737 wakaba 1.3 $self->{insertion_mode} = 'in body';
4738 wakaba 1.1 ## reprocess
4739     redo B;
4740 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4741 wakaba 1.1 if ($token->{type} eq 'character') {
4742     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4743 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4744 wakaba 1.1
4745     unless (length $token->{data}) {
4746     !!!next-token;
4747     redo B;
4748     }
4749     }
4750    
4751     #
4752     } elsif ($token->{type} eq 'comment') {
4753     my $comment = $self->{document}->create_comment ($token->{data});
4754 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4755 wakaba 1.1 !!!next-token;
4756     redo B;
4757     } elsif ($token->{type} eq 'start tag') {
4758     if ($token->{tag_name} eq 'frameset') {
4759     !!!insert-element ($token->{tag_name}, $token->{attributes});
4760     !!!next-token;
4761     redo B;
4762     } elsif ($token->{tag_name} eq 'frame') {
4763     !!!insert-element ($token->{tag_name}, $token->{attributes});
4764 wakaba 1.3 pop @{$self->{open_elements}};
4765 wakaba 1.1 !!!next-token;
4766     redo B;
4767     } elsif ($token->{tag_name} eq 'noframes') {
4768     $in_body->($insert_to_current);
4769     redo B;
4770     } else {
4771     #
4772     }
4773     } elsif ($token->{type} eq 'end tag') {
4774     if ($token->{tag_name} eq 'frameset') {
4775 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4776     @{$self->{open_elements}} == 1) {
4777     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4778 wakaba 1.1 ## Ignore the token
4779     !!!next-token;
4780     } else {
4781 wakaba 1.3 pop @{$self->{open_elements}};
4782 wakaba 1.1 !!!next-token;
4783     }
4784    
4785     ## if not inner_html and
4786 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4787     $self->{insertion_mode} = 'after frameset';
4788 wakaba 1.1 }
4789     redo B;
4790     } else {
4791     #
4792     }
4793     } else {
4794     #
4795     }
4796    
4797 wakaba 1.3 if (defined $token->{tag_name}) {
4798     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4799     } else {
4800     !!!parse-error (type => 'in frameset:#'.$token->{type});
4801     }
4802 wakaba 1.1 ## Ignore the token
4803     !!!next-token;
4804     redo B;
4805 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4806 wakaba 1.1 if ($token->{type} eq 'character') {
4807     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4808 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4809 wakaba 1.1
4810     unless (length $token->{data}) {
4811     !!!next-token;
4812     redo B;
4813     }
4814     }
4815    
4816     #
4817     } elsif ($token->{type} eq 'comment') {
4818     my $comment = $self->{document}->create_comment ($token->{data});
4819 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4820 wakaba 1.1 !!!next-token;
4821     redo B;
4822     } elsif ($token->{type} eq 'start tag') {
4823     if ($token->{tag_name} eq 'noframes') {
4824     $in_body->($insert_to_current);
4825     redo B;
4826     } else {
4827     #
4828     }
4829     } elsif ($token->{type} eq 'end tag') {
4830     if ($token->{tag_name} eq 'html') {
4831     $phase = 'trailing end';
4832     !!!next-token;
4833     redo B;
4834     } else {
4835     #
4836     }
4837     } else {
4838     #
4839     }
4840    
4841 wakaba 1.3 if (defined $token->{tag_name}) {
4842     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4843     } else {
4844     !!!parse-error (type => 'after frameset:#'.$token->{type});
4845     }
4846 wakaba 1.1 ## Ignore the token
4847     !!!next-token;
4848     redo B;
4849    
4850     ## ISSUE: An issue in spec there
4851     } else {
4852 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4853 wakaba 1.1 }
4854     }
4855     } elsif ($phase eq 'trailing end') {
4856     ## states in the main stage is preserved yet # MUST
4857    
4858     if ($token->{type} eq 'DOCTYPE') {
4859 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4860 wakaba 1.1 ## Ignore the token
4861     !!!next-token;
4862     redo B;
4863     } elsif ($token->{type} eq 'comment') {
4864     my $comment = $self->{document}->create_comment ($token->{data});
4865     $self->{document}->append_child ($comment);
4866     !!!next-token;
4867     redo B;
4868     } elsif ($token->{type} eq 'character') {
4869     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4870     my $data = $1;
4871     ## As if in the main phase.
4872     ## NOTE: The insertion mode in the main phase
4873     ## just before the phase has been changed to the trailing
4874     ## end phase is either "after body" or "after frameset".
4875     $reconstruct_active_formatting_elements->($insert_to_current)
4876     if $phase eq 'main';
4877    
4878 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4879 wakaba 1.1
4880     unless (length $token->{data}) {
4881     !!!next-token;
4882     redo B;
4883     }
4884     }
4885    
4886 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4887 wakaba 1.1 $phase = 'main';
4888     ## reprocess
4889     redo B;
4890     } elsif ($token->{type} eq 'start tag' or
4891     $token->{type} eq 'end tag') {
4892 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4893 wakaba 1.1 $phase = 'main';
4894     ## reprocess
4895     redo B;
4896     } elsif ($token->{type} eq 'end-of-file') {
4897     ## Stop parsing
4898     last B;
4899     } else {
4900     die "$0: $token->{type}: Unknown token";
4901     }
4902     }
4903     } # B
4904    
4905     ## Stop parsing # MUST
4906    
4907     ## TODO: script stuffs
4908 wakaba 1.3 } # _tree_construct_main
4909    
## set_inner_html ($class, $node, $string, $onerror)
##
## Replace the children of |$node| with the result of parsing |$string|
## as HTML (the HTML5 |innerHTML| setter).
##
##   $node    - A Document node (node type 9) or an Element node (node
##              type 1).  Any other node type is a fatal error.
##   $string  - The markup to parse.  It is accessed through a reference
##              to the caller's argument, so it is not copied.
##   $onerror - Optional code reference called for each parse error with
##              a key/value list; |line| and |column| are appended for
##              the element case.  If omitted, errors are |warn|ed.
##
## For a Document node the existing children are removed and the string
## is handed to |parse_string|.  For an Element node a fresh parser and
## a scratch document are created, the string is parsed in the context
## of |$node|, and the resulting nodes are adopted into |$node|'s owner
## document and appended to |$node|.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Iterate over a copy of the child list, since children are removed
    ## from |$node| while looping.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|

    ## Step 1 # MUST
    my $this_doc = $node->owner_document;
    my $doc = $this_doc->implementation->create_document;
    ## TODO: Mark as HTML document
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 9 # MUST
    my $i = 0;       # index of the next unread character in |$$s|
    my $line = 1;
    my $column = 0;
    $p->{set_next_input_character} = sub {
      my $self = shift;

      ## Shift the history of previously read characters by one.
      pop @{$self->{prev_input_character}};
      unshift @{$self->{prev_input_character}}, $self->{next_input_character};

      ## End of input is reported as -1.
      if ($i >= length $$s) {
        $self->{next_input_character} = -1;
        return;
      }

      $self->{next_input_character} = ord substr $$s, $i++, 1;
      $column++;

      if ($self->{next_input_character} == 0x000A) { # LF
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} == 0x000D) { # CR
        ## CR LF and a lone CR are both reported as a single LF.  Only
        ## peek at the following character and consume it if it is LF;
        ## any other character (including another CR) is left in the
        ## input so that the next call applies the same CR / NULL /
        ## out-of-range handling and column counting to it.
        $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_input_character} == 0x0000) { # NULL
        !!!parse-error (type => 'NULL');
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };
    $p->{prev_input_character} = [-1, -1, -1];
    $p->{next_input_character} = -1;

    ## Report parse errors to the caller's handler (or |warn|), adding
    ## the current input position.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    $p->{parse_error} = sub {
      $ponerror->(@_, line => $line, column => $column);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## Choose the tokenizer's content model flag from the local name of
    ## the context element.
    my $node_ln = $node->local_name;
    $p->{content_model_flag} = {
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    }->{$node_ln} || 'PCDATA';
    ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
        ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 7 # MUST
    $p->_reset_insertion_mode;

    ## Step 8 # MUST
    ## Use the nearest XHTML |form| element among |$node| and its
    ## ancestors, if any, as the parser's form element.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->local_name eq 'form') { ## TODO: case?
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 3 # MUST
    ## Step 10 # MUST
    {
      ## The |!!!next-token| macro is used here with |$self| bound to
      ## the new parser.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 11 # MUST
    ## Iterate over a copy of the child list, since children are removed
    ## from |$node| while looping.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    ## Move the parsed nodes from the scratch root element to |$node|,
    ## adopting each one into |$node|'s owner document first.
    @cn = @{$root->child_nodes};
    for (@cn) {
      $this_doc->adopt_node ($_);
      $node->append_child ($_);
    }
    ## ISSUE: mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
5073    
5074     } # tree construction stage
5075 wakaba 1.1
## get_inner_html ($class, $node, $on_error)
##
## Serialize the children of |$node| as an HTML fragment (the HTML5
## |innerHTML| getter).
##
##   $class    - Ignored.
##   $node     - The node whose children are serialized.  |$node| itself
##               is not emitted, but it and its ancestors decide whether
##               serialization starts in "CDATA" mode.
##   $on_error - Optional code reference, called with the node for each
##               node whose type is not handled here.
##
## Returns a reference to the serialized string.
##
## Text (node types 3 and 4) is escaped (|&|, |<|, |>|, |"|) unless it
## is inside an XHTML-namespace-independent "CDATA" element (|style|,
## |script|, |xmp|, |iframe|, |noembed|, |noframes|, |noscript|), in
## which case it is emitted verbatim.  Attribute values are always
## escaped.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is emitted without escaping.
  my %is_cdata_element = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );

  ## Elements that are emitted as a start tag only; their children and
  ## end tag are not serialized.
  my %is_void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  ## Step 1
  my $s = '';

  ## Start in CDATA mode if |$node| or one of its ancestors is an XHTML
  ## CDATA element.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## The namespace URI may be undefined (null namespace), so guard
      ## the string comparison.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and
          $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $is_cdata_element{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue in document order.  Besides node objects it
  ## holds plain strings: pending end tags, and the marker |cdata-out|
  ## which switches CDATA mode off again.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      next C if $is_void_element{$tag_name};

      ## Entering a CDATA element: queue the marker that leaves CDATA
      ## mode so that it ends up right after this element's end tag.
      if (not $in_cdata and $is_cdata_element{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      ## Process the children next, followed by the end tag.
      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) {
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) {
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) {
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## Put the children at the FRONT of the queue so that they are
      ## serialized at the position of the entity reference; appending
      ## them would emit them after later siblings and pending end tags.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5172    
5173     1;
5174 wakaba 1.14 # $Date: 2007/06/23 05:29:48 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24