/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download) (as text)
Wed May 2 13:44:34 2007 UTC (17 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +905 -670 lines
File MIME type: application/x-wais-source
++ ChangeLog	2 May 2007 13:37:34 -0000
2007-05-02  Wakaba  <wakaba@suika.fam.cx>

	* readme.en.html: TODO section is added.

++ whatpm/t/ChangeLog	2 May 2007 13:44:02 -0000
2007-05-02  Wakaba  <wakaba@suika.fam.cx>

	* .cvsignore: Result files are added.

	* HTML-tree.t: Support for document fragment tests.

	* Makefile: Generate test result files.

	* tokenizer-test-1.test: A new test to ensure that
	characters after end tag are preserved in RCDATA or CDATA
	case.

++ whatpm/Whatpm/ChangeLog	2 May 2007 13:42:17 -0000
2007-05-02  Wakaba  <wakaba@suika.fam.cx>

	* NanoDOM.pm (DOMImplementation): New class.
	(append_child): Weaken the |parent_node| reference.
	(create_element_ns, Element new): Set the |owner_document|
	reference.
	(implementation): New attribute.
	(owner_document, local_name, namespace_uri): New attributes.

	* HTML.pm.src (parse_string): Line and column numbers
	are now provided to error handler.
	(!!!parse-error): Short descriptions are added.
	(_construct_tree): Split into three methods; support
	for innerHTML mode.
	(set_inner_html): New method.

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.3 our $VERSION=do{my @r=(q$Revision: 1.8 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5     ## This is an early version of an HTML parser.
6    
## Tag names (lowercase) for which the tokenizer accepts a trailing "/"
## before ">" in a start tag without reporting a |nestc| parse error.
## Each listed name maps to a true value; any other name is absent.
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
20    
## Table of named character references.
##
## Keys are entity names written without the leading "&" and the
## trailing ";"; names are case-sensitive (e.g. |Prime| and |prime|
## are different entries).  Values are one-character strings holding
## the character the name stands for.  In addition to the lowercase
## names, the uppercase aliases |AMP|, |COPY|, |GT|, |LT|, |QUOT| and
## |REG| are present and map to the same characters as their
## lowercase counterparts.
##
## NOTE(review): presumably consulted by
## |_tokenize_attempt_to_consume_an_entity|, which is defined outside
## this part of the file -- confirm against that method.
21     my $entity_char = {
22     AElig => "\x{00C6}",
23     Aacute => "\x{00C1}",
24     Acirc => "\x{00C2}",
25     Agrave => "\x{00C0}",
26     Alpha => "\x{0391}",
27     Aring => "\x{00C5}",
28     Atilde => "\x{00C3}",
29     Auml => "\x{00C4}",
30     Beta => "\x{0392}",
31     Ccedil => "\x{00C7}",
32     Chi => "\x{03A7}",
33     Dagger => "\x{2021}",
34     Delta => "\x{0394}",
35     ETH => "\x{00D0}",
36     Eacute => "\x{00C9}",
37     Ecirc => "\x{00CA}",
38     Egrave => "\x{00C8}",
39     Epsilon => "\x{0395}",
40     Eta => "\x{0397}",
41     Euml => "\x{00CB}",
42     Gamma => "\x{0393}",
43     Iacute => "\x{00CD}",
44     Icirc => "\x{00CE}",
45     Igrave => "\x{00CC}",
46     Iota => "\x{0399}",
47     Iuml => "\x{00CF}",
48     Kappa => "\x{039A}",
49     Lambda => "\x{039B}",
50     Mu => "\x{039C}",
51     Ntilde => "\x{00D1}",
52     Nu => "\x{039D}",
53     OElig => "\x{0152}",
54     Oacute => "\x{00D3}",
55     Ocirc => "\x{00D4}",
56     Ograve => "\x{00D2}",
57     Omega => "\x{03A9}",
58     Omicron => "\x{039F}",
59     Oslash => "\x{00D8}",
60     Otilde => "\x{00D5}",
61     Ouml => "\x{00D6}",
62     Phi => "\x{03A6}",
63     Pi => "\x{03A0}",
64     Prime => "\x{2033}",
65     Psi => "\x{03A8}",
66     Rho => "\x{03A1}",
67     Scaron => "\x{0160}",
68     Sigma => "\x{03A3}",
69     THORN => "\x{00DE}",
70     Tau => "\x{03A4}",
71     Theta => "\x{0398}",
72     Uacute => "\x{00DA}",
73     Ucirc => "\x{00DB}",
74     Ugrave => "\x{00D9}",
75     Upsilon => "\x{03A5}",
76     Uuml => "\x{00DC}",
77     Xi => "\x{039E}",
78     Yacute => "\x{00DD}",
79     Yuml => "\x{0178}",
80     Zeta => "\x{0396}",
81     aacute => "\x{00E1}",
82     acirc => "\x{00E2}",
83     acute => "\x{00B4}",
84     aelig => "\x{00E6}",
85     agrave => "\x{00E0}",
86     alefsym => "\x{2135}",
87     alpha => "\x{03B1}",
88     amp => "\x{0026}",
89     AMP => "\x{0026}",
90     and => "\x{2227}",
91     ang => "\x{2220}",
92     apos => "\x{0027}",
93     aring => "\x{00E5}",
94     asymp => "\x{2248}",
95     atilde => "\x{00E3}",
96     auml => "\x{00E4}",
97     bdquo => "\x{201E}",
98     beta => "\x{03B2}",
99     brvbar => "\x{00A6}",
100     bull => "\x{2022}",
101     cap => "\x{2229}",
102     ccedil => "\x{00E7}",
103     cedil => "\x{00B8}",
104     cent => "\x{00A2}",
105     chi => "\x{03C7}",
106     circ => "\x{02C6}",
107     clubs => "\x{2663}",
108     cong => "\x{2245}",
109     copy => "\x{00A9}",
110     COPY => "\x{00A9}",
111     crarr => "\x{21B5}",
112     cup => "\x{222A}",
113     curren => "\x{00A4}",
114     dArr => "\x{21D3}",
115     dagger => "\x{2020}",
116     darr => "\x{2193}",
117     deg => "\x{00B0}",
118     delta => "\x{03B4}",
119     diams => "\x{2666}",
120     divide => "\x{00F7}",
121     eacute => "\x{00E9}",
122     ecirc => "\x{00EA}",
123     egrave => "\x{00E8}",
124     empty => "\x{2205}",
125     emsp => "\x{2003}",
126     ensp => "\x{2002}",
127     epsilon => "\x{03B5}",
128     equiv => "\x{2261}",
129     eta => "\x{03B7}",
130     eth => "\x{00F0}",
131     euml => "\x{00EB}",
132     euro => "\x{20AC}",
133     exist => "\x{2203}",
134     fnof => "\x{0192}",
135     forall => "\x{2200}",
136     frac12 => "\x{00BD}",
137     frac14 => "\x{00BC}",
138     frac34 => "\x{00BE}",
139     frasl => "\x{2044}",
140     gamma => "\x{03B3}",
141     ge => "\x{2265}",
142     gt => "\x{003E}",
143     GT => "\x{003E}",
144     hArr => "\x{21D4}",
145     harr => "\x{2194}",
146     hearts => "\x{2665}",
147     hellip => "\x{2026}",
148     iacute => "\x{00ED}",
149     icirc => "\x{00EE}",
150     iexcl => "\x{00A1}",
151     igrave => "\x{00EC}",
152     image => "\x{2111}",
153     infin => "\x{221E}",
154     int => "\x{222B}",
155     iota => "\x{03B9}",
156     iquest => "\x{00BF}",
157     isin => "\x{2208}",
158     iuml => "\x{00EF}",
159     kappa => "\x{03BA}",
160     lArr => "\x{21D0}",
161     lambda => "\x{03BB}",
162     lang => "\x{2329}",
163     laquo => "\x{00AB}",
164     larr => "\x{2190}",
165     lceil => "\x{2308}",
166     ldquo => "\x{201C}",
167     le => "\x{2264}",
168     lfloor => "\x{230A}",
169     lowast => "\x{2217}",
170     loz => "\x{25CA}",
171     lrm => "\x{200E}",
172     lsaquo => "\x{2039}",
173     lsquo => "\x{2018}",
174     lt => "\x{003C}",
175     LT => "\x{003C}",
176     macr => "\x{00AF}",
177     mdash => "\x{2014}",
178     micro => "\x{00B5}",
179     middot => "\x{00B7}",
180     minus => "\x{2212}",
181     mu => "\x{03BC}",
182     nabla => "\x{2207}",
183     nbsp => "\x{00A0}",
184     ndash => "\x{2013}",
185     ne => "\x{2260}",
186     ni => "\x{220B}",
187     not => "\x{00AC}",
188     notin => "\x{2209}",
189     nsub => "\x{2284}",
190     ntilde => "\x{00F1}",
191     nu => "\x{03BD}",
192     oacute => "\x{00F3}",
193     ocirc => "\x{00F4}",
194     oelig => "\x{0153}",
195     ograve => "\x{00F2}",
196     oline => "\x{203E}",
197     omega => "\x{03C9}",
198     omicron => "\x{03BF}",
199     oplus => "\x{2295}",
200     or => "\x{2228}",
201     ordf => "\x{00AA}",
202     ordm => "\x{00BA}",
203     oslash => "\x{00F8}",
204     otilde => "\x{00F5}",
205     otimes => "\x{2297}",
206     ouml => "\x{00F6}",
207     para => "\x{00B6}",
208     part => "\x{2202}",
209     permil => "\x{2030}",
210     perp => "\x{22A5}",
211     phi => "\x{03C6}",
212     pi => "\x{03C0}",
213     piv => "\x{03D6}",
214     plusmn => "\x{00B1}",
215     pound => "\x{00A3}",
216     prime => "\x{2032}",
217     prod => "\x{220F}",
218     prop => "\x{221D}",
219     psi => "\x{03C8}",
220     quot => "\x{0022}",
221     QUOT => "\x{0022}",
222     rArr => "\x{21D2}",
223     radic => "\x{221A}",
224     rang => "\x{232A}",
225     raquo => "\x{00BB}",
226     rarr => "\x{2192}",
227     rceil => "\x{2309}",
228     rdquo => "\x{201D}",
229     real => "\x{211C}",
230     reg => "\x{00AE}",
231     REG => "\x{00AE}",
232     rfloor => "\x{230B}",
233     rho => "\x{03C1}",
234     rlm => "\x{200F}",
235     rsaquo => "\x{203A}",
236     rsquo => "\x{2019}",
237     sbquo => "\x{201A}",
238     scaron => "\x{0161}",
239     sdot => "\x{22C5}",
240     sect => "\x{00A7}",
241     shy => "\x{00AD}",
242     sigma => "\x{03C3}",
243     sigmaf => "\x{03C2}",
244     sim => "\x{223C}",
245     spades => "\x{2660}",
246     sub => "\x{2282}",
247     sube => "\x{2286}",
248     sum => "\x{2211}",
249     sup => "\x{2283}",
250     sup1 => "\x{00B9}",
251     sup2 => "\x{00B2}",
252     sup3 => "\x{00B3}",
253     supe => "\x{2287}",
254     szlig => "\x{00DF}",
255     tau => "\x{03C4}",
256     there4 => "\x{2234}",
257     theta => "\x{03B8}",
258     thetasym => "\x{03D1}",
259     thinsp => "\x{2009}",
260     thorn => "\x{00FE}",
261     tilde => "\x{02DC}",
262     times => "\x{00D7}",
263     trade => "\x{2122}",
264     uArr => "\x{21D1}",
265     uacute => "\x{00FA}",
266     uarr => "\x{2191}",
267     ucirc => "\x{00FB}",
268     ugrave => "\x{00F9}",
269     uml => "\x{00A8}",
270     upsih => "\x{03D2}",
271     upsilon => "\x{03C5}",
272     uuml => "\x{00FC}",
273     weierp => "\x{2118}",
274     xi => "\x{03BE}",
275     yacute => "\x{00FD}",
276     yen => "\x{00A5}",
277     yuml => "\x{00FF}",
278     zeta => "\x{03B6}",
279     zwj => "\x{200D}",
280     zwnj => "\x{200C}",
281     };
282    
## Element names (lowercase) belonging to the "special" category.
## Each listed name maps to a true value; any other name is absent.
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound blockquote body br center
    col colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
  )
};
## Element names (lowercase) belonging to the "scoping" category.
## Each listed name maps to a true value; any other name is absent.
my $scoping_category = {
  map { ($_ => 1) } qw(button caption html marquee object table td th)
};
## Element names (lowercase) belonging to the "formatting" category.
## Each listed name maps to a true value; any other name is absent.
##
## The entry for |strike| used to be misspelled |strile|, which meant
## that <strike> elements were never recognised as formatting elements.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
303     # $phrasing_category: all other elements
304    
## Parses a string as an HTML document.
##
## Called as a class method: the invocant is only used to call |new|,
## so a fresh parser object is created for every call.
##
## Arguments:
##   $_[0]  The string to parse.  It is accessed through a reference,
##          not copied.
##   $_[1]  The document object; stored as |$self->{document}| and
##          handed to the tree constructor.
##   $_[2]  Optional error handler (code reference).  It is called with
##          the key/value pairs given to the parse-error macro, followed
##          by |line => $line, column => $column|.  When omitted, a
##          default handler |warn|s a one-line message.
##
## Returns |$self->{document}|.
##
## Position tracking: |$line| starts at 1 and |$column| at 0; both
## describe the most recently read input character.  |$column| is
## incremented for every character read and reset to 0 at each line
## break, so the first character of every line is column 1.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $s = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $i = 0;
  my $line = 1;
  my $column = 0;
  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## End of input is reported as -1.
    if ($i >= length ($$s)) {
      $self->{next_input_character} = -1;
      return;
    }

    $self->{next_input_character} = ord substr $$s, $i++, 1;
    $column++;

    if ($self->{next_input_character} == 0x000A) { # LF
      ## A bare LF is a line break too; without this branch the line
      ## number would never advance for LF-only input.
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} == 0x000D) { # CR
      ## CR LF counts as a single line break.  The following character
      ## is only peeked at: when it is not LF it stays in the input and
      ## goes through this subroutine on the next call, so it receives
      ## the same normalization and column counting as any other
      ## character.
      if ($i < length ($$s) and substr ($$s, $i, 1) eq "\x0A") {
        $i++;
      }
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($self->{next_input_character} > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($self->{next_input_character} == 0x0000) { # NULL
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };

  my $onerror = $_[2] || sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Append the current position to whatever the reporter passes in.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
357    
## Creates a parser object with inert default callbacks.
##
## Argument: the class name to bless the object into.
## Returns: a new hash-based object with two code references:
##   |set_next_input_character|  Sets |next_input_character| of the
##                               parser passed as its first argument
##                               to -1 (end of input).
##   |parse_error|               Does nothing.
## |parse_string| replaces both callbacks with real implementations.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    ## The parser is taken from the argument list, exactly as the
    ## callback installed by |parse_string| does.  Closing over the
    ## outer |$self| here would make the object reference itself
    ## through this code reference, so it could never be freed.
    my $parser = shift;
    $parser->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
369    
370     ## Implementations MUST act as if they used the state machine described in the spec.
371    
## Puts the tokenizer into its start-up configuration and reads the
## first input character.
##
## After the call:
##   |state| is 'data' and |content_model_flag| is 'PCDATA';
##   the token under construction, the attribute under construction,
##   the last emitted start tag name and the last attribute value
##   state are all undefined;
##   |char| (characters pushed back onto the input) and |token|
##   (tokens waiting to be returned) are empty arrays.
sub _initialize_tokenizer ($) {
  my $self = shift;

  @$self{qw(state content_model_flag)} = ('data', 'PCDATA'); # MUST

  ## |current_token| is a start tag, end tag, comment, or DOCTYPE.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  ## |char| has to exist before the first character is requested.
  $self->{char} = [];
  # $self->{next_input_character}
  !!!next-input-character;

  $self->{token} = [];
} # _initialize_tokenizer
385    
386     ## A token has:
387     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
388     ## 'character', or 'end-of-file'
389     ## ->{name} (DOCTYPE, start tag (tagname), end tag (tagname))
390     ## ISSUE: the spec need s/tagname/tag name/
391     ## ->{error} == 1 or 0 (DOCTYPE)
392     ## ->{attributes} isa HASH (start tag, end tag)
393     ## ->{data} (comment, character)
394    
395     ## Macros
396     ## Macros MUST be preceded by three EXCLAMATION MARKs.
397     ## emit ($token)
398     ## Emits the specified token.
399    
400     ## Emitted token MUST immediately be handled by the tree construction state.
401    
402     ## Before each step, UA MAY check to see if either one of the scripts in
403     ## "list of scripts that will execute as soon as possible" or the first
404     ## script in the "list of scripts that will execute asynchronously",
405     ## has completed loading. If one has, then it MUST be executed
406     ## and removed from the list.
407    
408     sub _get_next_token ($) {
409     my $self = shift;
410     if (@{$self->{token}}) {
411     return shift @{$self->{token}};
412     }
413    
414     A: {
415     if ($self->{state} eq 'data') {
416     if ($self->{next_input_character} == 0x0026) { # &
417     if ($self->{content_model_flag} eq 'PCDATA' or
418     $self->{content_model_flag} eq 'RCDATA') {
419     $self->{state} = 'entity data';
420     !!!next-input-character;
421     redo A;
422     } else {
423     #
424     }
425     } elsif ($self->{next_input_character} == 0x003C) { # <
426     if ($self->{content_model_flag} ne 'PLAINTEXT') {
427     $self->{state} = 'tag open';
428     !!!next-input-character;
429     redo A;
430     } else {
431     #
432     }
433     } elsif ($self->{next_input_character} == -1) {
434     !!!emit ({type => 'end-of-file'});
435     last A; ## TODO: ok?
436     }
437     # Anything else
438     my $token = {type => 'character',
439     data => chr $self->{next_input_character}};
440     ## Stay in the data state
441     !!!next-input-character;
442    
443     !!!emit ($token);
444    
445     redo A;
446     } elsif ($self->{state} eq 'entity data') {
447     ## (cannot happen in CDATA state)
448    
449     my $token = $self->_tokenize_attempt_to_consume_an_entity;
450    
451     $self->{state} = 'data';
452     # next-input-character is already done
453    
454     unless (defined $token) {
455     !!!emit ({type => 'character', data => '&'});
456     } else {
457     !!!emit ($token);
458     }
459    
460     redo A;
461     } elsif ($self->{state} eq 'tag open') {
462     if ($self->{content_model_flag} eq 'RCDATA' or
463     $self->{content_model_flag} eq 'CDATA') {
464     if ($self->{next_input_character} == 0x002F) { # /
465     !!!next-input-character;
466     $self->{state} = 'close tag open';
467     redo A;
468     } else {
469     ## reconsume
470     $self->{state} = 'data';
471    
472     !!!emit ({type => 'character', data => '<'});
473    
474     redo A;
475     }
476     } elsif ($self->{content_model_flag} eq 'PCDATA') {
477     if ($self->{next_input_character} == 0x0021) { # !
478     $self->{state} = 'markup declaration open';
479     !!!next-input-character;
480     redo A;
481     } elsif ($self->{next_input_character} == 0x002F) { # /
482     $self->{state} = 'close tag open';
483     !!!next-input-character;
484     redo A;
485     } elsif (0x0041 <= $self->{next_input_character} and
486     $self->{next_input_character} <= 0x005A) { # A..Z
487     $self->{current_token}
488     = {type => 'start tag',
489     tag_name => chr ($self->{next_input_character} + 0x0020)};
490     $self->{state} = 'tag name';
491     !!!next-input-character;
492     redo A;
493     } elsif (0x0061 <= $self->{next_input_character} and
494     $self->{next_input_character} <= 0x007A) { # a..z
495     $self->{current_token} = {type => 'start tag',
496     tag_name => chr ($self->{next_input_character})};
497     $self->{state} = 'tag name';
498     !!!next-input-character;
499     redo A;
500     } elsif ($self->{next_input_character} == 0x003E) { # >
501 wakaba 1.3 !!!parse-error (type => 'empty start tag');
502 wakaba 1.1 $self->{state} = 'data';
503     !!!next-input-character;
504    
505     !!!emit ({type => 'character', data => '<>'});
506    
507     redo A;
508     } elsif ($self->{next_input_character} == 0x003F) { # ?
509 wakaba 1.3 !!!parse-error (type => 'pio');
510 wakaba 1.1 $self->{state} = 'bogus comment';
511     ## $self->{next_input_character} is intentionally left as is
512     redo A;
513     } else {
514 wakaba 1.3 !!!parse-error (type => 'bare stago');
515 wakaba 1.1 $self->{state} = 'data';
516     ## reconsume
517    
518     !!!emit ({type => 'character', data => '<'});
519    
520     redo A;
521     }
522     } else {
523     die "$0: $self->{content_model_flag}: Unknown content model flag";
524     }
525     } elsif ($self->{state} eq 'close tag open') {
526     if ($self->{content_model_flag} eq 'RCDATA' or
527     $self->{content_model_flag} eq 'CDATA') {
528     my @next_char;
529     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
530     push @next_char, $self->{next_input_character};
531     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
532     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
533     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
534     !!!next-input-character;
535     next TAGNAME;
536     } else {
537 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
538 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
539     !!!back-next-input-character (@next_char);
540     $self->{state} = 'data';
541    
542     !!!emit ({type => 'character', data => '</'});
543    
544     redo A;
545     }
546     }
547     push @next_char, $self->{next_input_character};
548    
549     unless ($self->{next_input_character} == 0x0009 or # HT
550     $self->{next_input_character} == 0x000A or # LF
551     $self->{next_input_character} == 0x000B or # VT
552     $self->{next_input_character} == 0x000C or # FF
553     $self->{next_input_character} == 0x0020 or # SP
554     $self->{next_input_character} == 0x003E or # >
555     $self->{next_input_character} == 0x002F or # /
556     $self->{next_input_character} == 0x003C or # <
557     $self->{next_input_character} == -1) {
558 wakaba 1.3 !!!parse-error (type => 'unmatched end tag');
559 wakaba 1.1 $self->{next_input_character} = shift @next_char; # reconsume
560     !!!back-next-input-character (@next_char);
561     $self->{state} = 'data';
562    
563     !!!emit ({type => 'character', data => '</'});
564    
565     redo A;
566     } else {
567     $self->{next_input_character} = shift @next_char;
568     !!!back-next-input-character (@next_char);
569     # and consume...
570     }
571     }
572    
573     if (0x0041 <= $self->{next_input_character} and
574     $self->{next_input_character} <= 0x005A) { # A..Z
575     $self->{current_token} = {type => 'end tag',
576     tag_name => chr ($self->{next_input_character} + 0x0020)};
577     $self->{state} = 'tag name';
578     !!!next-input-character;
579     redo A;
580     } elsif (0x0061 <= $self->{next_input_character} and
581     $self->{next_input_character} <= 0x007A) { # a..z
582     $self->{current_token} = {type => 'end tag',
583     tag_name => chr ($self->{next_input_character})};
584     $self->{state} = 'tag name';
585     !!!next-input-character;
586     redo A;
587     } elsif ($self->{next_input_character} == 0x003E) { # >
588 wakaba 1.3 !!!parse-error (type => 'empty end tag');
589 wakaba 1.1 $self->{state} = 'data';
590     !!!next-input-character;
591     redo A;
592     } elsif ($self->{next_input_character} == -1) {
593 wakaba 1.3 !!!parse-error (type => 'bare etago');
594 wakaba 1.1 $self->{state} = 'data';
595     # reconsume
596    
597     !!!emit ({type => 'character', data => '</'});
598    
599     redo A;
600     } else {
601 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
602 wakaba 1.1 $self->{state} = 'bogus comment';
603     ## $self->{next_input_character} is intentionally left as is
604     redo A;
605     }
606     } elsif ($self->{state} eq 'tag name') {
607     if ($self->{next_input_character} == 0x0009 or # HT
608     $self->{next_input_character} == 0x000A or # LF
609     $self->{next_input_character} == 0x000B or # VT
610     $self->{next_input_character} == 0x000C or # FF
611     $self->{next_input_character} == 0x0020) { # SP
612     $self->{state} = 'before attribute name';
613     !!!next-input-character;
614     redo A;
615     } elsif ($self->{next_input_character} == 0x003E) { # >
616     if ($self->{current_token}->{type} eq 'start tag') {
617     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
618     } elsif ($self->{current_token}->{type} eq 'end tag') {
619     $self->{content_model_flag} = 'PCDATA'; # MUST
620     if ($self->{current_token}->{attributes}) {
621 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
622 wakaba 1.1 }
623     } else {
624     die "$0: $self->{current_token}->{type}: Unknown token type";
625     }
626     $self->{state} = 'data';
627     !!!next-input-character;
628    
629     !!!emit ($self->{current_token}); # start tag or end tag
630     undef $self->{current_token};
631    
632     redo A;
633     } elsif (0x0041 <= $self->{next_input_character} and
634     $self->{next_input_character} <= 0x005A) { # A..Z
635     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
636     # start tag or end tag
637     ## Stay in this state
638     !!!next-input-character;
639     redo A;
640     } elsif ($self->{next_input_character} == 0x003C or # <
641     $self->{next_input_character} == -1) {
642 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
643 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
644     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
645     } elsif ($self->{current_token}->{type} eq 'end tag') {
646     $self->{content_model_flag} = 'PCDATA'; # MUST
647     if ($self->{current_token}->{attributes}) {
648 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
649 wakaba 1.1 }
650     } else {
651     die "$0: $self->{current_token}->{type}: Unknown token type";
652     }
653     $self->{state} = 'data';
654     # reconsume
655    
656     !!!emit ($self->{current_token}); # start tag or end tag
657     undef $self->{current_token};
658    
659     redo A;
660     } elsif ($self->{next_input_character} == 0x002F) { # /
661     !!!next-input-character;
662     if ($self->{next_input_character} == 0x003E and # >
663     $self->{current_token}->{type} eq 'start tag' and
664     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
665     # permitted slash
666     #
667     } else {
668 wakaba 1.3 !!!parse-error (type => 'nestc');
669 wakaba 1.1 }
670     $self->{state} = 'before attribute name';
671     # next-input-character is already done
672     redo A;
673     } else {
674     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
675     # start tag or end tag
676     ## Stay in the state
677     !!!next-input-character;
678     redo A;
679     }
680     } elsif ($self->{state} eq 'before attribute name') {
681     if ($self->{next_input_character} == 0x0009 or # HT
682     $self->{next_input_character} == 0x000A or # LF
683     $self->{next_input_character} == 0x000B or # VT
684     $self->{next_input_character} == 0x000C or # FF
685     $self->{next_input_character} == 0x0020) { # SP
686     ## Stay in the state
687     !!!next-input-character;
688     redo A;
689     } elsif ($self->{next_input_character} == 0x003E) { # >
690     if ($self->{current_token}->{type} eq 'start tag') {
691     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
692     } elsif ($self->{current_token}->{type} eq 'end tag') {
693     $self->{content_model_flag} = 'PCDATA'; # MUST
694     if ($self->{current_token}->{attributes}) {
695 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
696 wakaba 1.1 }
697     } else {
698     die "$0: $self->{current_token}->{type}: Unknown token type";
699     }
700     $self->{state} = 'data';
701     !!!next-input-character;
702    
703     !!!emit ($self->{current_token}); # start tag or end tag
704     undef $self->{current_token};
705    
706     redo A;
707     } elsif (0x0041 <= $self->{next_input_character} and
708     $self->{next_input_character} <= 0x005A) { # A..Z
709     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
710     value => ''};
711     $self->{state} = 'attribute name';
712     !!!next-input-character;
713     redo A;
714     } elsif ($self->{next_input_character} == 0x002F) { # /
715     !!!next-input-character;
716     if ($self->{next_input_character} == 0x003E and # >
717     $self->{current_token}->{type} eq 'start tag' and
718     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
719     # permitted slash
720     #
721     } else {
722 wakaba 1.3 !!!parse-error (type => 'nestc');
723 wakaba 1.1 }
724     ## Stay in the state
725     # next-input-character is already done
726     redo A;
727     } elsif ($self->{next_input_character} == 0x003C or # <
728     $self->{next_input_character} == -1) {
729 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
730 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
731     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
732     } elsif ($self->{current_token}->{type} eq 'end tag') {
733     $self->{content_model_flag} = 'PCDATA'; # MUST
734     if ($self->{current_token}->{attributes}) {
735 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
736 wakaba 1.1 }
737     } else {
738     die "$0: $self->{current_token}->{type}: Unknown token type";
739     }
740     $self->{state} = 'data';
741     # reconsume
742    
743     !!!emit ($self->{current_token}); # start tag or end tag
744     undef $self->{current_token};
745    
746     redo A;
747     } else {
748     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
749     value => ''};
750     $self->{state} = 'attribute name';
751     !!!next-input-character;
752     redo A;
753     }
754     } elsif ($self->{state} eq 'attribute name') {
755     my $before_leave = sub {
756     if (exists $self->{current_token}->{attributes} # start tag or end tag
757     ->{$self->{current_attribute}->{name}}) { # MUST
758 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
759 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
760     } else {
761     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
762     = $self->{current_attribute};
763     }
764     }; # $before_leave
765    
766     if ($self->{next_input_character} == 0x0009 or # HT
767     $self->{next_input_character} == 0x000A or # LF
768     $self->{next_input_character} == 0x000B or # VT
769     $self->{next_input_character} == 0x000C or # FF
770     $self->{next_input_character} == 0x0020) { # SP
771     $before_leave->();
772     $self->{state} = 'after attribute name';
773     !!!next-input-character;
774     redo A;
775     } elsif ($self->{next_input_character} == 0x003D) { # =
776     $before_leave->();
777     $self->{state} = 'before attribute value';
778     !!!next-input-character;
779     redo A;
780     } elsif ($self->{next_input_character} == 0x003E) { # >
781     $before_leave->();
782     if ($self->{current_token}->{type} eq 'start tag') {
783     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
784     } elsif ($self->{current_token}->{type} eq 'end tag') {
785     $self->{content_model_flag} = 'PCDATA'; # MUST
786     if ($self->{current_token}->{attributes}) {
787 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
788 wakaba 1.1 }
789     } else {
790     die "$0: $self->{current_token}->{type}: Unknown token type";
791     }
792     $self->{state} = 'data';
793     !!!next-input-character;
794    
795     !!!emit ($self->{current_token}); # start tag or end tag
796     undef $self->{current_token};
797    
798     redo A;
799     } elsif (0x0041 <= $self->{next_input_character} and
800     $self->{next_input_character} <= 0x005A) { # A..Z
801     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
802     ## Stay in the state
803     !!!next-input-character;
804     redo A;
805     } elsif ($self->{next_input_character} == 0x002F) { # /
806     $before_leave->();
807     !!!next-input-character;
808     if ($self->{next_input_character} == 0x003E and # >
809     $self->{current_token}->{type} eq 'start tag' and
810     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
811     # permitted slash
812     #
813     } else {
814 wakaba 1.3 !!!parse-error (type => 'nestc');
815 wakaba 1.1 }
816     $self->{state} = 'before attribute name';
817     # next-input-character is already done
818     redo A;
819     } elsif ($self->{next_input_character} == 0x003C or # <
820     $self->{next_input_character} == -1) {
821 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
822 wakaba 1.1 $before_leave->();
823     if ($self->{current_token}->{type} eq 'start tag') {
824     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
825     } elsif ($self->{current_token}->{type} eq 'end tag') {
826     $self->{content_model_flag} = 'PCDATA'; # MUST
827     if ($self->{current_token}->{attributes}) {
828 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
829 wakaba 1.1 }
830     } else {
831     die "$0: $self->{current_token}->{type}: Unknown token type";
832     }
833     $self->{state} = 'data';
834     # reconsume
835    
836     !!!emit ($self->{current_token}); # start tag or end tag
837     undef $self->{current_token};
838    
839     redo A;
840     } else {
841     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
842     ## Stay in the state
843     !!!next-input-character;
844     redo A;
845     }
846     } elsif ($self->{state} eq 'after attribute name') {
847     if ($self->{next_input_character} == 0x0009 or # HT
848     $self->{next_input_character} == 0x000A or # LF
849     $self->{next_input_character} == 0x000B or # VT
850     $self->{next_input_character} == 0x000C or # FF
851     $self->{next_input_character} == 0x0020) { # SP
852     ## Stay in the state
853     !!!next-input-character;
854     redo A;
855     } elsif ($self->{next_input_character} == 0x003D) { # =
856     $self->{state} = 'before attribute value';
857     !!!next-input-character;
858     redo A;
859     } elsif ($self->{next_input_character} == 0x003E) { # >
860     if ($self->{current_token}->{type} eq 'start tag') {
861     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
862     } elsif ($self->{current_token}->{type} eq 'end tag') {
863     $self->{content_model_flag} = 'PCDATA'; # MUST
864     if ($self->{current_token}->{attributes}) {
865 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
866 wakaba 1.1 }
867     } else {
868     die "$0: $self->{current_token}->{type}: Unknown token type";
869     }
870     $self->{state} = 'data';
871     !!!next-input-character;
872    
873     !!!emit ($self->{current_token}); # start tag or end tag
874     undef $self->{current_token};
875    
876     redo A;
877     } elsif (0x0041 <= $self->{next_input_character} and
878     $self->{next_input_character} <= 0x005A) { # A..Z
879     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
880     value => ''};
881     $self->{state} = 'attribute name';
882     !!!next-input-character;
883     redo A;
884     } elsif ($self->{next_input_character} == 0x002F) { # /
885     !!!next-input-character;
886     if ($self->{next_input_character} == 0x003E and # >
887     $self->{current_token}->{type} eq 'start tag' and
888     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
889     # permitted slash
890     #
891     } else {
892 wakaba 1.3 !!!parse-error (type => 'nestc');
893 wakaba 1.1 }
894     $self->{state} = 'before attribute name';
895     # next-input-character is already done
896     redo A;
897     } elsif ($self->{next_input_character} == 0x003C or # <
898     $self->{next_input_character} == -1) {
899 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
900 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
901     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
902     } elsif ($self->{current_token}->{type} eq 'end tag') {
903     $self->{content_model_flag} = 'PCDATA'; # MUST
904     if ($self->{current_token}->{attributes}) {
905 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
906 wakaba 1.1 }
907     } else {
908     die "$0: $self->{current_token}->{type}: Unknown token type";
909     }
910     $self->{state} = 'data';
911     # reconsume
912    
913     !!!emit ($self->{current_token}); # start tag or end tag
914     undef $self->{current_token};
915    
916     redo A;
917     } else {
918     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
919     value => ''};
920     $self->{state} = 'attribute name';
921     !!!next-input-character;
922     redo A;
923     }
924     } elsif ($self->{state} eq 'before attribute value') {
925     if ($self->{next_input_character} == 0x0009 or # HT
926     $self->{next_input_character} == 0x000A or # LF
927     $self->{next_input_character} == 0x000B or # VT
928     $self->{next_input_character} == 0x000C or # FF
929     $self->{next_input_character} == 0x0020) { # SP
930     ## Stay in the state
931     !!!next-input-character;
932     redo A;
933     } elsif ($self->{next_input_character} == 0x0022) { # "
934     $self->{state} = 'attribute value (double-quoted)';
935     !!!next-input-character;
936     redo A;
937     } elsif ($self->{next_input_character} == 0x0026) { # &
938     $self->{state} = 'attribute value (unquoted)';
939     ## reconsume
940     redo A;
941     } elsif ($self->{next_input_character} == 0x0027) { # '
942     $self->{state} = 'attribute value (single-quoted)';
943     !!!next-input-character;
944     redo A;
945     } elsif ($self->{next_input_character} == 0x003E) { # >
946     if ($self->{current_token}->{type} eq 'start tag') {
947     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
948     } elsif ($self->{current_token}->{type} eq 'end tag') {
949     $self->{content_model_flag} = 'PCDATA'; # MUST
950     if ($self->{current_token}->{attributes}) {
951 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
952 wakaba 1.1 }
953     } else {
954     die "$0: $self->{current_token}->{type}: Unknown token type";
955     }
956     $self->{state} = 'data';
957     !!!next-input-character;
958    
959     !!!emit ($self->{current_token}); # start tag or end tag
960     undef $self->{current_token};
961    
962     redo A;
963     } elsif ($self->{next_input_character} == 0x003C or # <
964     $self->{next_input_character} == -1) {
965 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
966 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
967     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
968     } elsif ($self->{current_token}->{type} eq 'end tag') {
969     $self->{content_model_flag} = 'PCDATA'; # MUST
970     if ($self->{current_token}->{attributes}) {
971 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
972 wakaba 1.1 }
973     } else {
974     die "$0: $self->{current_token}->{type}: Unknown token type";
975     }
976     $self->{state} = 'data';
977     ## reconsume
978    
979     !!!emit ($self->{current_token}); # start tag or end tag
980     undef $self->{current_token};
981    
982     redo A;
983     } else {
984     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
985     $self->{state} = 'attribute value (unquoted)';
986     !!!next-input-character;
987     redo A;
988     }
989     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
990     if ($self->{next_input_character} == 0x0022) { # "
991     $self->{state} = 'before attribute name';
992     !!!next-input-character;
993     redo A;
994     } elsif ($self->{next_input_character} == 0x0026) { # &
995     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
996     $self->{state} = 'entity in attribute value';
997     !!!next-input-character;
998     redo A;
999     } elsif ($self->{next_input_character} == -1) {
1000 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1001 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1002     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1003     } elsif ($self->{current_token}->{type} eq 'end tag') {
1004     $self->{content_model_flag} = 'PCDATA'; # MUST
1005     if ($self->{current_token}->{attributes}) {
1006 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1007 wakaba 1.1 }
1008     } else {
1009     die "$0: $self->{current_token}->{type}: Unknown token type";
1010     }
1011     $self->{state} = 'data';
1012     ## reconsume
1013    
1014     !!!emit ($self->{current_token}); # start tag or end tag
1015     undef $self->{current_token};
1016    
1017     redo A;
1018     } else {
1019     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1020     ## Stay in the state
1021     !!!next-input-character;
1022     redo A;
1023     }
1024     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
1025     if ($self->{next_input_character} == 0x0027) { # '
1026     $self->{state} = 'before attribute name';
1027     !!!next-input-character;
1028     redo A;
1029     } elsif ($self->{next_input_character} == 0x0026) { # &
1030     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
1031     $self->{state} = 'entity in attribute value';
1032     !!!next-input-character;
1033     redo A;
1034     } elsif ($self->{next_input_character} == -1) {
1035 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
1036 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1037     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1038     } elsif ($self->{current_token}->{type} eq 'end tag') {
1039     $self->{content_model_flag} = 'PCDATA'; # MUST
1040     if ($self->{current_token}->{attributes}) {
1041 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1042 wakaba 1.1 }
1043     } else {
1044     die "$0: $self->{current_token}->{type}: Unknown token type";
1045     }
1046     $self->{state} = 'data';
1047     ## reconsume
1048    
1049     !!!emit ($self->{current_token}); # start tag or end tag
1050     undef $self->{current_token};
1051    
1052     redo A;
1053     } else {
1054     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1055     ## Stay in the state
1056     !!!next-input-character;
1057     redo A;
1058     }
1059     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1060     if ($self->{next_input_character} == 0x0009 or # HT
1061     $self->{next_input_character} == 0x000A or # LF
1062     $self->{next_input_character} == 0x000B or # VT
1063     $self->{next_input_character} == 0x000C or # FF
1064     $self->{next_input_character} == 0x0020) { # SP
1065     $self->{state} = 'before attribute name';
1066     !!!next-input-character;
1067     redo A;
1068     } elsif ($self->{next_input_character} == 0x0026) { # &
1069     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1070     $self->{state} = 'entity in attribute value';
1071     !!!next-input-character;
1072     redo A;
1073     } elsif ($self->{next_input_character} == 0x003E) { # >
1074     if ($self->{current_token}->{type} eq 'start tag') {
1075     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1076     } elsif ($self->{current_token}->{type} eq 'end tag') {
1077     $self->{content_model_flag} = 'PCDATA'; # MUST
1078     if ($self->{current_token}->{attributes}) {
1079 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1080 wakaba 1.1 }
1081     } else {
1082     die "$0: $self->{current_token}->{type}: Unknown token type";
1083     }
1084     $self->{state} = 'data';
1085     !!!next-input-character;
1086    
1087     !!!emit ($self->{current_token}); # start tag or end tag
1088     undef $self->{current_token};
1089    
1090     redo A;
1091     } elsif ($self->{next_input_character} == 0x003C or # <
1092     $self->{next_input_character} == -1) {
1093 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
1094 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
1095     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1096     } elsif ($self->{current_token}->{type} eq 'end tag') {
1097     $self->{content_model_flag} = 'PCDATA'; # MUST
1098     if ($self->{current_token}->{attributes}) {
1099 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
1100 wakaba 1.1 }
1101     } else {
1102     die "$0: $self->{current_token}->{type}: Unknown token type";
1103     }
1104     $self->{state} = 'data';
1105     ## reconsume
1106    
1107     !!!emit ($self->{current_token}); # start tag or end tag
1108     undef $self->{current_token};
1109    
1110     redo A;
1111     } else {
1112     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1113     ## Stay in the state
1114     !!!next-input-character;
1115     redo A;
1116     }
1117     } elsif ($self->{state} eq 'entity in attribute value') {
1118     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1119    
1120     unless (defined $token) {
1121     $self->{current_attribute}->{value} .= '&';
1122     } else {
1123     $self->{current_attribute}->{value} .= $token->{data};
1124     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1125     }
1126    
1127     $self->{state} = $self->{last_attribute_value_state};
1128     # next-input-character is already done
1129     redo A;
1130     } elsif ($self->{state} eq 'bogus comment') {
1131     ## (only happen if PCDATA state)
1132    
1133     my $token = {type => 'comment', data => ''};
1134    
1135     BC: {
1136     if ($self->{next_input_character} == 0x003E) { # >
1137     $self->{state} = 'data';
1138     !!!next-input-character;
1139    
1140     !!!emit ($token);
1141    
1142     redo A;
1143     } elsif ($self->{next_input_character} == -1) {
1144     $self->{state} = 'data';
1145     ## reconsume
1146    
1147     !!!emit ($token);
1148    
1149     redo A;
1150     } else {
1151     $token->{data} .= chr ($self->{next_input_character});
1152     !!!next-input-character;
1153     redo BC;
1154     }
1155     } # BC
1156     } elsif ($self->{state} eq 'markup declaration open') {
1157     ## (only happen if PCDATA state)
1158    
1159     my @next_char;
1160     push @next_char, $self->{next_input_character};
1161    
1162     if ($self->{next_input_character} == 0x002D) { # -
1163     !!!next-input-character;
1164     push @next_char, $self->{next_input_character};
1165     if ($self->{next_input_character} == 0x002D) { # -
1166     $self->{current_token} = {type => 'comment', data => ''};
1167     $self->{state} = 'comment';
1168     !!!next-input-character;
1169     redo A;
1170     }
1171     } elsif ($self->{next_input_character} == 0x0044 or # D
1172     $self->{next_input_character} == 0x0064) { # d
1173     !!!next-input-character;
1174     push @next_char, $self->{next_input_character};
1175     if ($self->{next_input_character} == 0x004F or # O
1176     $self->{next_input_character} == 0x006F) { # o
1177     !!!next-input-character;
1178     push @next_char, $self->{next_input_character};
1179     if ($self->{next_input_character} == 0x0043 or # C
1180     $self->{next_input_character} == 0x0063) { # c
1181     !!!next-input-character;
1182     push @next_char, $self->{next_input_character};
1183     if ($self->{next_input_character} == 0x0054 or # T
1184     $self->{next_input_character} == 0x0074) { # t
1185     !!!next-input-character;
1186     push @next_char, $self->{next_input_character};
1187     if ($self->{next_input_character} == 0x0059 or # Y
1188     $self->{next_input_character} == 0x0079) { # y
1189     !!!next-input-character;
1190     push @next_char, $self->{next_input_character};
1191     if ($self->{next_input_character} == 0x0050 or # P
1192     $self->{next_input_character} == 0x0070) { # p
1193     !!!next-input-character;
1194     push @next_char, $self->{next_input_character};
1195     if ($self->{next_input_character} == 0x0045 or # E
1196     $self->{next_input_character} == 0x0065) { # e
1197     ## ISSUE: What a stupid code this is!
1198     $self->{state} = 'DOCTYPE';
1199     !!!next-input-character;
1200     redo A;
1201     }
1202     }
1203     }
1204     }
1205     }
1206     }
1207     }
1208    
1209 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1210 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1211     !!!back-next-input-character (@next_char);
1212     $self->{state} = 'bogus comment';
1213     redo A;
1214    
1215     ## ISSUE: typos in spec: chacacters, is is a parse error
1216     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1217     } elsif ($self->{state} eq 'comment') {
1218     if ($self->{next_input_character} == 0x002D) { # -
1219     $self->{state} = 'comment dash';
1220     !!!next-input-character;
1221     redo A;
1222     } elsif ($self->{next_input_character} == -1) {
1223 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1224 wakaba 1.1 $self->{state} = 'data';
1225     ## reconsume
1226    
1227     !!!emit ($self->{current_token}); # comment
1228     undef $self->{current_token};
1229    
1230     redo A;
1231     } else {
1232     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1233     ## Stay in the state
1234     !!!next-input-character;
1235     redo A;
1236     }
1237     } elsif ($self->{state} eq 'comment dash') {
1238     if ($self->{next_input_character} == 0x002D) { # -
1239     $self->{state} = 'comment end';
1240     !!!next-input-character;
1241     redo A;
1242     } elsif ($self->{next_input_character} == -1) {
1243 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1244 wakaba 1.1 $self->{state} = 'data';
1245     ## reconsume
1246    
1247     !!!emit ($self->{current_token}); # comment
1248     undef $self->{current_token};
1249    
1250     redo A;
1251     } else {
1252     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1253     $self->{state} = 'comment';
1254     !!!next-input-character;
1255     redo A;
1256     }
1257     } elsif ($self->{state} eq 'comment end') {
1258     if ($self->{next_input_character} == 0x003E) { # >
1259     $self->{state} = 'data';
1260     !!!next-input-character;
1261    
1262     !!!emit ($self->{current_token}); # comment
1263     undef $self->{current_token};
1264    
1265     redo A;
1266     } elsif ($self->{next_input_character} == 0x002D) { # -
1267 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1268 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1269     ## Stay in the state
1270     !!!next-input-character;
1271     redo A;
1272     } elsif ($self->{next_input_character} == -1) {
1273 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1274 wakaba 1.1 $self->{state} = 'data';
1275     ## reconsume
1276    
1277     !!!emit ($self->{current_token}); # comment
1278     undef $self->{current_token};
1279    
1280     redo A;
1281     } else {
1282 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1283 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1284     $self->{state} = 'comment';
1285     !!!next-input-character;
1286     redo A;
1287     }
1288     } elsif ($self->{state} eq 'DOCTYPE') {
1289     if ($self->{next_input_character} == 0x0009 or # HT
1290     $self->{next_input_character} == 0x000A or # LF
1291     $self->{next_input_character} == 0x000B or # VT
1292     $self->{next_input_character} == 0x000C or # FF
1293     $self->{next_input_character} == 0x0020) { # SP
1294     $self->{state} = 'before DOCTYPE name';
1295     !!!next-input-character;
1296     redo A;
1297     } else {
1298 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1299 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1300     ## reconsume
1301     redo A;
1302     }
1303     } elsif ($self->{state} eq 'before DOCTYPE name') {
1304     if ($self->{next_input_character} == 0x0009 or # HT
1305     $self->{next_input_character} == 0x000A or # LF
1306     $self->{next_input_character} == 0x000B or # VT
1307     $self->{next_input_character} == 0x000C or # FF
1308     $self->{next_input_character} == 0x0020) { # SP
1309     ## Stay in the state
1310     !!!next-input-character;
1311     redo A;
1312     } elsif (0x0061 <= $self->{next_input_character} and
1313     $self->{next_input_character} <= 0x007A) { # a..z
1314     $self->{current_token} = {type => 'DOCTYPE',
1315     name => chr ($self->{next_input_character} - 0x0020),
1316     error => 1};
1317     $self->{state} = 'DOCTYPE name';
1318     !!!next-input-character;
1319     redo A;
1320     } elsif ($self->{next_input_character} == 0x003E) { # >
1321 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1322 wakaba 1.1 $self->{state} = 'data';
1323     !!!next-input-character;
1324    
1325     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1326    
1327     redo A;
1328     } elsif ($self->{next_input_character} == -1) {
1329 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1330 wakaba 1.1 $self->{state} = 'data';
1331     ## reconsume
1332    
1333     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1334    
1335     redo A;
1336     } else {
1337     $self->{current_token} = {type => 'DOCTYPE',
1338     name => chr ($self->{next_input_character}),
1339     error => 1};
1340     $self->{state} = 'DOCTYPE name';
1341     !!!next-input-character;
1342     redo A;
1343     }
1344     } elsif ($self->{state} eq 'DOCTYPE name') {
1345     if ($self->{next_input_character} == 0x0009 or # HT
1346     $self->{next_input_character} == 0x000A or # LF
1347     $self->{next_input_character} == 0x000B or # VT
1348     $self->{next_input_character} == 0x000C or # FF
1349     $self->{next_input_character} == 0x0020) { # SP
1350     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1351     $self->{state} = 'after DOCTYPE name';
1352     !!!next-input-character;
1353     redo A;
1354     } elsif ($self->{next_input_character} == 0x003E) { # >
1355     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1356     $self->{state} = 'data';
1357     !!!next-input-character;
1358    
1359     !!!emit ($self->{current_token}); # DOCTYPE
1360     undef $self->{current_token};
1361    
1362     redo A;
1363     } elsif (0x0061 <= $self->{next_input_character} and
1364     $self->{next_input_character} <= 0x007A) { # a..z
1365     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1366     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1367     ## Stay in the state
1368     !!!next-input-character;
1369     redo A;
1370     } elsif ($self->{next_input_character} == -1) {
1371 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1372 wakaba 1.1 $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1373     $self->{state} = 'data';
1374     ## reconsume
1375    
1376     !!!emit ($self->{current_token});
1377     undef $self->{current_token};
1378    
1379     redo A;
1380     } else {
1381     $self->{current_token}->{name}
1382     .= chr ($self->{next_input_character}); # DOCTYPE
1383     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1384     ## Stay in the state
1385     !!!next-input-character;
1386     redo A;
1387     }
1388     } elsif ($self->{state} eq 'after DOCTYPE name') {
1389     if ($self->{next_input_character} == 0x0009 or # HT
1390     $self->{next_input_character} == 0x000A or # LF
1391     $self->{next_input_character} == 0x000B or # VT
1392     $self->{next_input_character} == 0x000C or # FF
1393     $self->{next_input_character} == 0x0020) { # SP
1394     ## Stay in the state
1395     !!!next-input-character;
1396     redo A;
1397     } elsif ($self->{next_input_character} == 0x003E) { # >
1398     $self->{state} = 'data';
1399     !!!next-input-character;
1400    
1401     !!!emit ($self->{current_token}); # DOCTYPE
1402     undef $self->{current_token};
1403    
1404     redo A;
1405     } elsif ($self->{next_input_character} == -1) {
1406 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1407 wakaba 1.1 $self->{state} = 'data';
1408     ## reconsume
1409    
1410     !!!emit ($self->{current_token}); # DOCTYPE
1411     undef $self->{current_token};
1412    
1413     redo A;
1414     } else {
1415 wakaba 1.3 !!!parse-error (type => 'string after DOCTYPE name');
1416 wakaba 1.1 $self->{current_token}->{error} = 1; # DOCTYPE
1417     $self->{state} = 'bogus DOCTYPE';
1418     !!!next-input-character;
1419     redo A;
1420     }
1421     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1422     if ($self->{next_input_character} == 0x003E) { # >
1423     $self->{state} = 'data';
1424     !!!next-input-character;
1425    
1426     !!!emit ($self->{current_token}); # DOCTYPE
1427     undef $self->{current_token};
1428    
1429     redo A;
1430     } elsif ($self->{next_input_character} == -1) {
1431 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1432 wakaba 1.1 $self->{state} = 'data';
1433     ## reconsume
1434    
1435     !!!emit ($self->{current_token}); # DOCTYPE
1436     undef $self->{current_token};
1437    
1438     redo A;
1439     } else {
1440     ## Stay in the state
1441     !!!next-input-character;
1442     redo A;
1443     }
1444     } else {
1445     die "$0: $self->{state}: Unknown state";
1446     }
1447     } # A
1448    
1449     die "$0: _get_next_token: unexpected case";
1450     } # _get_next_token
1451    
1452     sub _tokenize_attempt_to_consume_an_entity ($) {
         ## Tries to consume a character reference.  The "&" has already
         ## been consumed by the caller; |next_input_character| holds the
         ## character that follows it.
         ##
         ## Returns a hash reference {type => 'character', data => ...} when
         ## a numeric or named reference is recognized, or |undef| when it
         ## is not (the caller in the 'entity in attribute value' state then
         ## appends a literal "&").  Before returning |undef|, the numeric
         ## branches put the consumed characters back so that "#" is the
         ## next input character again; the named branch hands the consumed
         ## characters back through |!!!back-token|.
1453     my $self = shift;
1454    
1455     if ($self->{next_input_character} == 0x0023) { # #
1456     !!!next-input-character;
         ## Stays undefined until the first hexadecimal digit is seen.
1457     my $num;
1458     if ($self->{next_input_character} == 0x0078 or # x
1459     $self->{next_input_character} == 0x0058) { # X
1460     X: {
         ## On the first pass this remembers the "x" or "X" so that it can
         ## be pushed back if no digit follows.  Later passes (via |redo X|)
         ## overwrite it with a digit, but by then |$num| is defined and
         ## the value is no longer used.
1461     my $x_char = $self->{next_input_character};
1462     !!!next-input-character;
1463     if (0x0030 <= $self->{next_input_character} and
1464     $self->{next_input_character} <= 0x0039) { # 0..9
1465     $num ||= 0;
1466     $num *= 0x10;
1467     $num += $self->{next_input_character} - 0x0030;
1468     redo X;
1469     } elsif (0x0061 <= $self->{next_input_character} and
1470     $self->{next_input_character} <= 0x0066) { # a..f
1471     ## ISSUE: the spec says U+0078, which is apparently incorrect
1472     $num ||= 0;
1473     $num *= 0x10;
1474     $num += $self->{next_input_character} - 0x0060 + 9;
1475     redo X;
1476     } elsif (0x0041 <= $self->{next_input_character} and
1477     $self->{next_input_character} <= 0x0046) { # A..F
1478     ## ISSUE: the spec says U+0058, which is apparently incorrect
1479     $num ||= 0;
1480     $num *= 0x10;
1481     $num += $self->{next_input_character} - 0x0040 + 9;
1482     redo X;
1483     } elsif (not defined $num) { # no hexadecimal digit
1484 wakaba 1.3 !!!parse-error (type => 'bare hcro');
         ## Restore the input to "#", "x", <current character>.  The
         ## current character has to be pushed back together with the
         ## "x" *before* it is overwritten with "#"; otherwise it would
         ## be silently dropped from the input stream.
1485 wakaba 1.1 !!!back-next-input-character ($x_char, $self->{next_input_character});
1486     $self->{next_input_character} = 0x0023; # #
1487     return undef;
1488     } elsif ($self->{next_input_character} == 0x003B) { # ;
1489     !!!next-input-character;
1490     } else {
1491 wakaba 1.3 !!!parse-error (type => 'no refc');
1492 wakaba 1.1 }
1493    
1494     ## TODO: check the definition for |a valid Unicode character|.
1495     if ($num > 1114111 or $num == 0) {
1496     $num = 0xFFFD; # REPLACEMENT CHARACTER
1497     ## ISSUE: Why this is not an error?
1498     }
1499    
1500     return {type => 'character', data => chr $num};
1501     } # X
1502     } elsif (0x0030 <= $self->{next_input_character} and
1503     $self->{next_input_character} <= 0x0039) { # 0..9
         ## Decimal reference: accumulate digits until a non-digit is seen.
1504     my $code = $self->{next_input_character} - 0x0030;
1505     !!!next-input-character;
1506    
1507     while (0x0030 <= $self->{next_input_character} and
1508     $self->{next_input_character} <= 0x0039) { # 0..9
1509     $code *= 10;
1510     $code += $self->{next_input_character} - 0x0030;
1511    
1512     !!!next-input-character;
1513     }
1514    
1515     if ($self->{next_input_character} == 0x003B) { # ;
1516     !!!next-input-character;
1517     } else {
1518 wakaba 1.3 !!!parse-error (type => 'no refc');
1519 wakaba 1.1 }
1520    
1521     ## TODO: check the definition for |a valid Unicode character|.
1522     if ($code > 1114111 or $code == 0) {
1523     $code = 0xFFFD; # REPLACEMENT CHARACTER
1524     ## ISSUE: Why this is not an error?
1525     }
1526    
1527     return {type => 'character', data => chr $code};
1528     } else {
         ## "&#" followed by neither "x"/"X" nor a digit: push the current
         ## character back and make "#" the next input character again.
1529 wakaba 1.3 !!!parse-error (type => 'bare nero');
1530 wakaba 1.1 !!!back-next-input-character ($self->{next_input_character});
1531     $self->{next_input_character} = 0x0023; # #
1532     return undef;
1533     }
1534     } elsif ((0x0041 <= $self->{next_input_character} and
1535     $self->{next_input_character} <= 0x005A) or
1536     (0x0061 <= $self->{next_input_character} and
1537     $self->{next_input_character} <= 0x007A)) {
         ## Named reference: |$entity_name| collects the consumed letters
         ## and digits; |$value| is the replacement text of the most recent
         ## match in |$entity_char| followed by any characters consumed
         ## after that match (or just the consumed characters if nothing
         ## has matched).
1538     my $entity_name = chr $self->{next_input_character};
1539     !!!next-input-character;
1540    
1541     my $value = $entity_name;
1542     my $match;
1543    
1544     while (length $entity_name < 10 and
1545     ## NOTE: Some number greater than the maximum length of entity name
1546     ((0x0041 <= $self->{next_input_character} and
1547     $self->{next_input_character} <= 0x005A) or
1548     (0x0061 <= $self->{next_input_character} and
1549     $self->{next_input_character} <= 0x007A) or
1550     (0x0030 <= $self->{next_input_character} and
1551     $self->{next_input_character} <= 0x0039))) {
1552     $entity_name .= chr $self->{next_input_character};
1553     if (defined $entity_char->{$entity_name}) {
1554     $value = $entity_char->{$entity_name};
1555     $match = 1;
1556     } else {
1557     $value .= chr $self->{next_input_character};
1558     }
1559     !!!next-input-character;
1560     }
1561    
1562     if ($match) {
1563     if ($self->{next_input_character} == 0x003B) { # ;
1564     !!!next-input-character;
1565     } else {
         ## The terminating ";" is missing; use the same error type as
         ## the numeric reference branches do for this condition.
1566 wakaba 1.3 !!!parse-error (type => 'no refc');
1567 wakaba 1.1 }
1568    
1569     return {type => 'character', data => $value};
1570     } else {
1571 wakaba 1.3 !!!parse-error (type => 'bare ero');
1572 wakaba 1.1 ## NOTE: No characters are consumed in the spec.
1573     !!!back-token ({type => 'character', data => $value});
1574     return undef;
1575     }
1576     } else {
1577     ## no characters are consumed
1578 wakaba 1.3 !!!parse-error (type => 'bare ero');
1579 wakaba 1.1 return undef;
1580     }
1581     } # _tokenize_attempt_to_consume_an_entity
1582    
1583     sub _initialize_tree_constructor ($) {
         ## Prepares |$self->{document}| for tree construction by calling
         ## |strict_error_checking (0)| on it (presumably the DOM Level 3
         ## strictErrorChecking switch -- confirm against the DOM
         ## implementation in use).  |_terminate_tree_constructor| sets the
         ## value back to 1.
1584     my $self = shift;
1585     ## NOTE: $self->{document} MUST be specified before this method is called
1586     $self->{document}->strict_error_checking (0);
1587     ## TODO: Turn mutation events off # MUST
1588     ## TODO: Turn loose Document option (manakai extension) on
1589     ## TODO: Mark the Document as an HTML document # MUST
1590     } # _initialize_tree_constructor
1591    
1592     sub _terminate_tree_constructor ($) {
         ## Counterpart of |_initialize_tree_constructor|: calls
         ## |strict_error_checking (1)| on |$self->{document}|.
1593     my $self = shift;
1594     $self->{document}->strict_error_checking (1);
1595     ## TODO: Turn mutation events on
1596     } # _terminate_tree_constructor
1597    
1598     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1599    
1600 wakaba 1.3 { # tree construction stage
         ## The current token, shared by the tree construction subs that are
         ## declared inside this block.  Presumably assigned by the
         ## |!!!next-token| macro -- TODO confirm against the macro definition.
1601     my $token;
1602    
1603 wakaba 1.1 sub _construct_tree ($) {
         ## Entry point of the tree construction stage: fetches the first
         ## token, resets the per-parse state kept in |$self|, and then runs
         ## the initial, root element and main phases in that order.
1604     my ($self) = @_;
1605    
1606     ## When an interactive UA renders the $self->{document} available
1607     ## to the user, and when it begins accepting user input, are
1608     ## not defined.
1609    
1610     ## Append a character: collect it and all subsequent consecutive
1611     ## characters and insert one Text node whose data is the concatenation
1612     ## of all those characters. # MUST
1613    
         ## Fetch the first token before any phase looks at |$token|.
1614     !!!next-token;
1615    
         ## Reset the tree construction state for this run.
1616 wakaba 1.3 $self->{insertion_mode} = 'before head';
1617     undef $self->{form_element};
1618     undef $self->{head_element};
1619     $self->{open_elements} = [];
1620     undef $self->{inner_html_node};
1621    
1622     $self->_tree_construction_initial; # MUST
1623     $self->_tree_construction_root_element;
1624     $self->_tree_construction_main;
1625     } # _construct_tree
1626    
1627     sub _tree_construction_initial ($) {
         ## Initial phase of tree construction: handles everything up to and
         ## including the DOCTYPE.  Returns as soon as the phase is over;
         ## unless noted otherwise, |$token| is left as it is so that the
         ## next phase reprocesses it.  Dies on an unknown token type.
1628     my $self = shift;
1629     B: {
1630     if ($token->{type} eq 'DOCTYPE') {
1631     if ($token->{error}) {
1632     ## ISSUE: Spec currently left this case undefined.
1633     !!!parse-error (type => 'bogus DOCTYPE');
1634     }
         ## The doctype node is created and appended even when the token
         ## is flagged as erroneous.
1635     my $doctype = $self->{document}->create_document_type_definition
1636     ($token->{name});
1637     $self->{document}->append_child ($doctype);
1638     #$phase = 'root element';
         ## The DOCTYPE token has been handled; move on to the next token
         ## before leaving the phase.
1639     !!!next-token;
1640     #redo B;
1641     return;
1642     } elsif ({
1643     comment => 1,
1644     'start tag' => 1,
1645     'end tag' => 1,
1646     'end-of-file' => 1,
1647     }->{$token->{type}}) {
         ## NOTE(review): a comment that precedes the DOCTYPE takes this
         ## branch too, so a DOCTYPE following it reaches
         ## |_tree_construction_root_element|, which reports an error and
         ## ignores it -- confirm that this is intended.
1648     ## ISSUE: Spec currently left this case undefined.
1649     !!!parse-error (type => 'missing DOCTYPE');
1650     #$phase = 'root element';
1651     ## reprocess
1652     #redo B;
1653     return;
1654     } elsif ($token->{type} eq 'character') {
         ## Leading white space is removed from the token and appended to
         ## the document as text.
1655     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
1656     $self->{document}->manakai_append_text ($1);
1657     ## ISSUE: DOM3 Core does not allow Document > Text
1658     unless (length $token->{data}) {
1659     ## Stay in the phase
1660     !!!next-token;
1661     redo B;
1662     }
1663     }
         ## Non-white-space character data remains: the DOCTYPE is missing.
1664     ## ISSUE: Spec currently left this case undefined.
1665     !!!parse-error (type => 'missing DOCTYPE');
1666     #$phase = 'root element';
1667     ## reprocess
1668     #redo B;
1669     return;
1670     } else {
1671     die "$0: $token->{type}: Unknown token";
1672     }
1673     } # B
1674     } # _tree_construction_initial
1675    
## The "root element" phase of tree construction.
##
## Works on the current token (|$token|, declared outside this sub).
## DOCTYPE tokens are reported and ignored, comments are appended to
## the document, and leading white space is appended to the document
## as text.  On any other known token an |html| element is created,
## appended to the document and pushed onto the stack of open
## elements; the token is left for the main phase to reprocess.
## Dies on an unknown token type.
sub _tree_construction_root_element ($) {
  my $self = shift;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## Ignore the token and stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);
      ## Stay in the phase
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        if (not length $token->{data}) {
          ## White space only: stay in the phase
          !!!next-token;
          next TOKEN;
        }
      }
      ## Non-space data remains: create the root element below
    } elsif (not grep { $type eq $_ }
                 'start tag', 'end tag', 'end-of-file') {
      die "$0: $token->{type}: Unknown token";
    }
    ## ISSUE: There is an issue in the spec (start tag, end tag and
    ## end-of-file tokens).

    my $root_element; !!!create-element ($root_element, 'html');
    $self->{document}->append_child ($root_element);
    push @{$self->{open_elements}}, [$root_element, 'html'];
    ## Switch to the main phase and reprocess the token there
    return;
  } # TOKEN
} # _tree_construction_root_element
1722    
## Reset the insertion mode appropriately.
##
## Walks the stack of open elements (|$self->{open_elements}|, whose
## entries are |[node, local name]| pairs) from the current node
## towards the root and sets |$self->{insertion_mode}| according to
## the first element name that determines a mode.
##
## In the innerHTML case (|$self->{inner_html_node}| is defined) the
## context node is consulted instead of the stack entry, but only
## once the walk has reached the first entry of the stack and only
## if the context node is neither |td| nor |th|.  Entries above the
## first one are always judged by themselves, so that e.g. a |td|
## on the stack yields 'in cell' regardless of the context node.
##
## Returns nothing meaningful.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      ## The context node replaces the first stack entry only; doing
      ## this for every entry would make the stack irrelevant.
      if (defined $self->{inner_html_node} and
          $self->{inner_html_node}->[1] ne 'td' and
          $self->{inner_html_node}->[1] ne 'th') {
        $node = $self->{inner_html_node};
      }
    }

    ## Step 4..13
    my $new_mode = {
                    select => 'in select',
                    td => 'in cell',
                    th => 'in cell',
                    tr => 'in row',
                    tbody => 'in table body',
                    thead => 'in table head',
                    tfoot => 'in table foot',
                    caption => 'in caption',
                    colgroup => 'in column group',
                    table => 'in table',
                    head => 'in body', # not in head!
                    body => 'in body',
                    frameset => 'in frameset',
                   }->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      unless (defined $self->{head_element}) {
        $self->{insertion_mode} = 'before head';
      } else {
        $self->{insertion_mode} = 'after head';
      }
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
1784    
1785     sub _tree_construction_main ($) {
1786     my $self = shift;
1787    
1788     my $phase = 'main';
1789 wakaba 1.1
1790     my $active_formatting_elements = [];
1791    
## Reconstruct the active formatting elements. # MUST
##
## Argument: a code reference which is called with each newly
## cloned node and is expected to insert it into the tree.
##
## Starting from the last entry of the list of active formatting
## elements, rewinds to the entry following the nearest marker or
## the nearest entry that is still in the stack of open elements (or
## to the first entry of the list), then walks forward again: each
## entry is shallow-cloned, the clone is inserted, pushed onto the
## stack of open elements and put in place of the original entry.
my $reconstruct_active_formatting_elements = sub {
  my $insert = shift;

  ## Step 1
  return unless @$active_formatting_elements;

  ## Whether the node is in the stack of open elements
  my $is_open = sub {
    my $el = shift;
    for my $open (@{$self->{open_elements}}) {
      return 1 if $el eq $open->[0];
    }
    return 0;
  };

  ## Step 3
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  ## Step 4: nothing to rewind once the first entry is reached
  until ($active_formatting_elements->[0]->[0] eq $entry->[0]) {
    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6
    if ($entry->[0] eq '#marker' or $is_open->($entry->[0])) {
      ## Step 7
      $i++;
      $entry = $active_formatting_elements->[$i];
      last;
    }
  }

  while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @{$self->{open_elements}}, $clone;

    ## Step 10
    $active_formatting_elements->[$i] = $clone;

    ## Step 11
    last if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  }
}; # $reconstruct_active_formatting_elements
1862    
## Clear the list of active formatting elements up to the last
## marker: the last '#marker' entry and every entry after it are
## removed.  The list is left untouched if it contains no marker.
my $clear_up_to_marker = sub {
  my $idx = $#$active_formatting_elements;
  while ($idx >= 0) {
    if ($active_formatting_elements->[$idx]->[0] eq '#marker') {
      splice @$active_formatting_elements, $idx;
      return;
    }
    $idx--;
  }
}; # $clear_up_to_marker
1871    
## Process a |style| start tag (the current token, |$token|).
##
## Creates the |style| element with the attributes of the start tag
## token, appends it to the |head| element (when in the 'in head'
## insertion mode and a |head| element is known) or otherwise to the
## current node, collects the following character tokens as its text
## content in the CDATA content model, and finally consumes the
## matching end tag (reporting a parse error if the next token is
## something else).
my $style_start_tag = sub {
  my $style_el;
  ## The attributes of the token (e.g. |type| or |media|) have to be
  ## copied to the element, as is done for |script|.
  !!!create-element ($style_el, 'style', $token->{attributes});
  ## $self->{insertion_mode} eq 'in head' and ... (always true)
  (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
   ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
      ->append_child ($style_el);
  $self->{content_model_flag} = 'CDATA';

  my $text = '';
  !!!next-token;
  while ($token->{type} eq 'character') {
    $text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $text) {
    $style_el->manakai_append_text ($text);
  }

  $self->{content_model_flag} = 'PCDATA';

  if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    ## Ignore the token
  } else {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
  }
  !!!next-token;
}; # $style_start_tag
1900    
## Process a |script| start tag (the current token, |$token|).
##
## Creates the |script| element with the attributes of the token,
## collects the following character tokens as its text content in
## the CDATA content model and consumes the matching end tag (a
## parse error is reported if the next token is something else).
## Unless in the innerHTML case, the element is then appended to the
## |head| element (when in the 'in head' insertion mode and a |head|
## element is known) or otherwise to the current node.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  ## Gather the data of all consecutive character tokens
  my @data;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @data, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @data;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error (type => 'in CDATA:#'.$token->{type});
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }
  ## Otherwise, the end tag token is simply ignored

  if (not defined $self->{inner_html_node}) {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent;
    if ($self->{insertion_mode} eq 'in head' and
        defined $self->{head_element}) {
      $parent = $self->{head_element};
    } else {
      $parent = $self->{open_elements}->[-1]->[0];
    }
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  } else {
    ## TODO: mark as "already executed"
  }

  !!!next-token;
}; # $script_start_tag
1945    
## Process an end tag of a formatting element (the "adoption agency"
## steps).
##
## Argument: the tag name of the end tag token.
##
## Works on the list of active formatting elements and on the stack
## of open elements; entries of both are |[node, local name]| pairs
## (markers are |['#marker', '']|).  The steps are repeated (|redo
## FET|) until one of the |return| paths is taken; every |return|
## path first advances to the next token.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    ## Find the last entry with the tag name after the last marker.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error (type => 'unmatched end tag:'.$tag_name);
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error;
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error;
      ## Remove the formatting element itself from the list; it is
      ## not necessarily the last entry of the list.
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error;
    }

    ## Step 2
    ## Scanning from the current node towards the formatting
    ## element, the last match kept is the one nearest to the
    ## formatting element.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      my $node = $self->{open_elements}->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    ## No furthest block: pop the formatting element and everything
    ## after it from the stack and drop it from the list.
    unless (defined $furthest_block) { # MUST
      splice @{$self->{open_elements}}, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## NOTE(review): if the formatting element is the first entry of
    ## the list, the index |- 1| selects the last entry -- confirm
    ## that this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $self->{open_elements}->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements
      ## is removed from the stack and the previous node is tried.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @{$self->{open_elements}}, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      ## A node with children is replaced by a shallow clone in both
      ## the list and the stack.
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $self->{open_elements}->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## Copy the child list first, since it is modified while moving.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the list and insert the
    ## clone after the bookmark.  |$i| is decremented when the removal
    ## happens below the bookmark position, as the bookmark shifts.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## Remove the formatting element from the stack and put the clone
    ## just after the furthest block.
    undef $i;
    OE: for (reverse 0..$#{$self->{open_elements}}) {
      if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
        splice @{$self->{open_elements}}, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## NOTE(review): the length argument |1| replaces the entry that
    ## follows the furthest block (if any) rather than inserting
    ## before it, unlike the |0| used in step 12 -- confirm.
    splice @{$self->{open_elements}}, $i + 1, 1, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
2128    
## Insert the given node as the last child of the current node (the
## last entry of the stack of open elements).
my $insert_to_current = sub {
  my $child = shift;
  my $current_node = $self->{open_elements}->[-1]->[0];
  $current_node->append_child ($child);
}; # $insert_to_current
2132    
## Insert the given node, foster parenting it where necessary.
##
## If the current node is not one of |table|, |tbody|, |tfoot|,
## |thead| or |tr|, the node is simply appended to the current node.
## Otherwise the last |table| entry in the stack of open elements is
## looked up: if its node has a parent whose |node_type| is 1, the
## child is inserted into that parent just before the table;
## otherwise it is appended to the node of the stack entry preceding
## the table.  Without any |table| entry, the node of the first
## stack entry is used as the parent.
my $insert_to_foster = sub {
  my $child = shift;
  my $stack = $self->{open_elements};

  my $current_name = $stack->[-1]->[1];
  unless (grep { $current_name eq $_ } qw(table tbody tfoot thead tr)) {
    return $stack->[-1]->[0]->append_child ($child);
  }

  # MUST
  my ($foster_parent_element, $next_sibling);

  ## Find the last |table| in the stack of open elements
  my $table_i = $#$stack;
  $table_i-- until $table_i < 0 or $stack->[$table_i]->[1] eq 'table';

  if ($table_i >= 0) {
    my $table = $stack->[$table_i]->[0];
    my $parent = $table->parent_node;
    if (defined $parent and $parent->node_type == 1) {
      ($foster_parent_element, $next_sibling) = ($parent, $table);
    } else {
      $foster_parent_element = $stack->[$table_i - 1]->[0];
    }
  }

  $foster_parent_element = $stack->[0]->[0]
      unless defined $foster_parent_element;

  ## An undefined |$next_sibling| is passed as is
  return $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
2163    
2164     my $in_body = sub {
2165     my $insert = shift;
2166     if ($token->{type} eq 'start tag') {
2167     if ($token->{tag_name} eq 'script') {
2168     $script_start_tag->();
2169     return;
2170     } elsif ($token->{tag_name} eq 'style') {
2171     $style_start_tag->();
2172     return;
2173     } elsif ({
2174     base => 1, link => 1, meta => 1,
2175     }->{$token->{tag_name}}) {
2176 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2177 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2178     my $el;
2179     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2180 wakaba 1.3 if (defined $self->{head_element}) {
2181     $self->{head_element}->append_child ($el);
2182 wakaba 1.1 } else {
2183     $insert->($el);
2184     }
2185    
2186     !!!next-token;
2187     return;
2188     } elsif ($token->{tag_name} eq 'title') {
2189 wakaba 1.3 !!!parse-error (type => 'in body:title');
2190 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2191     my $title_el;
2192     !!!create-element ($title_el, 'title', $token->{attributes});
2193 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2194 wakaba 1.1 ->append_child ($title_el);
2195     $self->{content_model_flag} = 'RCDATA';
2196    
2197     my $text = '';
2198     !!!next-token;
2199     while ($token->{type} eq 'character') {
2200     $text .= $token->{data};
2201     !!!next-token;
2202     }
2203     if (length $text) {
2204     $title_el->manakai_append_text ($text);
2205     }
2206    
2207     $self->{content_model_flag} = 'PCDATA';
2208    
2209     if ($token->{type} eq 'end tag' and
2210     $token->{tag_name} eq 'title') {
2211     ## Ignore the token
2212     } else {
2213 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2214 wakaba 1.1 ## ISSUE: And ignore?
2215     }
2216     !!!next-token;
2217     return;
2218     } elsif ($token->{tag_name} eq 'body') {
2219 wakaba 1.3 !!!parse-error (type => 'in body:body');
2220 wakaba 1.1
2221 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2222     $self->{open_elements}->[1]->[1] ne 'body') {
2223 wakaba 1.1 ## Ignore the token
2224     } else {
2225 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2226 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2227     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2228     $body_el->set_attribute_ns
2229     (undef, [undef, $attr_name],
2230     $token->{attributes}->{$attr_name}->{value});
2231     }
2232     }
2233     }
2234     !!!next-token;
2235     return;
2236     } elsif ({
2237     address => 1, blockquote => 1, center => 1, dir => 1,
2238     div => 1, dl => 1, fieldset => 1, listing => 1,
2239     menu => 1, ol => 1, p => 1, ul => 1,
2240     pre => 1,
2241     }->{$token->{tag_name}}) {
2242     ## has a p element in scope
2243 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2244 wakaba 1.1 if ($_->[1] eq 'p') {
2245     !!!back-token;
2246     $token = {type => 'end tag', tag_name => 'p'};
2247     return;
2248     } elsif ({
2249     table => 1, caption => 1, td => 1, th => 1,
2250     button => 1, marquee => 1, object => 1, html => 1,
2251     }->{$_->[1]}) {
2252     last INSCOPE;
2253     }
2254     } # INSCOPE
2255    
2256     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2257     if ($token->{tag_name} eq 'pre') {
2258     !!!next-token;
2259     if ($token->{type} eq 'character') {
2260     $token->{data} =~ s/^\x0A//;
2261     unless (length $token->{data}) {
2262     !!!next-token;
2263     }
2264     }
2265     } else {
2266     !!!next-token;
2267     }
2268     return;
2269     } elsif ($token->{tag_name} eq 'form') {
2270 wakaba 1.3 if (defined $self->{form_element}) {
2271     !!!parse-error (type => 'in form:form');
2272 wakaba 1.1 ## Ignore the token
2273     } else {
2274     ## has a p element in scope
2275 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2276 wakaba 1.1 if ($_->[1] eq 'p') {
2277     !!!back-token;
2278     $token = {type => 'end tag', tag_name => 'p'};
2279     return;
2280     } elsif ({
2281     table => 1, caption => 1, td => 1, th => 1,
2282     button => 1, marquee => 1, object => 1, html => 1,
2283     }->{$_->[1]}) {
2284     last INSCOPE;
2285     }
2286     } # INSCOPE
2287    
2288     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2289 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2290 wakaba 1.1 !!!next-token;
2291     return;
2292     }
2293     } elsif ($token->{tag_name} eq 'li') {
2294     ## has a p element in scope
2295 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2296 wakaba 1.1 if ($_->[1] eq 'p') {
2297     !!!back-token;
2298     $token = {type => 'end tag', tag_name => 'p'};
2299     return;
2300     } elsif ({
2301     table => 1, caption => 1, td => 1, th => 1,
2302     button => 1, marquee => 1, object => 1, html => 1,
2303     }->{$_->[1]}) {
2304     last INSCOPE;
2305     }
2306     } # INSCOPE
2307    
2308     ## Step 1
2309     my $i = -1;
2310 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2311 wakaba 1.1 LI: {
2312     ## Step 2
2313     if ($node->[1] eq 'li') {
2314 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2315 wakaba 1.1 last LI;
2316     }
2317    
2318     ## Step 3
2319     if (not $formatting_category->{$node->[1]} and
2320     #not $phrasing_category->{$node->[1]} and
2321     ($special_category->{$node->[1]} or
2322     $scoping_category->{$node->[1]}) and
2323     $node->[1] ne 'address' and $node->[1] ne 'div') {
2324     last LI;
2325     }
2326    
2327     ## Step 4
2328     $i--;
2329 wakaba 1.3 $node = $self->{open_elements}->[$i];
2330 wakaba 1.1 redo LI;
2331     } # LI
2332    
2333     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2334     !!!next-token;
2335     return;
2336     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2337     ## has a p element in scope
2338 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2339 wakaba 1.1 if ($_->[1] eq 'p') {
2340     !!!back-token;
2341     $token = {type => 'end tag', tag_name => 'p'};
2342     return;
2343     } elsif ({
2344     table => 1, caption => 1, td => 1, th => 1,
2345     button => 1, marquee => 1, object => 1, html => 1,
2346     }->{$_->[1]}) {
2347     last INSCOPE;
2348     }
2349     } # INSCOPE
2350    
2351     ## Step 1
2352     my $i = -1;
2353 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2354 wakaba 1.1 LI: {
2355     ## Step 2
2356     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2357 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2358 wakaba 1.1 last LI;
2359     }
2360    
2361     ## Step 3
2362     if (not $formatting_category->{$node->[1]} and
2363     #not $phrasing_category->{$node->[1]} and
2364     ($special_category->{$node->[1]} or
2365     $scoping_category->{$node->[1]}) and
2366     $node->[1] ne 'address' and $node->[1] ne 'div') {
2367     last LI;
2368     }
2369    
2370     ## Step 4
2371     $i--;
2372 wakaba 1.3 $node = $self->{open_elements}->[$i];
2373 wakaba 1.1 redo LI;
2374     } # LI
2375    
2376     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2377     !!!next-token;
2378     return;
2379     } elsif ($token->{tag_name} eq 'plaintext') {
2380     ## has a p element in scope
2381 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2382 wakaba 1.1 if ($_->[1] eq 'p') {
2383     !!!back-token;
2384     $token = {type => 'end tag', tag_name => 'p'};
2385     return;
2386     } elsif ({
2387     table => 1, caption => 1, td => 1, th => 1,
2388     button => 1, marquee => 1, object => 1, html => 1,
2389     }->{$_->[1]}) {
2390     last INSCOPE;
2391     }
2392     } # INSCOPE
2393    
2394     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2395    
2396     $self->{content_model_flag} = 'PLAINTEXT';
2397    
2398     !!!next-token;
2399     return;
2400     } elsif ({
2401     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2402     }->{$token->{tag_name}}) {
2403     ## has a p element in scope
2404 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2405     my $node = $self->{open_elements}->[$_];
2406 wakaba 1.1 if ($node->[1] eq 'p') {
2407     !!!back-token;
2408     $token = {type => 'end tag', tag_name => 'p'};
2409     return;
2410     } elsif ({
2411     table => 1, caption => 1, td => 1, th => 1,
2412     button => 1, marquee => 1, object => 1, html => 1,
2413     }->{$node->[1]}) {
2414     last INSCOPE;
2415     }
2416     } # INSCOPE
2417    
2418     ## has an element in scope
2419     my $i;
2420 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2421     my $node = $self->{open_elements}->[$_];
2422 wakaba 1.1 if ({
2423     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2424     }->{$node->[1]}) {
2425     $i = $_;
2426     last INSCOPE;
2427     } elsif ({
2428     table => 1, caption => 1, td => 1, th => 1,
2429     button => 1, marquee => 1, object => 1, html => 1,
2430     }->{$node->[1]}) {
2431     last INSCOPE;
2432     }
2433     } # INSCOPE
2434    
2435     if (defined $i) {
2436 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2437     splice @{$self->{open_elements}}, $i;
2438 wakaba 1.1 }
2439    
2440     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2441    
2442     !!!next-token;
2443     return;
2444     } elsif ($token->{tag_name} eq 'a') {
2445     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2446     my $node = $active_formatting_elements->[$i];
2447     if ($node->[1] eq 'a') {
2448 wakaba 1.3 !!!parse-error (type => 'in a:a');
2449 wakaba 1.1
2450     !!!back-token;
2451     $token = {type => 'end tag', tag_name => 'a'};
2452     $formatting_end_tag->($token->{tag_name});
2453    
2454     AFE2: for (reverse 0..$#$active_formatting_elements) {
2455     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2456     splice @$active_formatting_elements, $_, 1;
2457     last AFE2;
2458     }
2459     } # AFE2
2460 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2461     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2462     splice @{$self->{open_elements}}, $_, 1;
2463 wakaba 1.1 last OE;
2464     }
2465     } # OE
2466     last AFE;
2467     } elsif ($node->[0] eq '#marker') {
2468     last AFE;
2469     }
2470     } # AFE
2471    
2472     $reconstruct_active_formatting_elements->($insert_to_current);
2473    
2474     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2475 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2476 wakaba 1.1
2477     !!!next-token;
2478     return;
2479     } elsif ({
2480     b => 1, big => 1, em => 1, font => 1, i => 1,
2481     nobr => 1, s => 1, small => 1, strile => 1,
2482     strong => 1, tt => 1, u => 1,
2483     }->{$token->{tag_name}}) {
2484     $reconstruct_active_formatting_elements->($insert_to_current);
2485    
2486     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2487 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2488 wakaba 1.1
2489     !!!next-token;
2490     return;
2491     } elsif ($token->{tag_name} eq 'button') {
2492     ## has a button element in scope
2493 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2494     my $node = $self->{open_elements}->[$_];
2495 wakaba 1.1 if ($node->[1] eq 'button') {
2496 wakaba 1.3 !!!parse-error (type => 'in button:button');
2497 wakaba 1.1 !!!back-token;
2498     $token = {type => 'end tag', tag_name => 'button'};
2499     return;
2500     } elsif ({
2501     table => 1, caption => 1, td => 1, th => 1,
2502     button => 1, marquee => 1, object => 1, html => 1,
2503     }->{$node->[1]}) {
2504     last INSCOPE;
2505     }
2506     } # INSCOPE
2507    
2508     $reconstruct_active_formatting_elements->($insert_to_current);
2509    
2510     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2511     push @$active_formatting_elements, ['#marker', ''];
2512    
2513     !!!next-token;
2514     return;
2515     } elsif ($token->{tag_name} eq 'marquee' or
2516     $token->{tag_name} eq 'object') {
2517     $reconstruct_active_formatting_elements->($insert_to_current);
2518    
2519     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2520     push @$active_formatting_elements, ['#marker', ''];
2521    
2522     !!!next-token;
2523     return;
2524     } elsif ($token->{tag_name} eq 'xmp') {
2525     $reconstruct_active_formatting_elements->($insert_to_current);
2526    
2527     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2528    
2529     $self->{content_model_flag} = 'CDATA';
2530    
2531     !!!next-token;
2532     return;
2533     } elsif ($token->{tag_name} eq 'table') {
2534     ## has a p element in scope
2535 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2536 wakaba 1.1 if ($_->[1] eq 'p') {
2537     !!!back-token;
2538     $token = {type => 'end tag', tag_name => 'p'};
2539     return;
2540     } elsif ({
2541     table => 1, caption => 1, td => 1, th => 1,
2542     button => 1, marquee => 1, object => 1, html => 1,
2543     }->{$_->[1]}) {
2544     last INSCOPE;
2545     }
2546     } # INSCOPE
2547    
2548     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2549    
2550 wakaba 1.3 $self->{insertion_mode} = 'in table';
2551 wakaba 1.1
2552     !!!next-token;
2553     return;
2554     } elsif ({
2555     area => 1, basefont => 1, bgsound => 1, br => 1,
2556     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2557     image => 1,
2558     }->{$token->{tag_name}}) {
2559     if ($token->{tag_name} eq 'image') {
2560 wakaba 1.3 !!!parse-error (type => 'image');
2561 wakaba 1.1 $token->{tag_name} = 'img';
2562     }
2563    
2564     $reconstruct_active_formatting_elements->($insert_to_current);
2565    
2566     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2567 wakaba 1.3 pop @{$self->{open_elements}};
2568 wakaba 1.1
2569     !!!next-token;
2570     return;
2571     } elsif ($token->{tag_name} eq 'hr') {
2572     ## has a p element in scope
2573 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2574 wakaba 1.1 if ($_->[1] eq 'p') {
2575     !!!back-token;
2576     $token = {type => 'end tag', tag_name => 'p'};
2577     return;
2578     } elsif ({
2579     table => 1, caption => 1, td => 1, th => 1,
2580     button => 1, marquee => 1, object => 1, html => 1,
2581     }->{$_->[1]}) {
2582     last INSCOPE;
2583     }
2584     } # INSCOPE
2585    
2586     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2587 wakaba 1.3 pop @{$self->{open_elements}};
2588 wakaba 1.1
2589     !!!next-token;
2590     return;
2591     } elsif ($token->{tag_name} eq 'input') {
2592     $reconstruct_active_formatting_elements->($insert_to_current);
2593    
2594     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2595 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2596     pop @{$self->{open_elements}};
2597 wakaba 1.1
2598     !!!next-token;
2599     return;
2600     } elsif ($token->{tag_name} eq 'isindex') {
2601 wakaba 1.3 !!!parse-error (type => 'isindex');
2602 wakaba 1.1
2603 wakaba 1.3 if (defined $self->{form_element}) {
2604 wakaba 1.1 ## Ignore the token
2605     !!!next-token;
2606     return;
2607     } else {
2608     my $at = $token->{attributes};
2609     $at->{name} = {name => 'name', value => 'isindex'};
2610     my @tokens = (
2611     {type => 'start tag', tag_name => 'form'},
2612     {type => 'start tag', tag_name => 'hr'},
2613     {type => 'start tag', tag_name => 'p'},
2614     {type => 'start tag', tag_name => 'label'},
2615     {type => 'character',
2616     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2617     ## TODO: make this configurable
2618     {type => 'start tag', tag_name => 'input', attributes => $at},
2619     #{type => 'character', data => ''}, # SHOULD
2620     {type => 'end tag', tag_name => 'label'},
2621     {type => 'end tag', tag_name => 'p'},
2622     {type => 'start tag', tag_name => 'hr'},
2623     {type => 'end tag', tag_name => 'form'},
2624     );
2625     $token = shift @tokens;
2626     !!!back-token (@tokens);
2627     return;
2628     }
2629     } elsif ({
2630     textarea => 1,
2631     noembed => 1,
2632     noframes => 1,
2633     noscript => 0, ## TODO: 1 if scripting is enabled
2634     }->{$token->{tag_name}}) {
2635     my $tag_name = $token->{tag_name};
2636     my $el;
2637     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2638    
2639     if ($token->{tag_name} eq 'textarea') {
2640 wakaba 1.3 ## TODO: $self->{form_element} if defined
2641 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2642     } else {
2643     $self->{content_model_flag} = 'CDATA';
2644     }
2645    
2646     $insert->($el);
2647    
2648     my $text = '';
2649     !!!next-token;
2650     while ($token->{type} eq 'character') {
2651     $text .= $token->{data};
2652     !!!next-token;
2653     }
2654     if (length $text) {
2655     $el->manakai_append_text ($text);
2656     }
2657    
2658     $self->{content_model_flag} = 'PCDATA';
2659    
2660     if ($token->{type} eq 'end tag' and
2661     $token->{tag_name} eq $tag_name) {
2662     ## Ignore the token (it is the end tag matching the element opened above)
2663     } else {
2664 wakaba 1.3 if ($tag_name eq 'textarea') { ## test the saved start tag name; |$token| has advanced since
2665     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2666     } else { ## every other element of this branch was switched to CDATA above
2667     !!!parse-error (type => 'in CDATA:#'.$token->{type});
2668     }
2669 wakaba 1.1 ## ISSUE: And ignore?
2670     }
2671     !!!next-token;
2672     return;
2673     } elsif ($token->{tag_name} eq 'select') {
2674     $reconstruct_active_formatting_elements->($insert_to_current);
2675    
2676     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2677    
2678 wakaba 1.3 $self->{insertion_mode} = 'in select';
2679 wakaba 1.1 !!!next-token;
2680     return;
2681     } elsif ({
2682     caption => 1, col => 1, colgroup => 1, frame => 1,
2683     frameset => 1, head => 1, option => 1, optgroup => 1,
2684     tbody => 1, td => 1, tfoot => 1, th => 1,
2685     thead => 1, tr => 1,
2686     }->{$token->{tag_name}}) {
2687 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2688 wakaba 1.1 ## Ignore the token
2689     !!!next-token;
2690     return;
2691    
2692     ## ISSUE: An issue on HTML5 new elements in the spec.
2693     } else {
2694     $reconstruct_active_formatting_elements->($insert_to_current);
2695    
2696     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2697    
2698     !!!next-token;
2699     return;
2700     }
2701     } elsif ($token->{type} eq 'end tag') {
2702     if ($token->{tag_name} eq 'body') {
2703 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
2704 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2705 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
2706     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2707 wakaba 1.1 }
2708 wakaba 1.3 $self->{insertion_mode} = 'after body';
2709 wakaba 1.1 !!!next-token;
2710     return;
2711     } else {
2712 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2713 wakaba 1.1 ## Ignore the token
2714     !!!next-token;
2715     return;
2716     }
2717     } elsif ($token->{tag_name} eq 'html') {
2718 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') { ## second entry of the stack is |body|
2719 wakaba 1.1 ## ISSUE: There is an issue in the spec.
2720 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') { ## current node is not |body|
2721     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2722 wakaba 1.1 } ## the error names the current node, as the </body> branch does
2723 wakaba 1.3 $self->{insertion_mode} = 'after body';
2724 wakaba 1.1 ## reprocess (the token is not consumed here)
2725     return;
2726     } else {
2727 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2728 wakaba 1.1 ## Ignore the token
2729     !!!next-token;
2730     return;
2731     }
2732     } elsif ({
2733     address => 1, blockquote => 1, center => 1, dir => 1,
2734     div => 1, dl => 1, fieldset => 1, listing => 1,
2735     menu => 1, ol => 1, pre => 1, ul => 1,
2736     form => 1,
2737     p => 1,
2738     dd => 1, dt => 1, li => 1,
2739     button => 1, marquee => 1, object => 1,
2740     }->{$token->{tag_name}}) {
2741     ## has an element in scope
2742     my $i;
2743 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2744     my $node = $self->{open_elements}->[$_];
2745 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
2746     ## generate implied end tags
2747     if ({
2748     dd => ($token->{tag_name} ne 'dd'),
2749     dt => ($token->{tag_name} ne 'dt'),
2750     li => ($token->{tag_name} ne 'li'),
2751     p => ($token->{tag_name} ne 'p'),
2752     td => 1, th => 1, tr => 1,
2753 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2754 wakaba 1.1 !!!back-token;
2755     $token = {type => 'end tag',
2756 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2757 wakaba 1.1 return;
2758     }
2759     $i = $_;
2760     last INSCOPE unless $token->{tag_name} eq 'p';
2761     } elsif ({
2762     table => 1, caption => 1, td => 1, th => 1,
2763     button => 1, marquee => 1, object => 1, html => 1,
2764     }->{$node->[1]}) {
2765     last INSCOPE;
2766     }
2767     } # INSCOPE
2768    
2769 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2770     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2771 wakaba 1.1 }
2772    
2773 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2774     undef $self->{form_element} if $token->{tag_name} eq 'form';
2775 wakaba 1.1 $clear_up_to_marker->()
2776     if {
2777     button => 1, marquee => 1, object => 1,
2778     }->{$token->{tag_name}};
2779     !!!next-token;
2780     return;
2781     } elsif ({
2782     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2783     }->{$token->{tag_name}}) {
2784     ## has an element in scope
2785     my $i;
2786 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2787     my $node = $self->{open_elements}->[$_];
2788 wakaba 1.1 if ({
2789     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2790     }->{$node->[1]}) {
2791     ## generate implied end tags
2792     if ({
2793     dd => 1, dt => 1, li => 1, p => 1,
2794     td => 1, th => 1, tr => 1,
2795 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2796 wakaba 1.1 !!!back-token;
2797     $token = {type => 'end tag',
2798 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2799 wakaba 1.1 return;
2800     }
2801     $i = $_;
2802     last INSCOPE;
2803     } elsif ({
2804     table => 1, caption => 1, td => 1, th => 1,
2805     button => 1, marquee => 1, object => 1, html => 1,
2806     }->{$node->[1]}) {
2807     last INSCOPE;
2808     }
2809     } # INSCOPE
2810    
2811 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
2812     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2813 wakaba 1.1 }
2814    
2815 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
2816 wakaba 1.1 !!!next-token;
2817     return;
2818     } elsif ({ ## end tags handed to |$formatting_end_tag|
2819     a => 1,
2820     b => 1, big => 1, em => 1, font => 1, i => 1,
2821     nobr => 1, s => 1, small => 1, strike => 1, ## was misspelled |strile|, so </strike> never matched
2822     strong => 1, tt => 1, u => 1,
2823     }->{$token->{tag_name}}) {
2824     $formatting_end_tag->($token->{tag_name});
2825     return;
2826     } elsif ({
2827     caption => 1, col => 1, colgroup => 1, frame => 1,
2828     frameset => 1, head => 1, option => 1, optgroup => 1,
2829     tbody => 1, td => 1, tfoot => 1, th => 1,
2830     thead => 1, tr => 1,
2831     area => 1, basefont => 1, bgsound => 1, br => 1,
2832     embed => 1, hr => 1, iframe => 1, image => 1,
2833     img => 1, input => 1, isindex=> 1, noembed => 1,
2834     noframes => 1, param => 1, select => 1, spacer => 1,
2835     table => 1, textarea => 1, wbr => 1,
2836     noscript => 0, ## TODO: if scripting is enabled
2837     }->{$token->{tag_name}}) {
2838 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2839 wakaba 1.1 ## Ignore the token
2840     !!!next-token;
2841     return;
2842    
2843     ## ISSUE: Issue on HTML5 new elements in spec
2844    
2845     } else {
2846     ## Step 1
2847     my $node_i = -1;
2848 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
2849 wakaba 1.1
2850     ## Step 2
2851     S2: {
2852     if ($node->[1] eq $token->{tag_name}) {
2853     ## Step 1
2854     ## generate implied end tags
2855     if ({
2856     dd => 1, dt => 1, li => 1, p => 1,
2857     td => 1, th => 1, tr => 1,
2858 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2859 wakaba 1.1 !!!back-token;
2860     $token = {type => 'end tag',
2861 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
2862 wakaba 1.1 return;
2863     }
2864    
2865     ## Step 2
2866 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
2867     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2868 wakaba 1.1 }
2869    
2870     ## Step 3
2871 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
2872    
2873     !!!next-token;
2874 wakaba 1.1 last S2;
2875     } else {
2876     ## Step 3
2877     if (not $formatting_category->{$node->[1]} and
2878     #not $phrasing_category->{$node->[1]} and
2879     ($special_category->{$node->[1]} or
2880     $scoping_category->{$node->[1]})) {
2881 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
2882 wakaba 1.1 ## Ignore the token
2883     !!!next-token;
2884     last S2;
2885     }
2886     }
2887    
2888     ## Step 4
2889     $node_i--;
2890 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
2891 wakaba 1.1
2892     ## Step 5;
2893     redo S2;
2894     } # S2
2895 wakaba 1.3 return;
2896 wakaba 1.1 }
2897     }
2898     }; # $in_body
2899    
2900     B: {
2901 wakaba 1.3 if ($phase eq 'main') {
2902 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
2903 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
2904 wakaba 1.1 ## Ignore the token
2905     ## Stay in the phase
2906     !!!next-token;
2907     redo B;
2908     } elsif ($token->{type} eq 'start tag' and
2909     $token->{tag_name} eq 'html') {
2910     ## TODO: unless it is the first start tag token, parse-error
2911 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
2912 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2913     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2914     $top_el->set_attribute_ns
2915     (undef, [undef, $attr_name],
2916     $token->{attributes}->{$attr_name}->{value});
2917     }
2918     }
2919     !!!next-token;
2920     redo B;
2921     } elsif ($token->{type} eq 'end-of-file') {
2922     ## Generate implied end tags
2923     if ({
2924     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2925 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2926 wakaba 1.1 !!!back-token;
2927 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
2928 wakaba 1.1 redo B;
2929     }
2930    
2931 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
2932     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
2933     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2934     } elsif (defined $self->{inner_html_node} and
2935     @{$self->{open_elements}} > 1 and
2936     $self->{open_elements}->[1]->[1] ne 'body') {
2937     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2938 wakaba 1.1 }
2939    
2940     ## Stop parsing
2941     last B;
2942    
2943     ## ISSUE: There is an issue in the spec.
2944     } else {
2945 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
2946 wakaba 1.1 if ($token->{type} eq 'character') {
2947     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2948 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
2949 wakaba 1.1 unless (length $token->{data}) {
2950     !!!next-token;
2951     redo B;
2952     }
2953     }
2954     ## As if <head>
2955 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
2956     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2957     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2958     $self->{insertion_mode} = 'in head';
2959 wakaba 1.1 ## reprocess
2960     redo B;
2961     } elsif ($token->{type} eq 'comment') {
2962     my $comment = $self->{document}->create_comment ($token->{data});
2963 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
2964 wakaba 1.1 !!!next-token;
2965     redo B;
2966     } elsif ($token->{type} eq 'start tag') {
2967     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
2968 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
2969     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2970     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2971     $self->{insertion_mode} = 'in head';
2972 wakaba 1.1 if ($token->{tag_name} eq 'head') {
2973     !!!next-token;
2974     #} elsif ({
2975     # base => 1, link => 1, meta => 1,
2976     # script => 1, style => 1, title => 1,
2977     # }->{$token->{tag_name}}) {
2978     # ## reprocess
2979     } else {
2980     ## reprocess
2981     }
2982     redo B;
2983     } elsif ($token->{type} eq 'end tag') {
2984     if ($token->{tag_name} eq 'html') {
2985     ## As if <head>
2986 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
2987     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
2988     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
2989     $self->{insertion_mode} = 'in head';
2990 wakaba 1.1 ## reprocess
2991     redo B;
2992     } else {
2993 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2994 wakaba 1.1 ## Ignore the token
2995     !!!next-token;
2996     redo B;
2997     }
2998     } else {
2999     die "$0: $token->{type}: Unknown type";
3000     }
3001 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3002 wakaba 1.1 if ($token->{type} eq 'character') {
3003     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3004 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3005 wakaba 1.1 unless (length $token->{data}) {
3006     !!!next-token;
3007     redo B;
3008     }
3009     }
3010    
3011     #
3012     } elsif ($token->{type} eq 'comment') {
3013     my $comment = $self->{document}->create_comment ($token->{data});
3014 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3015 wakaba 1.1 !!!next-token;
3016     redo B;
3017     } elsif ($token->{type} eq 'start tag') {
3018     if ($token->{tag_name} eq 'title') {
3019     ## NOTE: There is an "as if in head" code clone
3020     my $title_el;
3021     !!!create-element ($title_el, 'title', $token->{attributes});
3022 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3023 wakaba 1.1 ->append_child ($title_el);
3024     $self->{content_model_flag} = 'RCDATA';
3025    
3026     my $text = '';
3027     !!!next-token;
3028     while ($token->{type} eq 'character') {
3029     $text .= $token->{data};
3030     !!!next-token;
3031     }
3032     if (length $text) {
3033     $title_el->manakai_append_text ($text);
3034     }
3035    
3036     $self->{content_model_flag} = 'PCDATA';
3037    
3038     if ($token->{type} eq 'end tag' and
3039     $token->{tag_name} eq 'title') {
3040     ## Ignore the token
3041     } else {
3042 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3043 wakaba 1.1 ## ISSUE: And ignore?
3044     }
3045     !!!next-token;
3046     redo B;
3047     } elsif ($token->{tag_name} eq 'style') {
3048     $style_start_tag->();
3049     redo B;
3050     } elsif ($token->{tag_name} eq 'script') {
3051     $script_start_tag->();
3052     redo B;
3053     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3054     ## NOTE: There are "as if in head" code clones
3055     my $el;
3056     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3057 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3058 wakaba 1.1 ->append_child ($el);
3059    
3060     !!!next-token;
3061     redo B;
3062     } elsif ($token->{tag_name} eq 'head') {
3063 wakaba 1.3 !!!parse-error (type => 'in head:head');
3064 wakaba 1.1 ## Ignore the token
3065     !!!next-token;
3066     redo B;
3067     } else {
3068     #
3069     }
3070     } elsif ($token->{type} eq 'end tag') {
3071     if ($token->{tag_name} eq 'head') {
3072 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3073     pop @{$self->{open_elements}};
3074 wakaba 1.1 } else {
3075 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3076 wakaba 1.1 }
3077 wakaba 1.3 $self->{insertion_mode} = 'after head';
3078 wakaba 1.1 !!!next-token;
3079     redo B;
3080     } elsif ($token->{tag_name} eq 'html') {
3081     #
3082     } else {
3083 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3084 wakaba 1.1 ## Ignore the token
3085     !!!next-token;
3086     redo B;
3087     }
3088     } else {
3089     #
3090     }
3091    
3092 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3093 wakaba 1.1 ## As if </head>
3094 wakaba 1.3 pop @{$self->{open_elements}};
3095 wakaba 1.1 }
3096 wakaba 1.3 $self->{insertion_mode} = 'after head';
3097 wakaba 1.1 ## reprocess
3098     redo B;
3099    
3100     ## ISSUE: An issue in the spec.
3101 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3102 wakaba 1.1 if ($token->{type} eq 'character') {
3103     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3104 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3105 wakaba 1.1 unless (length $token->{data}) {
3106     !!!next-token;
3107     redo B;
3108     }
3109     }
3110    
3111     #
3112     } elsif ($token->{type} eq 'comment') {
3113     my $comment = $self->{document}->create_comment ($token->{data});
3114 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3115 wakaba 1.1 !!!next-token;
3116     redo B;
3117     } elsif ($token->{type} eq 'start tag') {
3118     if ($token->{tag_name} eq 'body') {
3119     !!!insert-element ('body', $token->{attributes});
3120 wakaba 1.3 $self->{insertion_mode} = 'in body';
3121 wakaba 1.1 !!!next-token;
3122     redo B;
3123     } elsif ($token->{tag_name} eq 'frameset') {
3124     !!!insert-element ('frameset', $token->{attributes});
3125 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3126 wakaba 1.1 !!!next-token;
3127     redo B;
3128     } elsif ({
3129     base => 1, link => 1, meta => 1,
3130 wakaba 1.3 script => 1, style => 1, title => 1,
3131 wakaba 1.1 }->{$token->{tag_name}}) {
3132 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3133     $self->{insertion_mode} = 'in head';
3134 wakaba 1.1 ## reprocess
3135     redo B;
3136     } else {
3137     #
3138     }
3139     } else {
3140     #
3141     }
3142    
3143     ## As if <body>
3144     !!!insert-element ('body');
3145 wakaba 1.3 $self->{insertion_mode} = 'in body';
3146 wakaba 1.1 ## reprocess
3147     redo B;
3148 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3149 wakaba 1.1 if ($token->{type} eq 'character') {
3150     ## NOTE: There is a code clone of "character in body".
3151     $reconstruct_active_formatting_elements->($insert_to_current);
3152    
3153 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3154 wakaba 1.1
3155     !!!next-token;
3156     redo B;
3157     } elsif ($token->{type} eq 'comment') {
3158     ## NOTE: There is a code clone of "comment in body".
3159     my $comment = $self->{document}->create_comment ($token->{data});
3160 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3161 wakaba 1.1 !!!next-token;
3162     redo B;
3163     } else {
3164     $in_body->($insert_to_current);
3165     redo B;
3166     }
3167 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3168 wakaba 1.1 if ($token->{type} eq 'character') {
3169     ## NOTE: There are "character in table" code clones.
3170     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3171 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3172 wakaba 1.1
3173     unless (length $token->{data}) {
3174     !!!next-token;
3175     redo B;
3176     }
3177     }
3178    
3179 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3180    
3181 wakaba 1.1 ## As if in body, but insert into foster parent element
3182     ## ISSUE: Spec says that "whenever a node would be inserted
3183     ## into the current node" while characters might not be
3184     ## result in a new Text node.
3185     $reconstruct_active_formatting_elements->($insert_to_foster);
3186    
3187     if ({
3188     table => 1, tbody => 1, tfoot => 1,
3189     thead => 1, tr => 1,
3190 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3191 wakaba 1.1 # MUST
3192     my $foster_parent_element;
3193     my $next_sibling;
3194     my $prev_sibling;
3195 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3196     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3197     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3198 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3199     $foster_parent_element = $parent;
3200 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3201 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3202     } else {
3203 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3204 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3205     }
3206     last OE;
3207     }
3208     } # OE
3209 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3210 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3211     unless defined $foster_parent_element;
3212     if (defined $prev_sibling and
3213     $prev_sibling->node_type == 3) {
3214     $prev_sibling->manakai_append_text ($token->{data});
3215     } else {
3216     $foster_parent_element->insert_before
3217     ($self->{document}->create_text_node ($token->{data}),
3218     $next_sibling);
3219     }
3220     } else {
3221 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3222 wakaba 1.1 }
3223    
3224     !!!next-token;
3225     redo B;
3226     } elsif ($token->{type} eq 'comment') {
3227     my $comment = $self->{document}->create_comment ($token->{data});
3228 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3229 wakaba 1.1 !!!next-token;
3230     redo B;
3231     } elsif ($token->{type} eq 'start tag') {
3232     if ({
3233     caption => 1,
3234     colgroup => 1,
3235     tbody => 1, tfoot => 1, thead => 1,
3236     }->{$token->{tag_name}}) {
3237     ## Clear back to table context
3238 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3239     $self->{open_elements}->[-1]->[1] ne 'html') {
3240     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3241     pop @{$self->{open_elements}};
3242 wakaba 1.1 }
3243    
3244     push @$active_formatting_elements, ['#marker', '']
3245     if $token->{tag_name} eq 'caption';
3246    
3247     !!!insert-element ($token->{tag_name}, $token->{attributes});
3248 wakaba 1.3 $self->{insertion_mode} = {
3249 wakaba 1.1 caption => 'in caption',
3250     colgroup => 'in column group',
3251     tbody => 'in table body',
3252     tfoot => 'in table body',
3253     thead => 'in table body',
3254     }->{$token->{tag_name}};
3255     !!!next-token;
3256     redo B;
3257     } elsif ({
3258     col => 1,
3259     td => 1, th => 1, tr => 1,
3260     }->{$token->{tag_name}}) {
3261     ## Clear back to table context
3262 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3263     $self->{open_elements}->[-1]->[1] ne 'html') {
3264     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3265     pop @{$self->{open_elements}};
3266 wakaba 1.1 }
3267    
3268     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3269 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3270 wakaba 1.1 ? 'in column group' : 'in table body';
3271     ## reprocess
3272     redo B;
3273     } elsif ($token->{tag_name} eq 'table') {
3274     ## NOTE: There are code clones for this "table in table"
3275 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3276 wakaba 1.1
3277     ## As if </table>
3278     ## have a table element in table scope
3279     my $i;
3280 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3281     my $node = $self->{open_elements}->[$_];
3282 wakaba 1.1 if ($node->[1] eq 'table') {
3283     $i = $_;
3284     last INSCOPE;
3285     } elsif ({
3286     table => 1, html => 1,
3287     }->{$node->[1]}) {
3288     last INSCOPE;
3289     }
3290     } # INSCOPE
3291     unless (defined $i) {
3292 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3293 wakaba 1.1 ## Ignore tokens </table><table>
3294     !!!next-token;
3295     redo B;
3296     }
3297    
3298     ## generate implied end tags
3299     if ({
3300     dd => 1, dt => 1, li => 1, p => 1,
3301     td => 1, th => 1, tr => 1,
3302 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3303 wakaba 1.1 !!!back-token; # <table>
3304     $token = {type => 'end tag', tag_name => 'table'};
3305     !!!back-token;
3306     $token = {type => 'end tag',
3307 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3308 wakaba 1.1 redo B;
3309     }
3310    
3311 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3312     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3313 wakaba 1.1 }
3314    
3315 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3316 wakaba 1.1
3317 wakaba 1.3 $self->_reset_insertion_mode;
3318 wakaba 1.1
3319     ## reprocess
3320     redo B;
3321     } else {
3322     #
3323     }
3324     } elsif ($token->{type} eq 'end tag') {
3325     if ($token->{tag_name} eq 'table') {
3326     ## have a table element in table scope
3327     my $i;
3328 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3329     my $node = $self->{open_elements}->[$_];
3330 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3331     $i = $_;
3332     last INSCOPE;
3333     } elsif ({
3334     table => 1, html => 1,
3335     }->{$node->[1]}) {
3336     last INSCOPE;
3337     }
3338     } # INSCOPE
3339     unless (defined $i) {
3340 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3341 wakaba 1.1 ## Ignore the token
3342     !!!next-token;
3343     redo B;
3344     }
3345    
3346     ## generate implied end tags
3347     if ({
3348     dd => 1, dt => 1, li => 1, p => 1,
3349     td => 1, th => 1, tr => 1,
3350 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3351 wakaba 1.1 !!!back-token;
3352     $token = {type => 'end tag',
3353 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3354 wakaba 1.1 redo B;
3355     }
3356    
3357 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3358     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3359 wakaba 1.1 }
3360    
3361 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3362 wakaba 1.1
3363 wakaba 1.3 $self->_reset_insertion_mode;
3364 wakaba 1.1
3365     !!!next-token;
3366     redo B;
3367     } elsif ({
3368     body => 1, caption => 1, col => 1, colgroup => 1,
3369     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3370     thead => 1, tr => 1,
3371     }->{$token->{tag_name}}) {
3372 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3373 wakaba 1.1 ## Ignore the token
3374     !!!next-token;
3375     redo B;
3376     } else {
3377     #
3378     }
3379     } else {
3380     #
3381     }
3382    
3383 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3384 wakaba 1.1 $in_body->($insert_to_foster);
3385     redo B;
3386 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3387 wakaba 1.1 if ($token->{type} eq 'character') {
3388     ## NOTE: This is a code clone of "character in body".
3389     $reconstruct_active_formatting_elements->($insert_to_current);
3390    
3391 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3392 wakaba 1.1
3393     !!!next-token;
3394     redo B;
3395     } elsif ($token->{type} eq 'comment') {
3396     ## NOTE: This is a code clone of "comment in body".
3397     my $comment = $self->{document}->create_comment ($token->{data});
3398 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3399 wakaba 1.1 !!!next-token;
3400     redo B;
3401     } elsif ($token->{type} eq 'start tag') {
3402     if ({
3403     caption => 1, col => 1, colgroup => 1, tbody => 1,
3404     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3405     }->{$token->{tag_name}}) {
3406 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3407 wakaba 1.1
3408     ## As if </caption>
3409     ## have a caption element in table scope
3410     my $i; ## index of the matching |caption| entry; stays undef if none is found
3411 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { ## walk from the current node towards the root
3412     my $node = $self->{open_elements}->[$_];
3413 wakaba 1.1 if ($node->[1] eq 'caption') {
3414     $i = $_;
3415     last INSCOPE;
3416     } elsif ({
3417     table => 1, html => 1,
3418     }->{$node->[1]}) { ## |table| or |html| reached first: stop without a match
3419     last INSCOPE;
3420     }
3421     } # INSCOPE
3422     unless (defined $i) {
3423 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3424 wakaba 1.1 ## Ignore the token
3425     !!!next-token;
3426     redo B;
3427     }
3428    
3429     ## generate implied end tags
3430     if ({
3431     dd => 1, dt => 1, li => 1, p => 1,
3432     td => 1, th => 1, tr => 1,
3433 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3434 wakaba 1.1 !!!back-token; # <?>
3435     $token = {type => 'end tag', tag_name => 'caption'};
3436     !!!back-token;
3437     $token = {type => 'end tag',
3438 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3439 wakaba 1.1 redo B;
3440     }
3441    
3442 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3443     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3444 wakaba 1.1 }
3445    
3446 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3447 wakaba 1.1
3448     $clear_up_to_marker->();
3449    
3450 wakaba 1.3 $self->{insertion_mode} = 'in table';
3451 wakaba 1.1
3452     ## reprocess
3453     redo B;
3454     } else {
3455     #
3456     }
3457     } elsif ($token->{type} eq 'end tag') {
3458     if ($token->{tag_name} eq 'caption') {
3459     ## have a caption element in table scope
3460     my $i; ## index of the matching entry; stays undef if none is found
3461 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) { ## walk from the current node towards the root
3462     my $node = $self->{open_elements}->[$_];
3463 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) { ## the token is </caption> in this branch
3464     $i = $_;
3465     last INSCOPE;
3466     } elsif ({
3467     table => 1, html => 1,
3468     }->{$node->[1]}) { ## |table| or |html| reached first: stop without a match
3469     last INSCOPE;
3470     }
3471     } # INSCOPE
3472     unless (defined $i) {
3473 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3474 wakaba 1.1 ## Ignore the token
3475     !!!next-token;
3476     redo B;
3477     }
3478    
3479     ## generate implied end tags
3480     if ({
3481     dd => 1, dt => 1, li => 1, p => 1,
3482     td => 1, th => 1, tr => 1,
3483 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3484 wakaba 1.1 !!!back-token;
3485     $token = {type => 'end tag',
3486 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3487 wakaba 1.1 redo B;
3488     }
3489    
3490 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3491     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3492 wakaba 1.1 }
3493    
3494 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3495 wakaba 1.1
3496     $clear_up_to_marker->();
3497    
3498 wakaba 1.3 $self->{insertion_mode} = 'in table';
3499 wakaba 1.1
3500     !!!next-token;
3501     redo B;
3502     } elsif ($token->{tag_name} eq 'table') {
3503 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3504 wakaba 1.1
3505     ## As if </caption>
3506     ## have a table element in table scope
3507     my $i;
3508 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3509     my $node = $self->{open_elements}->[$_];
3510 wakaba 1.1 if ($node->[1] eq 'caption') {
3511     $i = $_;
3512     last INSCOPE;
3513     } elsif ({
3514     table => 1, html => 1,
3515     }->{$node->[1]}) {
3516     last INSCOPE;
3517     }
3518     } # INSCOPE
3519     unless (defined $i) {
3520 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3521 wakaba 1.1 ## Ignore the token
3522     !!!next-token;
3523     redo B;
3524     }
3525    
3526     ## generate implied end tags
3527     if ({
3528     dd => 1, dt => 1, li => 1, p => 1,
3529     td => 1, th => 1, tr => 1,
3530 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3531 wakaba 1.1 !!!back-token; # </table>
3532     $token = {type => 'end tag', tag_name => 'caption'};
3533     !!!back-token;
3534     $token = {type => 'end tag',
3535 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3536 wakaba 1.1 redo B;
3537     }
3538    
3539 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3540     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3541 wakaba 1.1 }
3542    
3543 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3544 wakaba 1.1
3545     $clear_up_to_marker->();
3546    
3547 wakaba 1.3 $self->{insertion_mode} = 'in table';
3548 wakaba 1.1
3549     ## reprocess
3550     redo B;
3551     } elsif ({
3552     body => 1, col => 1, colgroup => 1,
3553     html => 1, tbody => 1, td => 1, tfoot => 1,
3554     th => 1, thead => 1, tr => 1,
3555     }->{$token->{tag_name}}) {
3556 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3557 wakaba 1.1 ## Ignore the token.
         ## NOTE: The next token has to be fetched before restarting the
         ## loop.  Otherwise the same end tag is dispatched to this very
         ## branch again (the insertion mode is unchanged) and the parser
         ## never terminates.
         !!!next-token;
3558     redo B;
3559     } else {
3560     #
3561     }
3562     } else {
3563     #
3564     }
3565    
3566     $in_body->($insert_to_current);
3567     redo B;
3568 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3569 wakaba 1.1 if ($token->{type} eq 'character') {
3570     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3571 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3572 wakaba 1.1 unless (length $token->{data}) {
3573     !!!next-token;
3574     redo B;
3575     }
3576     }
3577    
3578     #
3579     } elsif ($token->{type} eq 'comment') {
3580     my $comment = $self->{document}->create_comment ($token->{data});
3581 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3582 wakaba 1.1 !!!next-token;
3583     redo B;
3584     } elsif ($token->{type} eq 'start tag') {
3585     if ($token->{tag_name} eq 'col') {
3586     !!!insert-element ($token->{tag_name}, $token->{attributes});
3587 wakaba 1.3 pop @{$self->{open_elements}};
3588 wakaba 1.1 !!!next-token;
3589     redo B;
3590     } else {
3591     #
3592     }
3593     } elsif ($token->{type} eq 'end tag') {
3594     if ($token->{tag_name} eq 'colgroup') {
3595 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3596     !!!parse-error (type => 'unmatched end tag:colgroup');
3597 wakaba 1.1 ## Ignore the token
3598     !!!next-token;
3599     redo B;
3600     } else {
3601 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3602     $self->{insertion_mode} = 'in table';
3603 wakaba 1.1 !!!next-token;
3604     redo B;
3605     }
3606     } elsif ($token->{tag_name} eq 'col') {
3607 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3608 wakaba 1.1 ## Ignore the token
3609     !!!next-token;
3610     redo B;
3611     } else {
3612     #
3613     }
3614     } else {
3615     #
3616     }
3617    
3618     ## As if </colgroup>
3619 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3620     !!!parse-error (type => 'unmatched end tag:colgroup');
3621 wakaba 1.1 ## Ignore the token
3622     !!!next-token;
3623     redo B;
3624     } else {
3625 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3626     $self->{insertion_mode} = 'in table';
3627 wakaba 1.1 ## reprocess
3628     redo B;
3629     }
3630 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3631 wakaba 1.1 if ($token->{type} eq 'character') {
3632     ## NOTE: This is a "character in table" code clone.
3633     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3634 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3635 wakaba 1.1
3636     unless (length $token->{data}) {
3637     !!!next-token;
3638     redo B;
3639     }
3640     }
3641    
3642 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3643    
3644 wakaba 1.1 ## As if in body, but insert into foster parent element
3645     ## ISSUE: Spec says that "whenever a node would be inserted
3646     ## into the current node" while characters might not
3647     ## result in a new Text node.
3648     $reconstruct_active_formatting_elements->($insert_to_foster);
3649    
3650     if ({
3651     table => 1, tbody => 1, tfoot => 1,
3652     thead => 1, tr => 1,
3653 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3654 wakaba 1.1 # MUST
3655     my $foster_parent_element;
3656     my $next_sibling;
3657     my $prev_sibling;
3658 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3659     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3660     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3661 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3662     $foster_parent_element = $parent;
3663 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3664 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3665     } else {
3666 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3667 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3668     }
3669     last OE;
3670     }
3671     } # OE
3672 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3673 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3674     unless defined $foster_parent_element;
3675     if (defined $prev_sibling and
3676     $prev_sibling->node_type == 3) {
3677     $prev_sibling->manakai_append_text ($token->{data});
3678     } else {
3679     $foster_parent_element->insert_before
3680     ($self->{document}->create_text_node ($token->{data}),
3681     $next_sibling);
3682     }
3683     } else {
3684 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3685 wakaba 1.1 }
3686    
3687     !!!next-token;
3688     redo B;
3689     } elsif ($token->{type} eq 'comment') {
3690     ## Copied from 'in table'
3691     my $comment = $self->{document}->create_comment ($token->{data});
3692 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3693 wakaba 1.1 !!!next-token;
3694     redo B;
3695     } elsif ($token->{type} eq 'start tag') {
3696     if ({
3697     tr => 1,
3698     th => 1, td => 1,
3699     }->{$token->{tag_name}}) {
3700 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
3701     !!!parse-error (type => 'missing start tag:tr');
3702     }
3703    
3704 wakaba 1.1 ## Clear back to table body context
3705     while (not {
3706     tbody => 1, tfoot => 1, thead => 1, html => 1,
3707 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3708     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3709     pop @{$self->{open_elements}};
3710 wakaba 1.1 }
3711    
3712 wakaba 1.3 $self->{insertion_mode} = 'in row';
3713 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
3714     !!!insert-element ($token->{tag_name}, $token->{attributes});
3715     !!!next-token;
3716     } else {
3717     !!!insert-element ('tr');
3718     ## reprocess
3719     }
3720     redo B;
3721     } elsif ({
3722     caption => 1, col => 1, colgroup => 1,
3723     tbody => 1, tfoot => 1, thead => 1,
3724     }->{$token->{tag_name}}) {
3725     ## have an element in table scope
3726     my $i;
3727 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3728     my $node = $self->{open_elements}->[$_];
3729 wakaba 1.1 if ({
3730     tbody => 1, thead => 1, tfoot => 1,
3731     }->{$node->[1]}) {
3732     $i = $_;
3733     last INSCOPE;
3734     } elsif ({
3735     table => 1, html => 1,
3736     }->{$node->[1]}) {
3737     last INSCOPE;
3738     }
3739     } # INSCOPE
3740     unless (defined $i) {
3741 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3742 wakaba 1.1 ## Ignore the token
3743     !!!next-token;
3744     redo B;
3745     }
3746    
3747     ## Clear back to table body context
3748     while (not {
3749     tbody => 1, tfoot => 1, thead => 1, html => 1,
3750 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3751     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3752     pop @{$self->{open_elements}};
3753 wakaba 1.1 }
3754    
3755     ## As if <{current node}>
3756     ## have an element in table scope
3757     ## true by definition
3758    
3759     ## Clear back to table body context
3760     ## nop by definition
3761    
3762 wakaba 1.3 pop @{$self->{open_elements}};
3763     $self->{insertion_mode} = 'in table';
3764 wakaba 1.1 ## reprocess
3765     redo B;
3766     } elsif ($token->{tag_name} eq 'table') {
3767     ## NOTE: This is a code clone of "table in table"
3768 wakaba 1.3 !!!parse-error (type => 'not closed:table');
3769 wakaba 1.1
3770     ## As if </table>
3771     ## have a table element in table scope
3772     my $i;
3773 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3774     my $node = $self->{open_elements}->[$_];
3775 wakaba 1.1 if ($node->[1] eq 'table') {
3776     $i = $_;
3777     last INSCOPE;
3778     } elsif ({
3779     table => 1, html => 1,
3780     }->{$node->[1]}) {
3781     last INSCOPE;
3782     }
3783     } # INSCOPE
3784     unless (defined $i) {
3785 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3786 wakaba 1.1 ## Ignore tokens </table><table>
3787     !!!next-token;
3788     redo B;
3789     }
3790    
3791     ## generate implied end tags
3792     if ({
3793     dd => 1, dt => 1, li => 1, p => 1,
3794     td => 1, th => 1, tr => 1,
3795 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3796 wakaba 1.1 !!!back-token; # <table>
3797     $token = {type => 'end tag', tag_name => 'table'};
3798     !!!back-token;
3799     $token = {type => 'end tag',
3800 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3801 wakaba 1.1 redo B;
3802     }
3803    
3804 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3805     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3806 wakaba 1.1 }
3807    
3808 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3809 wakaba 1.1
3810 wakaba 1.3 $self->_reset_insertion_mode;
3811 wakaba 1.1
3812     ## reprocess
3813     redo B;
3814     } else {
3815     #
3816     }
3817     } elsif ($token->{type} eq 'end tag') {
3818     if ({
3819     tbody => 1, tfoot => 1, thead => 1,
3820     }->{$token->{tag_name}}) {
3821     ## have an element in table scope
3822     my $i;
3823 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3824     my $node = $self->{open_elements}->[$_];
3825 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3826     $i = $_;
3827     last INSCOPE;
3828     } elsif ({
3829     table => 1, html => 1,
3830     }->{$node->[1]}) {
3831     last INSCOPE;
3832     }
3833     } # INSCOPE
3834     unless (defined $i) {
3835 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3836 wakaba 1.1 ## Ignore the token
3837     !!!next-token;
3838     redo B;
3839     }
3840    
3841     ## Clear back to table body context
3842     while (not {
3843     tbody => 1, tfoot => 1, thead => 1, html => 1,
3844 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3845     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3846     pop @{$self->{open_elements}};
3847 wakaba 1.1 }
3848    
3849 wakaba 1.3 pop @{$self->{open_elements}};
3850     $self->{insertion_mode} = 'in table';
3851 wakaba 1.1 !!!next-token;
3852     redo B;
3853     } elsif ($token->{tag_name} eq 'table') {
3854     ## have an element in table scope
3855     my $i;
3856 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3857     my $node = $self->{open_elements}->[$_];
3858 wakaba 1.1 if ({
3859     tbody => 1, thead => 1, tfoot => 1,
3860     }->{$node->[1]}) {
3861     $i = $_;
3862     last INSCOPE;
3863     } elsif ({
3864     table => 1, html => 1,
3865     }->{$node->[1]}) {
3866     last INSCOPE;
3867     }
3868     } # INSCOPE
3869     unless (defined $i) {
3870 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3871 wakaba 1.1 ## Ignore the token
3872     !!!next-token;
3873     redo B;
3874     }
3875    
3876     ## Clear back to table body context
3877     while (not {
3878     tbody => 1, tfoot => 1, thead => 1, html => 1,
3879 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3880     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3881     pop @{$self->{open_elements}};
3882 wakaba 1.1 }
3883    
3884     ## As if <{current node}>
3885     ## have an element in table scope
3886     ## true by definition
3887    
3888     ## Clear back to table body context
3889     ## nop by definition
3890    
3891 wakaba 1.3 pop @{$self->{open_elements}};
3892     $self->{insertion_mode} = 'in table';
3893 wakaba 1.1 ## reprocess
3894     redo B;
3895     } elsif ({
3896     body => 1, caption => 1, col => 1, colgroup => 1,
3897     html => 1, td => 1, th => 1, tr => 1,
3898     }->{$token->{tag_name}}) {
3899 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3900 wakaba 1.1 ## Ignore the token
3901     !!!next-token;
3902     redo B;
3903     } else {
3904     #
3905     }
3906     } else {
3907     #
3908     }
3909    
3910     ## As if in table
3911 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3912 wakaba 1.1 $in_body->($insert_to_foster);
3913     redo B;
3914 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
3915 wakaba 1.1 if ($token->{type} eq 'character') {
3916     ## NOTE: This is a "character in table" code clone.
3917     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3918 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3919 wakaba 1.1
3920     unless (length $token->{data}) {
3921     !!!next-token;
3922     redo B;
3923     }
3924     }
3925    
3926 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3927    
3928 wakaba 1.1 ## As if in body, but insert into foster parent element
3929     ## ISSUE: Spec says that "whenever a node would be inserted
3930     ## into the current node" while characters might not
3931     ## result in a new Text node.
3932     $reconstruct_active_formatting_elements->($insert_to_foster);
3933    
3934     if ({
3935     table => 1, tbody => 1, tfoot => 1,
3936     thead => 1, tr => 1,
3937 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3938 wakaba 1.1 # MUST
3939     my $foster_parent_element;
3940     my $next_sibling;
3941     my $prev_sibling;
3942 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3943     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3944     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3945 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3946     $foster_parent_element = $parent;
3947 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3948 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3949     } else {
3950 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3951 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3952     }
3953     last OE;
3954     }
3955     } # OE
3956 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3957 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3958     unless defined $foster_parent_element;
3959     if (defined $prev_sibling and
3960     $prev_sibling->node_type == 3) {
3961     $prev_sibling->manakai_append_text ($token->{data});
3962     } else {
3963     $foster_parent_element->insert_before
3964     ($self->{document}->create_text_node ($token->{data}),
3965     $next_sibling);
3966     }
3967     } else {
3968 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3969 wakaba 1.1 }
3970    
3971     !!!next-token;
3972     redo B;
3973     } elsif ($token->{type} eq 'comment') {
3974     ## Copied from 'in table'
3975     my $comment = $self->{document}->create_comment ($token->{data});
3976 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3977 wakaba 1.1 !!!next-token;
3978     redo B;
3979     } elsif ($token->{type} eq 'start tag') {
3980     if ($token->{tag_name} eq 'th' or
3981     $token->{tag_name} eq 'td') {
3982     ## Clear back to table row context
3983     while (not {
3984     tr => 1, html => 1,
3985 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3986     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3987     pop @{$self->{open_elements}};
3988 wakaba 1.1 }
3989    
3990     !!!insert-element ($token->{tag_name}, $token->{attributes});
3991 wakaba 1.3 $self->{insertion_mode} = 'in cell';
3992 wakaba 1.1
3993     push @$active_formatting_elements, ['#marker', ''];
3994    
3995     !!!next-token;
3996     redo B;
3997     } elsif ({
3998     caption => 1, col => 1, colgroup => 1,
3999     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4000     }->{$token->{tag_name}}) {
4001     ## As if </tr>
4002     ## have an element in table scope
4003     my $i;
4004 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4005     my $node = $self->{open_elements}->[$_];
4006 wakaba 1.1 if ($node->[1] eq 'tr') {
4007     $i = $_;
4008     last INSCOPE;
4009     } elsif ({
4010     table => 1, html => 1,
4011     }->{$node->[1]}) {
4012     last INSCOPE;
4013     }
4014     } # INSCOPE
4015     unless (defined $i) {
         ## No |tr| element in table scope: the implied </tr> cannot be
         ## applied, so report the error and drop the start tag.
         ## The error type is spelled 'unmatched' like every other
         ## occurrence, so that error handlers can match on it.
4016 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4017 wakaba 1.1 ## Ignore the token
4018     !!!next-token;
4019     redo B;
4020     }
4021    
4022     ## Clear back to table row context
4023     while (not {
4024     tr => 1, html => 1,
4025 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4026     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4027     pop @{$self->{open_elements}};
4028 wakaba 1.1 }
4029    
4030 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4031     $self->{insertion_mode} = 'in table body';
4032 wakaba 1.1 ## reprocess
4033     redo B;
4034     } elsif ($token->{tag_name} eq 'table') {
4035     ## NOTE: This is a code clone of "table in table"
4036 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4037 wakaba 1.1
4038     ## As if </table>
4039     ## have a table element in table scope
4040     my $i;
4041 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4042     my $node = $self->{open_elements}->[$_];
4043 wakaba 1.1 if ($node->[1] eq 'table') {
4044     $i = $_;
4045     last INSCOPE;
4046     } elsif ({
4047     table => 1, html => 1,
4048     }->{$node->[1]}) {
4049     last INSCOPE;
4050     }
4051     } # INSCOPE
4052     unless (defined $i) {
4053 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4054 wakaba 1.1 ## Ignore tokens </table><table>
4055     !!!next-token;
4056     redo B;
4057     }
4058    
4059     ## generate implied end tags
4060     if ({
4061     dd => 1, dt => 1, li => 1, p => 1,
4062     td => 1, th => 1, tr => 1,
4063 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4064 wakaba 1.1 !!!back-token; # <table>
4065     $token = {type => 'end tag', tag_name => 'table'};
4066     !!!back-token;
4067     $token = {type => 'end tag',
4068 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4069 wakaba 1.1 redo B;
4070     }
4071    
4072 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4073     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4074 wakaba 1.1 }
4075    
4076 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4077 wakaba 1.1
4078 wakaba 1.3 $self->_reset_insertion_mode;
4079 wakaba 1.1
4080     ## reprocess
4081     redo B;
4082     } else {
4083     #
4084     }
4085     } elsif ($token->{type} eq 'end tag') {
4086     if ($token->{tag_name} eq 'tr') {
4087     ## have an element in table scope
4088     my $i;
4089 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4090     my $node = $self->{open_elements}->[$_];
4091 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4092     $i = $_;
4093     last INSCOPE;
4094     } elsif ({
4095     table => 1, html => 1,
4096     }->{$node->[1]}) {
4097     last INSCOPE;
4098     }
4099     } # INSCOPE
4100     unless (defined $i) {
4101 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4102 wakaba 1.1 ## Ignore the token
4103     !!!next-token;
4104     redo B;
4105     }
4106    
4107     ## Clear back to table row context
4108     while (not {
4109     tr => 1, html => 1,
4110 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4111     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4112     pop @{$self->{open_elements}};
4113 wakaba 1.1 }
4114    
4115 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4116     $self->{insertion_mode} = 'in table body';
4117 wakaba 1.1 !!!next-token;
4118     redo B;
4119     } elsif ($token->{tag_name} eq 'table') {
4120     ## As if </tr>
4121     ## have an element in table scope
4122     my $i;
4123 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4124     my $node = $self->{open_elements}->[$_];
4125 wakaba 1.1 if ($node->[1] eq 'tr') {
4126     $i = $_;
4127     last INSCOPE;
4128     } elsif ({
4129     table => 1, html => 1,
4130     }->{$node->[1]}) {
4131     last INSCOPE;
4132     }
4133     } # INSCOPE
4134     unless (defined $i) {
         ## No |tr| element in table scope: the implied </tr> cannot be
         ## applied.  Report the tag name of the offending token (not
         ## its token type, which would always read "end tag").
4135 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4136 wakaba 1.1 ## Ignore the token
4137     !!!next-token;
4138     redo B;
4139     }
4140    
4141     ## Clear back to table row context
4142     while (not {
4143     tr => 1, html => 1,
4144 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4145     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4146     pop @{$self->{open_elements}};
4147 wakaba 1.1 }
4148    
4149 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4150     $self->{insertion_mode} = 'in table body';
4151 wakaba 1.1 ## reprocess
4152     redo B;
4153     } elsif ({
4154     tbody => 1, tfoot => 1, thead => 1,
4155     }->{$token->{tag_name}}) {
4156     ## have an element in table scope
4157     my $i;
4158 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4159     my $node = $self->{open_elements}->[$_];
4160 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4161     $i = $_;
4162     last INSCOPE;
4163     } elsif ({
4164     table => 1, html => 1,
4165     }->{$node->[1]}) {
4166     last INSCOPE;
4167     }
4168     } # INSCOPE
4169     unless (defined $i) {
4170 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4171 wakaba 1.1 ## Ignore the token
4172     !!!next-token;
4173     redo B;
4174     }
4175    
4176     ## As if </tr>
4177     ## have an element in table scope
4178     my $tr_i; # index of the |tr| element, if it is in table scope
         ## NOTE: A distinct name is used so that this declaration does
         ## not mask the |$i| declared for the preceding scope check.
4179 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4180     my $node = $self->{open_elements}->[$_];
4181 wakaba 1.1 if ($node->[1] eq 'tr') {
4182     $tr_i = $_;
4183     last INSCOPE;
4184     } elsif ({
4185     table => 1, html => 1,
4186     }->{$node->[1]}) {
         ## Reached the scope boundary without finding a |tr|.
4187     last INSCOPE;
4188     }
4189     } # INSCOPE
4190     unless (defined $tr_i) {
4191 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4192 wakaba 1.1 ## Ignore the token
4193     !!!next-token;
4194     redo B;
4195     }
4196    
4197     ## Clear back to table row context
4198     while (not {
4199     tr => 1, html => 1,
4200 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4201     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4202     pop @{$self->{open_elements}};
4203 wakaba 1.1 }
4204    
4205 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4206     $self->{insertion_mode} = 'in table body';
4207 wakaba 1.1 ## reprocess
4208     redo B;
4209     } elsif ({
4210     body => 1, caption => 1, col => 1,
4211     colgroup => 1, html => 1, td => 1, th => 1,
4212     }->{$token->{tag_name}}) {
4213 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4214 wakaba 1.1 ## Ignore the token
4215     !!!next-token;
4216     redo B;
4217     } else {
4218     #
4219     }
4220     } else {
4221     #
4222     }
4223    
4224     ## As if in table
4225 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4226 wakaba 1.1 $in_body->($insert_to_foster);
4227     redo B;
4228 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4229 wakaba 1.1 if ($token->{type} eq 'character') {
4230     ## NOTE: This is a code clone of "character in body".
4231     $reconstruct_active_formatting_elements->($insert_to_current);
4232    
4233 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4234 wakaba 1.1
4235     !!!next-token;
4236     redo B;
4237     } elsif ($token->{type} eq 'comment') {
4238     ## NOTE: This is a code clone of "comment in body".
4239     my $comment = $self->{document}->create_comment ($token->{data});
4240 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4241 wakaba 1.1 !!!next-token;
4242     redo B;
4243     } elsif ($token->{type} eq 'start tag') {
4244     if ({
4245     caption => 1, col => 1, colgroup => 1,
4246     tbody => 1, td => 1, tfoot => 1, th => 1,
4247     thead => 1, tr => 1,
4248     }->{$token->{tag_name}}) {
4249     ## have an element in table scope
4250     my $tn;
4251 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4252     my $node = $self->{open_elements}->[$_];
4253 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4254     $tn = $node->[1];
4255     last INSCOPE;
4256     } elsif ({
4257     table => 1, html => 1,
4258     }->{$node->[1]}) {
4259     last INSCOPE;
4260     }
4261     } # INSCOPE
4262     unless (defined $tn) {
4263 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4264 wakaba 1.1 ## Ignore the token
4265     !!!next-token;
4266     redo B;
4267     }
4268    
4269     ## Close the cell
4270     !!!back-token; # <?>
4271     $token = {type => 'end tag', tag_name => $tn};
4272     redo B;
4273     } else {
4274     #
4275     }
4276     } elsif ($token->{type} eq 'end tag') {
4277     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4278     ## have an element in table scope
4279     my $i;
4280 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4281     my $node = $self->{open_elements}->[$_];
4282 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4283     $i = $_;
4284     last INSCOPE;
4285     } elsif ({
4286     table => 1, html => 1,
4287     }->{$node->[1]}) {
4288     last INSCOPE;
4289     }
4290     } # INSCOPE
4291     unless (defined $i) {
4292 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4293 wakaba 1.1 ## Ignore the token
4294     !!!next-token;
4295     redo B;
4296     }
4297    
4298     ## generate implied end tags
4299     if ({
4300     dd => 1, dt => 1, li => 1, p => 1,
4301     td => ($token->{tag_name} eq 'th'),
4302     th => ($token->{tag_name} eq 'td'),
4303     tr => 1,
4304 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4305 wakaba 1.1 !!!back-token;
4306     $token = {type => 'end tag',
4307 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4308 wakaba 1.1 redo B;
4309     }
4310    
4311 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4312     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4313 wakaba 1.1 }
4314    
4315 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4316 wakaba 1.1
4317     $clear_up_to_marker->();
4318    
4319 wakaba 1.3 $self->{insertion_mode} = 'in row';
4320 wakaba 1.1
4321     !!!next-token;
4322     redo B;
4323     } elsif ({
4324     body => 1, caption => 1, col => 1,
4325     colgroup => 1, html => 1,
4326     }->{$token->{tag_name}}) {
4327 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4328 wakaba 1.1 ## Ignore the token
4329     !!!next-token;
4330     redo B;
4331     } elsif ({
4332     table => 1, tbody => 1, tfoot => 1,
4333     thead => 1, tr => 1,
4334     }->{$token->{tag_name}}) {
4335     ## have an element in table scope
4336     my $i;
4337     my $tn;
4338 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4339     my $node = $self->{open_elements}->[$_];
4340 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4341     $i = $_;
4342     last INSCOPE;
4343     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4344     $tn = $node->[1];
4345     ## NOTE: There is exactly one |td| or |th| element
4346     ## in scope in the stack of open elements by definition.
4347     } elsif ({
4348     table => 1, html => 1,
4349     }->{$node->[1]}) {
4350     last INSCOPE;
4351     }
4352     } # INSCOPE
4353     unless (defined $i) {
4354 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4355 wakaba 1.1 ## Ignore the token
4356     !!!next-token;
4357     redo B;
4358     }
4359    
4360     ## Close the cell
4361     !!!back-token; # </?>
4362     $token = {type => 'end tag', tag_name => $tn};
4363     redo B;
4364     } else {
4365     #
4366     }
4367     } else {
4368     #
4369     }
4370    
4371     $in_body->($insert_to_current);
4372     redo B;
4373 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4374 wakaba 1.1 if ($token->{type} eq 'character') {
4375 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4376 wakaba 1.1 !!!next-token;
4377     redo B;
4378     } elsif ($token->{type} eq 'comment') {
4379     my $comment = $self->{document}->create_comment ($token->{data});
4380 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4381 wakaba 1.1 !!!next-token;
4382     redo B;
4383     } elsif ($token->{type} eq 'start tag') {
4384     if ($token->{tag_name} eq 'option') {
4385 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4386 wakaba 1.1 ## As if </option>
4387 wakaba 1.3 pop @{$self->{open_elements}};
4388 wakaba 1.1 }
4389    
4390     !!!insert-element ($token->{tag_name}, $token->{attributes});
4391     !!!next-token;
4392     redo B;
4393     } elsif ($token->{tag_name} eq 'optgroup') {
4394 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4395 wakaba 1.1 ## As if </option>
4396 wakaba 1.3 pop @{$self->{open_elements}};
4397 wakaba 1.1 }
4398    
4399 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4400 wakaba 1.1 ## As if </optgroup>
4401 wakaba 1.3 pop @{$self->{open_elements}};
4402 wakaba 1.1 }
4403    
4404     !!!insert-element ($token->{tag_name}, $token->{attributes});
4405     !!!next-token;
4406     redo B;
4407     } elsif ($token->{tag_name} eq 'select') {
4408 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4409 wakaba 1.1 ## As if </select> instead
4410     ## have an element in table scope
4411     my $i;
4412 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4413     my $node = $self->{open_elements}->[$_];
4414 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4415     $i = $_;
4416     last INSCOPE;
4417     } elsif ({
4418     table => 1, html => 1,
4419     }->{$node->[1]}) {
4420     last INSCOPE;
4421     }
4422     } # INSCOPE
4423     unless (defined $i) {
4424 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4425 wakaba 1.1 ## Ignore the token
4426     !!!next-token;
4427     redo B;
4428     }
4429    
4430 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4431 wakaba 1.1
4432 wakaba 1.3 $self->_reset_insertion_mode;
4433 wakaba 1.1
4434     !!!next-token;
4435     redo B;
4436     } else {
4437     #
4438     }
4439     } elsif ($token->{type} eq 'end tag') {
4440     if ($token->{tag_name} eq 'optgroup') {
4441 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4442     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4443 wakaba 1.1 ## As if </option>
4444 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4445     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4446     pop @{$self->{open_elements}};
4447 wakaba 1.1 } else {
4448 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4449 wakaba 1.1 ## Ignore the token
4450     }
4451     !!!next-token;
4452     redo B;
4453     } elsif ($token->{tag_name} eq 'option') {
4454 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4455     pop @{$self->{open_elements}};
4456 wakaba 1.1 } else {
4457 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4458 wakaba 1.1 ## Ignore the token
4459     }
4460     !!!next-token;
4461     redo B;
4462     } elsif ($token->{tag_name} eq 'select') {
4463     ## have an element in table scope
4464     my $i;
4465 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4466     my $node = $self->{open_elements}->[$_];
4467 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4468     $i = $_;
4469     last INSCOPE;
4470     } elsif ({
4471     table => 1, html => 1,
4472     }->{$node->[1]}) {
4473     last INSCOPE;
4474     }
4475     } # INSCOPE
4476     unless (defined $i) {
4477 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4478 wakaba 1.1 ## Ignore the token
4479     !!!next-token;
4480     redo B;
4481     }
4482    
4483 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4484 wakaba 1.1
4485 wakaba 1.3 $self->_reset_insertion_mode;
4486 wakaba 1.1
4487     !!!next-token;
4488     redo B;
4489     } elsif ({
4490     caption => 1, table => 1, tbody => 1,
4491     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4492     }->{$token->{tag_name}}) {
4493 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4494 wakaba 1.1
4495     ## have an element in table scope
4496     my $i;
4497 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4498     my $node = $self->{open_elements}->[$_];
4499 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4500     $i = $_;
4501     last INSCOPE;
4502     } elsif ({
4503     table => 1, html => 1,
4504     }->{$node->[1]}) {
4505     last INSCOPE;
4506     }
4507     } # INSCOPE
4508     unless (defined $i) {
4509     ## Ignore the token
4510     !!!next-token;
4511     redo B;
4512     }
4513    
4514     ## As if </select>
4515     ## have an element in table scope
4516     undef $i;
4517 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4518     my $node = $self->{open_elements}->[$_];
4519 wakaba 1.1 if ($node->[1] eq 'select') {
4520     $i = $_;
4521     last INSCOPE;
4522     } elsif ({
4523     table => 1, html => 1,
4524     }->{$node->[1]}) {
4525     last INSCOPE;
4526     }
4527     } # INSCOPE
4528     unless (defined $i) {
4529 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4530 wakaba 1.1 ## Ignore the </select> token
4531     !!!next-token; ## TODO: ok?
4532     redo B;
4533     }
4534    
4535 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4536 wakaba 1.1
4537 wakaba 1.3 $self->_reset_insertion_mode;
4538 wakaba 1.1
4539     ## reprocess
4540     redo B;
4541     } else {
4542     #
4543     }
4544     } else {
4545     #
4546     }
4547    
4548 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4549 wakaba 1.1 ## Ignore the token
4550     !!!next-token;
4551     redo B;
4552 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4553 wakaba 1.1 if ($token->{type} eq 'character') {
4554     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4555     ## As if in body
4556     $reconstruct_active_formatting_elements->($insert_to_current);
4557    
4558 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4559 wakaba 1.1
4560     unless (length $token->{data}) {
4561     !!!next-token;
4562     redo B;
4563     }
4564     }
4565    
4566     #
4567 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4568 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4569     my $comment = $self->{document}->create_comment ($token->{data});
4570 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4571 wakaba 1.1 !!!next-token;
4572     redo B;
4573 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4574     !!!parse-error (type => 'after body:'.$token->{tag_name});
4575     #
4576 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4577     if ($token->{tag_name} eq 'html') {
4578 wakaba 1.3 if (defined $self->{inner_html_node}) {
4579     !!!parse-error (type => 'unmatched end tag:html');
4580     ## Ignore the token
4581     !!!next-token;
4582     redo B;
4583     } else {
4584     $phase = 'trailing end';
4585     !!!next-token;
4586     redo B;
4587     }
4588 wakaba 1.1 } else {
4589 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4590 wakaba 1.1 }
4591     } else {
4592 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4593 wakaba 1.1 }
4594    
4595 wakaba 1.3 $self->{insertion_mode} = 'in body';
4596 wakaba 1.1 ## reprocess
4597     redo B;
4598 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4599 wakaba 1.1 if ($token->{type} eq 'character') {
4600     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4601 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4602 wakaba 1.1
4603     unless (length $token->{data}) {
4604     !!!next-token;
4605     redo B;
4606     }
4607     }
4608    
4609     #
4610     } elsif ($token->{type} eq 'comment') {
4611     my $comment = $self->{document}->create_comment ($token->{data});
4612 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4613 wakaba 1.1 !!!next-token;
4614     redo B;
4615     } elsif ($token->{type} eq 'start tag') {
4616     if ($token->{tag_name} eq 'frameset') {
4617     !!!insert-element ($token->{tag_name}, $token->{attributes});
4618     !!!next-token;
4619     redo B;
4620     } elsif ($token->{tag_name} eq 'frame') {
4621     !!!insert-element ($token->{tag_name}, $token->{attributes});
4622 wakaba 1.3 pop @{$self->{open_elements}};
4623 wakaba 1.1 !!!next-token;
4624     redo B;
4625     } elsif ($token->{tag_name} eq 'noframes') {
4626     $in_body->($insert_to_current);
4627     redo B;
4628     } else {
4629     #
4630     }
4631     } elsif ($token->{type} eq 'end tag') {
4632     if ($token->{tag_name} eq 'frameset') {
4633 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4634     @{$self->{open_elements}} == 1) {
4635     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4636 wakaba 1.1 ## Ignore the token
4637     !!!next-token;
4638     } else {
4639 wakaba 1.3 pop @{$self->{open_elements}};
4640 wakaba 1.1 !!!next-token;
4641     }
4642    
4643     ## if not inner_html and
4644 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4645     $self->{insertion_mode} = 'after frameset';
4646 wakaba 1.1 }
4647     redo B;
4648     } else {
4649     #
4650     }
4651     } else {
4652     #
4653     }
4654    
4655 wakaba 1.3 if (defined $token->{tag_name}) {
4656     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4657     } else {
4658     !!!parse-error (type => 'in frameset:#'.$token->{type});
4659     }
4660 wakaba 1.1 ## Ignore the token
4661     !!!next-token;
4662     redo B;
4663 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4664 wakaba 1.1 if ($token->{type} eq 'character') {
4665     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4666 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4667 wakaba 1.1
4668     unless (length $token->{data}) {
4669     !!!next-token;
4670     redo B;
4671     }
4672     }
4673    
4674     #
4675     } elsif ($token->{type} eq 'comment') {
4676     my $comment = $self->{document}->create_comment ($token->{data});
4677 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4678 wakaba 1.1 !!!next-token;
4679     redo B;
4680     } elsif ($token->{type} eq 'start tag') {
4681     if ($token->{tag_name} eq 'noframes') {
4682     $in_body->($insert_to_current);
4683     redo B;
4684     } else {
4685     #
4686     }
4687     } elsif ($token->{type} eq 'end tag') {
4688     if ($token->{tag_name} eq 'html') {
4689     $phase = 'trailing end';
4690     !!!next-token;
4691     redo B;
4692     } else {
4693     #
4694     }
4695     } else {
4696     #
4697     }
4698    
4699 wakaba 1.3 if (defined $token->{tag_name}) {
4700     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
4701     } else {
4702     !!!parse-error (type => 'after frameset:#'.$token->{type});
4703     }
4704 wakaba 1.1 ## Ignore the token
4705     !!!next-token;
4706     redo B;
4707    
4708     ## ISSUE: An issue in spec there
4709     } else {
4710 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
4711 wakaba 1.1 }
4712     }
4713     } elsif ($phase eq 'trailing end') {
4714     ## states in the main stage is preserved yet # MUST
4715    
4716     if ($token->{type} eq 'DOCTYPE') {
4717 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
4718 wakaba 1.1 ## Ignore the token
4719     !!!next-token;
4720     redo B;
4721     } elsif ($token->{type} eq 'comment') {
4722     my $comment = $self->{document}->create_comment ($token->{data});
4723     $self->{document}->append_child ($comment);
4724     !!!next-token;
4725     redo B;
4726     } elsif ($token->{type} eq 'character') {
4727     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4728     my $data = $1;
4729     ## As if in the main phase.
4730     ## NOTE: The insertion mode in the main phase
4731     ## just before the phase has been changed to the trailing
4732     ## end phase is either "after body" or "after frameset".
4733     $reconstruct_active_formatting_elements->($insert_to_current)
4734     if $phase eq 'main';
4735    
4736 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
4737 wakaba 1.1
4738     unless (length $token->{data}) {
4739     !!!next-token;
4740     redo B;
4741     }
4742     }
4743    
4744 wakaba 1.3 !!!parse-error (type => 'after html:#character');
4745 wakaba 1.1 $phase = 'main';
4746     ## reprocess
4747     redo B;
4748     } elsif ($token->{type} eq 'start tag' or
4749     $token->{type} eq 'end tag') {
4750 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
4751 wakaba 1.1 $phase = 'main';
4752     ## reprocess
4753     redo B;
4754     } elsif ($token->{type} eq 'end-of-file') {
4755     ## Stop parsing
4756     last B;
4757     } else {
4758     die "$0: $token->{type}: Unknown token";
4759     }
4760     }
4761     } # B
4762    
4763     ## Stop parsing # MUST
4764    
4765     ## TODO: script stuffs
4766 wakaba 1.3 } # _tree_construct_main
4767    
## Replaces the children of |$node| with the result of parsing an HTML
## string (the |innerHTML| setter).
##
## Invoked as a class method:
##
##   Whatpm::HTML->set_inner_html ($node, $string, $onerror);
##
## Arguments:
##
##   $node    - A node whose |node_type| is 9 or 1.
##   $string  - The markup to parse.  In the |node_type| 1 case it is read
##              through a reference to the caller's scalar (no copy).
##   $onerror - Optional code reference that receives parse errors.
##
## For node type 9, the existing children are removed and the string is
## handed to |parse_string| together with |$node| and |$onerror|.
##
## For node type 1, the string is parsed by a fresh parser object into a
## newly created document whose only child is an |html| element; after
## parsing, the children of |$node| are removed and the children of that
## |html| element are appended to |$node|.  In this case |$onerror| (or
## a default handler that |warn|s) is called with the arguments given to
## the parser's |parse_error| callback followed by
## |line => $line, column => $column|.
##
## Dies if |$node| has any other node type.
sub set_inner_html ($$$) {
  my $class = shift;
  my $node = shift;
  my $s = \$_[0];
  my $onerror = $_[1];

  my $nt = $node->node_type;
  if ($nt == 9) {
    # MUST

    ## Step 1 # MUST
    ## TODO: If the document has an active parser, ...
    ## ISSUE: There is an issue in the spec.

    ## Step 2 # MUST
    ## Iterate over a copy, since the child list is modified by removal.
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }

    ## Step 3, 4, 5 # MUST
    $class->parse_string ($$s => $node, $onerror);
  } elsif ($nt == 1) {
    ## TODO: If non-html element

    ## NOTE: Most of this code is copied from |parse_string|
    ## NOTE(review): The newline and line/column handling of the input
    ## reader below has been corrected here only; |parse_string| should
    ## be checked for the same problem.

    ## Step 1 # MUST
    my $doc = $node->owner_document->implementation->create_document;
    ## TODO: Mark as HTML document
    my $p = $class->new;
    $p->{document} = $doc;

    ## Step 9 # MUST
    my $i = 0;      # index of the next unread character in |$$s|
    my $line = 1;   # current line number, reported to the error handler
    my $column = 0; # current column number, reported to the error handler
    $p->{set_next_input_character} = sub {
      my $self = shift;
      if ($i >= length $$s) {
        $self->{next_input_character} = -1; # end of input
        return;
      }
      $self->{next_input_character} = ord substr $$s, $i++, 1;
      $column++;

      if ($self->{next_input_character} == 0x000A) { # LF
        ## A bare LF also starts a new line.
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} == 0x000D) { # CR
        ## CR LF is a single newline.  Only an LF is consumed here; any
        ## other character is left in the input so that it goes through
        ## the normalization in this subroutine when it is read.
        $i++ if $i < length $$s and substr ($$s, $i, 1) eq "\x0A";
        $self->{next_input_character} = 0x000A; # LF # MUST
        $line++;
        $column = 0;
      } elsif ($self->{next_input_character} > 0x10FFFF) {
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      } elsif ($self->{next_input_character} == 0x0000) { # NULL
        $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
      }
    };

    ## Default error handler, used when no |$onerror| is given.
    my $ponerror = $onerror || sub {
      my (%opt) = @_;
      warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
    };
    ## Append the current input position to the error arguments.
    $p->{parse_error} = sub {
      $ponerror->(@_, line => $line, column => $column);
    };

    $p->_initialize_tokenizer;
    $p->_initialize_tree_constructor;

    ## Step 2
    ## The initial content model flag depends on the context element.
    my $node_ln = $node->local_name;
    $p->{content_model_flag} = {
      title => 'RCDATA',
      textarea => 'RCDATA',
      style => 'CDATA',
      script => 'CDATA',
      xmp => 'CDATA',
      iframe => 'CDATA',
      noembed => 'CDATA',
      noframes => 'CDATA',
      noscript => 'CDATA',
      plaintext => 'PLAINTEXT',
    }->{$node_ln} || 'PCDATA';
       ## ISSUE: What is "the name of the element"? local name?

    $p->{inner_html_node} = [$node, $node_ln];

    ## Step 4
    my $root = $doc->create_element_ns
      ('http://www.w3.org/1999/xhtml', [undef, 'html']);

    ## Step 5 # MUST
    $doc->append_child ($root);

    ## Step 6 # MUST
    push @{$p->{open_elements}}, [$root, 'html'];

    undef $p->{head_element};

    ## Step 7 # MUST
    $p->_reset_insertion_mode;

    ## Step 8 # MUST
    ## Use the nearest XHTML-namespace |form| element, starting at
    ## |$node| itself and walking up the ancestors, as the form element.
    my $anode = $node;
    AN: while (defined $anode) {
      if ($anode->node_type == 1) {
        my $nsuri = $anode->namespace_uri;
        if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
          if ($anode->local_name eq 'form') { ## TODO: case?
            $p->{form_element} = $anode;
            last AN;
          }
        }
      }
      $anode = $anode->parent_node;
    } # AN

    ## Step 3 # MUST
    ## Step 10 # MUST
    {
      ## The |!!!next-token| macro is used with |$self| bound to the
      ## parser object.
      my $self = $p;
      !!!next-token;
    }
    $p->_tree_construction_main;

    ## Step 11 # MUST
    my @cn = @{$node->child_nodes};
    for (@cn) {
      $node->remove_child ($_);
    }
    ## ISSUE: mutation events? read-only?

    ## Step 12 # MUST
    @cn = @{$root->child_nodes};
    for (@cn) {
      $node->append_child ($_);
    }
    ## ISSUE: adopt_node? mutation events?

    $p->_terminate_tree_constructor;
  } else {
    die "$0: |set_inner_html| is not defined for node of type $nt";
  }
} # set_inner_html
4918    
4919     } # tree construction stage
4920 wakaba 1.1
## Serializes the descendants of |$node| as an HTML string (the
## |innerHTML| getter).
##
## Invoked as a method; the invocant is ignored.
##
## Arguments:
##
##   $node     - The node whose children are serialized.
##   $on_error - Optional code reference.  It is called with the child
##               node as its only argument for every node whose node
##               type is not handled here (anything other than 1, 3, 4,
##               5, 8, and 10).
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text content is written out without escaping.
  my $is_cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Escapes |&|, |<|, |>|, and |"| in attribute values and text.
  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## The initial state is "in CDATA" if |$node| itself or any of its
  ## ancestors is one of the unescaped-content elements.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## |namespace_uri| can be undefined; do not compare it blindly.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
        my $ln = $parent->local_name;
        if (defined $ln and $is_cdata_element->{$ln}) { ## TODO: case thingy
          $in_cdata = 1;
        }
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work queue processed from the front.  Besides node
  ## objects it can hold plain strings: the marker |cdata-out| (leave the
  ## "in CDATA" state) and literal end tags to be appended to the output.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value) . '"';
      }
      $s .= '>';

      ## These elements are written without content and end tag.
      next C if {
        area => 1, base => 1, basefont => 1, bgsound => 1,
        br => 1, col => 1, embed => 1, frame => 1, hr => 1,
        img => 1, input => 1, link => 1, meta => 1, param => 1,
        spacer => 1, wbr => 1,
      }->{$tag_name};

      ## Queue the marker first so that it ends up after the end tag
      ## queued below.
      if (not $in_cdata and $is_cdata_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) {
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        $s .= $escape->($child->data);
      }
    } elsif ($nt == 8) {
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) {
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## Process the children in place.  Appending them to the end of
      ## the queue would serialize them after the following siblings and
      ## after the end tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5017    
5018     1;
5019 wakaba 1.3 # $Date: 2007/05/01 07:46:42 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24