/[suikacvs]/markup/html/whatpm/What/HTML.pm.src
Suika

Contents of /markup/html/whatpm/What/HTML.pm.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations) (download) (as text)
Tue May 1 06:22:12 2007 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.6: +282 -159 lines
File MIME type: application/x-wais-source
++ whatpm/What/ChangeLog	1 May 2007 06:20:06 -0000
2007-05-01  Wakaba  <wakaba@suika.fam.cx>

	* NanoDOM.pm (last_child, previous_sibling): New attributes.
	(clone_node): Attribute nodes were not completely copied.

	* HTML.pm.src: Many bugs are fixed.

++ whatpm/t/ChangeLog	1 May 2007 06:21:52 -0000
2007-05-01  Wakaba  <wakaba@suika.fam.cx>

	* HTML-tree.t: New test file is added.  Sort key
	was incorrect.

	* HTML-tokenizer.t: New test file is added.

	* tokenizer-test-1.test, tree-test-1.dat: New tests.

1 wakaba 1.1 package What::HTML;
2     use strict;
3 wakaba 1.7 our $VERSION=do{my @r=(q$Revision: 1.6 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; # Builds the version from the digits of the CVS Revision keyword: first number as-is, each later number zero-padded to two digits (1.6 gives "1.06")
4 wakaba 1.1
5     ## This is a very, very early version of an HTML parser.
6    
7     my $permitted_slash_tag_name = { # Start-tag names for which a "/" immediately followed by ">" is accepted without a parse error (see the "# permitted slash" branches of the tokenizer)
8     base => 1,
9     link => 1,
10     meta => 1,
11     hr => 1,
12     br => 1,
13     img=> 1,
14     embed => 1,
15     param => 1,
16     area => 1,
17     col => 1,
18     input => 1,
19     }; # $permitted_slash_tag_name
20    
21 wakaba 1.5 my $entity_char = { # Maps a case-sensitive entity name (without "&" and ";") to the single character it stands for; presumably consulted by _tokenize_attempt_to_consume_an_entity, which is not visible here
22     AElig => "\x{00C6}",
23     Aacute => "\x{00C1}",
24     Acirc => "\x{00C2}",
25     Agrave => "\x{00C0}",
26     Alpha => "\x{0391}",
27     Aring => "\x{00C5}",
28     Atilde => "\x{00C3}",
29     Auml => "\x{00C4}",
30     Beta => "\x{0392}",
31     Ccedil => "\x{00C7}",
32     Chi => "\x{03A7}",
33     Dagger => "\x{2021}",
34     Delta => "\x{0394}",
35     ETH => "\x{00D0}",
36     Eacute => "\x{00C9}",
37     Ecirc => "\x{00CA}",
38     Egrave => "\x{00C8}",
39     Epsilon => "\x{0395}",
40     Eta => "\x{0397}",
41     Euml => "\x{00CB}",
42     Gamma => "\x{0393}",
43     Iacute => "\x{00CD}",
44     Icirc => "\x{00CE}",
45     Igrave => "\x{00CC}",
46     Iota => "\x{0399}",
47     Iuml => "\x{00CF}",
48     Kappa => "\x{039A}",
49     Lambda => "\x{039B}",
50     Mu => "\x{039C}",
51     Ntilde => "\x{00D1}",
52     Nu => "\x{039D}",
53     OElig => "\x{0152}",
54     Oacute => "\x{00D3}",
55     Ocirc => "\x{00D4}",
56     Ograve => "\x{00D2}",
57     Omega => "\x{03A9}",
58     Omicron => "\x{039F}",
59     Oslash => "\x{00D8}",
60     Otilde => "\x{00D5}",
61     Ouml => "\x{00D6}",
62     Phi => "\x{03A6}",
63     Pi => "\x{03A0}",
64     Prime => "\x{2033}",
65     Psi => "\x{03A8}",
66     Rho => "\x{03A1}",
67     Scaron => "\x{0160}",
68     Sigma => "\x{03A3}",
69     THORN => "\x{00DE}",
70     Tau => "\x{03A4}",
71     Theta => "\x{0398}",
72     Uacute => "\x{00DA}",
73     Ucirc => "\x{00DB}",
74     Ugrave => "\x{00D9}",
75     Upsilon => "\x{03A5}",
76     Uuml => "\x{00DC}",
77     Xi => "\x{039E}",
78     Yacute => "\x{00DD}",
79     Yuml => "\x{0178}",
80     Zeta => "\x{0396}",
81     aacute => "\x{00E1}",
82     acirc => "\x{00E2}",
83     acute => "\x{00B4}",
84     aelig => "\x{00E6}",
85     agrave => "\x{00E0}",
86     alefsym => "\x{2135}",
87     alpha => "\x{03B1}",
88     amp => "\x{0026}",
89     AMP => "\x{0026}", # upper-case alias of amp (COPY, GT, LT, QUOT and REG below are likewise aliases of their lower-case forms)
90     and => "\x{2227}",
91     ang => "\x{2220}",
92     apos => "\x{0027}",
93     aring => "\x{00E5}",
94     asymp => "\x{2248}",
95     atilde => "\x{00E3}",
96     auml => "\x{00E4}",
97     bdquo => "\x{201E}",
98     beta => "\x{03B2}",
99     brvbar => "\x{00A6}",
100     bull => "\x{2022}",
101     cap => "\x{2229}",
102     ccedil => "\x{00E7}",
103     cedil => "\x{00B8}",
104     cent => "\x{00A2}",
105     chi => "\x{03C7}",
106     circ => "\x{02C6}",
107     clubs => "\x{2663}",
108     cong => "\x{2245}",
109     copy => "\x{00A9}",
110     COPY => "\x{00A9}",
111     crarr => "\x{21B5}",
112     cup => "\x{222A}",
113     curren => "\x{00A4}",
114     dArr => "\x{21D3}",
115     dagger => "\x{2020}",
116     darr => "\x{2193}",
117     deg => "\x{00B0}",
118     delta => "\x{03B4}",
119     diams => "\x{2666}",
120     divide => "\x{00F7}",
121     eacute => "\x{00E9}",
122     ecirc => "\x{00EA}",
123     egrave => "\x{00E8}",
124     empty => "\x{2205}",
125     emsp => "\x{2003}",
126     ensp => "\x{2002}",
127     epsilon => "\x{03B5}",
128     equiv => "\x{2261}",
129     eta => "\x{03B7}",
130     eth => "\x{00F0}",
131     euml => "\x{00EB}",
132     euro => "\x{20AC}",
133     exist => "\x{2203}",
134     fnof => "\x{0192}",
135     forall => "\x{2200}",
136     frac12 => "\x{00BD}",
137     frac14 => "\x{00BC}",
138     frac34 => "\x{00BE}",
139     frasl => "\x{2044}",
140     gamma => "\x{03B3}",
141     ge => "\x{2265}",
142     gt => "\x{003E}",
143     GT => "\x{003E}",
144     hArr => "\x{21D4}",
145     harr => "\x{2194}",
146     hearts => "\x{2665}",
147     hellip => "\x{2026}",
148     iacute => "\x{00ED}",
149     icirc => "\x{00EE}",
150     iexcl => "\x{00A1}",
151     igrave => "\x{00EC}",
152     image => "\x{2111}",
153     infin => "\x{221E}",
154     int => "\x{222B}",
155     iota => "\x{03B9}",
156     iquest => "\x{00BF}",
157     isin => "\x{2208}",
158     iuml => "\x{00EF}",
159     kappa => "\x{03BA}",
160     lArr => "\x{21D0}",
161     lambda => "\x{03BB}",
162     lang => "\x{2329}",
163     laquo => "\x{00AB}",
164     larr => "\x{2190}",
165     lceil => "\x{2308}",
166     ldquo => "\x{201C}",
167     le => "\x{2264}",
168     lfloor => "\x{230A}",
169     lowast => "\x{2217}",
170     loz => "\x{25CA}",
171     lrm => "\x{200E}",
172     lsaquo => "\x{2039}",
173     lsquo => "\x{2018}",
174     lt => "\x{003C}",
175     LT => "\x{003C}",
176     macr => "\x{00AF}",
177     mdash => "\x{2014}",
178     micro => "\x{00B5}",
179     middot => "\x{00B7}",
180     minus => "\x{2212}",
181     mu => "\x{03BC}",
182     nabla => "\x{2207}",
183     nbsp => "\x{00A0}",
184     ndash => "\x{2013}",
185     ne => "\x{2260}",
186     ni => "\x{220B}",
187     not => "\x{00AC}",
188     notin => "\x{2209}",
189     nsub => "\x{2284}",
190     ntilde => "\x{00F1}",
191     nu => "\x{03BD}",
192     oacute => "\x{00F3}",
193     ocirc => "\x{00F4}",
194     oelig => "\x{0153}",
195     ograve => "\x{00F2}",
196     oline => "\x{203E}",
197     omega => "\x{03C9}",
198     omicron => "\x{03BF}",
199     oplus => "\x{2295}",
200     or => "\x{2228}",
201     ordf => "\x{00AA}",
202     ordm => "\x{00BA}",
203     oslash => "\x{00F8}",
204     otilde => "\x{00F5}",
205     otimes => "\x{2297}",
206     ouml => "\x{00F6}",
207     para => "\x{00B6}",
208     part => "\x{2202}",
209     permil => "\x{2030}",
210     perp => "\x{22A5}",
211     phi => "\x{03C6}",
212     pi => "\x{03C0}",
213     piv => "\x{03D6}",
214     plusmn => "\x{00B1}",
215     pound => "\x{00A3}",
216     prime => "\x{2032}",
217     prod => "\x{220F}",
218     prop => "\x{221D}",
219     psi => "\x{03C8}",
220     quot => "\x{0022}",
221     QUOT => "\x{0022}",
222     rArr => "\x{21D2}",
223     radic => "\x{221A}",
224     rang => "\x{232A}",
225     raquo => "\x{00BB}",
226     rarr => "\x{2192}",
227     rceil => "\x{2309}",
228     rdquo => "\x{201D}",
229     real => "\x{211C}",
230     reg => "\x{00AE}",
231     REG => "\x{00AE}",
232     rfloor => "\x{230B}",
233     rho => "\x{03C1}",
234     rlm => "\x{200F}",
235     rsaquo => "\x{203A}",
236     rsquo => "\x{2019}",
237     sbquo => "\x{201A}",
238     scaron => "\x{0161}",
239     sdot => "\x{22C5}",
240     sect => "\x{00A7}",
241     shy => "\x{00AD}",
242     sigma => "\x{03C3}",
243     sigmaf => "\x{03C2}",
244     sim => "\x{223C}",
245     spades => "\x{2660}",
246     sub => "\x{2282}",
247     sube => "\x{2286}",
248     sum => "\x{2211}",
249     sup => "\x{2283}",
250     sup1 => "\x{00B9}",
251     sup2 => "\x{00B2}",
252     sup3 => "\x{00B3}",
253     supe => "\x{2287}",
254     szlig => "\x{00DF}",
255     tau => "\x{03C4}",
256     there4 => "\x{2234}",
257     theta => "\x{03B8}",
258     thetasym => "\x{03D1}",
259     thinsp => "\x{2009}",
260     thorn => "\x{00FE}",
261     tilde => "\x{02DC}",
262     times => "\x{00D7}",
263     trade => "\x{2122}",
264     uArr => "\x{21D1}",
265     uacute => "\x{00FA}",
266     uarr => "\x{2191}",
267     ucirc => "\x{00FB}",
268     ugrave => "\x{00F9}",
269     uml => "\x{00A8}",
270     upsih => "\x{03D2}",
271     upsilon => "\x{03C5}",
272     uuml => "\x{00FC}",
273     weierp => "\x{2118}",
274     xi => "\x{03BE}",
275     yacute => "\x{00FD}",
276     yen => "\x{00A5}",
277     yuml => "\x{00FF}",
278     zeta => "\x{03B6}",
279     zwj => "\x{200D}",
280     zwnj => "\x{200C}",
281     }; # $entity_char
282    
283 wakaba 1.2 my $special_category = { # Set (name => 1) of lower-case element names in the "special" category; presumably used by the tree construction code, which is not visible here
284     address => 1, area => 1, base => 1, basefont => 1, bgsound => 1,
285     blockquote => 1, body => 1, br => 1, center => 1, col => 1, colgroup => 1,
286     dd => 1, dir => 1, div => 1, dl => 1, dt => 1, embed => 1, fieldset => 1,
287     form => 1, frame => 1, frameset => 1, h1 => 1, h2 => 1, h3 => 1,
288     h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, iframe => 1, image => 1,
289     img => 1, input => 1, isindex => 1, li => 1, link => 1, listing => 1,
290     menu => 1, meta => 1, noembed => 1, noframes => 1, noscript => 1,
291     ol => 1, optgroup => 1, option => 1, p => 1, param => 1, plaintext => 1,
292     pre => 1, script => 1, select => 1, spacer => 1, style => 1, tbody => 1,
293     textarea => 1, tfoot => 1, thead => 1, title => 1, tr => 1, ul => 1, wbr => 1,
294     }; # $special_category
295     my $scoping_category = { # Set (name => 1) of lower-case element names in the "scoping" category; presumably used by the tree construction code, which is not visible here
296     button => 1, caption => 1, html => 1, marquee => 1, object => 1,
297     table => 1, td => 1, th => 1,
298     }; # $scoping_category
299     my $formatting_category = { # Set (name => 1) of lower-case element names in the "formatting" category; keys must be spelled exactly as tag names because lookups are by tag name
300     a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
301     s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
302     }; # $formatting_category
303     # $phrasing_category: all other elements
304    
305 wakaba 1.1 sub new ($) { # Constructor: returns a new object blessed into the invoking class, with default handlers installed. NOTE: a ($) prototype is not applied when a sub is invoked as a method.
306     my $class = shift;
307     my $self = bless {}, $class;
308     $self->{set_next_input_character} = sub { # Default input handler (code ref); presumably invoked by the !!!next-input-character macro, whose expansion is not visible here
309     $self->{next_input_character} = -1; # -1 is the value the tokenizer's data state tests for before emitting an 'end-of-file' token
310     }; # NOTE(review): this closure captures $self and is stored inside $self, which forms a reference cycle -- confirm whether callers replace or break it
311     $self->{parse_error} = sub { # Default parse-error handler (code ref): deliberately does nothing
312     #
313     };
314     return $self;
315     } # new
316    
317     ## Implementations MUST act as if they used the state machine described in the spec.
318    
319     sub _initialize_tokenizer ($) { # Resets the tokenizer fields of $self to their initial values; takes the parser object as its only argument
320     my $self = shift;
321     $self->{state} = 'data'; # MUST: the initial tokenizer state is 'data'
322     $self->{content_model_flag} = 'PCDATA'; # MUST: the initial content model flag is PCDATA
323     undef $self->{current_token}; # start tag, end tag, comment, or DOCTYPE
324     undef $self->{current_attribute}; # attribute under construction: a hash with name and value
325     undef $self->{last_emitted_start_tag_name}; # compared against end-tag names while in RCDATA/CDATA ('close tag open' state)
326     undef $self->{last_attribute_value_state}; # not used in the visible part of the file
327     $self->{char} = []; # presumably holds characters pushed back by !!!back-next-input-character -- TODO confirm against the macro expansion
328     # $self->{next_input_character} is set by the macro on the next line
329     !!!next-input-character;
330     $self->{token} = []; # queue of pending tokens; _get_next_token returns from it first when it is non-empty
331     } # _initialize_tokenizer
332    
333     ## A token has:
334     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
335     ## 'character', or 'end-of-file'
336     ## ->{name} (DOCTYPE); ->{tag_name} (start tag, end tag)
337     ## ISSUE: the spec needs s/tagname/tag name/
338     ## ->{error} == 1 or 0 (DOCTYPE)
339     ## ->{attributes} isa HASH (start tag, end tag)
340     ## ->{data} (comment, character)
341    
342     ## Macros
343     ## Macros MUST be preceded by three EXCLAMATION MARKs.
344     ## emit ($token)
345     ## Emits the specified token.
346    
347     ## Emitted token MUST immediately be handled by the tree construction state.
348    
349     ## Before each step, UA MAY check to see if either one of the scripts in
350     ## "list of scripts that will execute as soon as possible" or the first
351     ## script in the "list of scripts that will execute asynchronously",
352     ## has completed loading. If one has, then it MUST be executed
353     ## and removed from the list.
354    
355     sub _get_next_token ($) {
356     my $self = shift;
357     if (@{$self->{token}}) {
358     return shift @{$self->{token}};
359     }
360    
361     A: {
362     if ($self->{state} eq 'data') {
363     if ($self->{next_input_character} == 0x0026) { # &
364     if ($self->{content_model_flag} eq 'PCDATA' or
365     $self->{content_model_flag} eq 'RCDATA') {
366     $self->{state} = 'entity data';
367     !!!next-input-character;
368     redo A;
369     } else {
370     #
371     }
372     } elsif ($self->{next_input_character} == 0x003C) { # <
373     if ($self->{content_model_flag} ne 'PLAINTEXT') {
374     $self->{state} = 'tag open';
375     !!!next-input-character;
376     redo A;
377     } else {
378     #
379     }
380     } elsif ($self->{next_input_character} == -1) {
381     !!!emit ({type => 'end-of-file'});
382     last A; ## TODO: ok?
383     }
384     # Anything else
385     my $token = {type => 'character',
386     data => chr $self->{next_input_character}};
387     ## Stay in the data state
388     !!!next-input-character;
389    
390     !!!emit ($token);
391    
392     redo A;
393     } elsif ($self->{state} eq 'entity data') {
394     ## (cannot happen in CDATA state)
395    
396     my $token = $self->_tokenize_attempt_to_consume_an_entity;
397    
398     $self->{state} = 'data';
399     # next-input-character is already done
400    
401     unless (defined $token) {
402     !!!emit ({type => 'character', data => '&'});
403     } else {
404     !!!emit ($token);
405     }
406    
407     redo A;
408     } elsif ($self->{state} eq 'tag open') {
409     if ($self->{content_model_flag} eq 'RCDATA' or
410     $self->{content_model_flag} eq 'CDATA') {
411     if ($self->{next_input_character} == 0x002F) { # /
412     !!!next-input-character;
413     $self->{state} = 'close tag open';
414     redo A;
415     } else {
416     ## reconsume
417     $self->{state} = 'data';
418    
419 wakaba 1.7 !!!emit ({type => 'character', data => '<'});
420 wakaba 1.1
421     redo A;
422     }
423     } elsif ($self->{content_model_flag} eq 'PCDATA') {
424     if ($self->{next_input_character} == 0x0021) { # !
425     $self->{state} = 'markup declaration open';
426     !!!next-input-character;
427     redo A;
428     } elsif ($self->{next_input_character} == 0x002F) { # /
429     $self->{state} = 'close tag open';
430     !!!next-input-character;
431     redo A;
432     } elsif (0x0041 <= $self->{next_input_character} and
433     $self->{next_input_character} <= 0x005A) { # A..Z
434     $self->{current_token}
435     = {type => 'start tag',
436     tag_name => chr ($self->{next_input_character} + 0x0020)};
437     $self->{state} = 'tag name';
438     !!!next-input-character;
439     redo A;
440     } elsif (0x0061 <= $self->{next_input_character} and
441     $self->{next_input_character} <= 0x007A) { # a..z
442     $self->{current_token} = {type => 'start tag',
443     tag_name => chr ($self->{next_input_character})};
444     $self->{state} = 'tag name';
445     !!!next-input-character;
446     redo A;
447     } elsif ($self->{next_input_character} == 0x003E) { # >
448     !!!parse-error;
449     $self->{state} = 'data';
450     !!!next-input-character;
451    
452 wakaba 1.3 !!!emit ({type => 'character', data => '<>'});
453 wakaba 1.1
454     redo A;
455     } elsif ($self->{next_input_character} == 0x003F) { # ?
456     !!!parse-error;
457     $self->{state} = 'bogus comment';
458     ## $self->{next_input_character} is intentionally left as is
459     redo A;
460     } else {
461     !!!parse-error;
462     $self->{state} = 'data';
463     ## reconsume
464    
465     !!!emit ({type => 'character', data => '<'});
466    
467     redo A;
468     }
469     } else {
470     die "$0: $self->{content_model_flag}: Unknown content model flag";
471     }
472     } elsif ($self->{state} eq 'close tag open') {
473     if ($self->{content_model_flag} eq 'RCDATA' or
474     $self->{content_model_flag} eq 'CDATA') {
475     my @next_char;
476     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
477     push @next_char, $self->{next_input_character};
478     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
479     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
480     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
481     !!!next-input-character;
482     next TAGNAME;
483     } else {
484     !!!parse-error;
485     $self->{next_input_character} = shift @next_char; # reconsume
486     !!!back-next-input-character (@next_char);
487     $self->{state} = 'data';
488    
489     !!!emit ({type => 'character', data => '</'});
490    
491     redo A;
492     }
493     }
494 wakaba 1.2 push @next_char, $self->{next_input_character};
495 wakaba 1.1
496 wakaba 1.2 unless ($self->{next_input_character} == 0x0009 or # HT
497     $self->{next_input_character} == 0x000A or # LF
498     $self->{next_input_character} == 0x000B or # VT
499     $self->{next_input_character} == 0x000C or # FF
500     $self->{next_input_character} == 0x0020 or # SP
501     $self->{next_input_character} == 0x003E or # >
502     $self->{next_input_character} == 0x002F or # /
503     $self->{next_input_character} == 0x003C or # <
504 wakaba 1.1 $self->{next_input_character} == -1) {
505     !!!parse-error;
506     $self->{next_input_character} = shift @next_char; # reconsume
507     !!!back-next-input-character (@next_char);
508     $self->{state} = 'data';
509    
510     !!!emit ({type => 'character', data => '</'});
511    
512     redo A;
513     } else {
514     $self->{next_input_character} = shift @next_char;
515     !!!back-next-input-character (@next_char);
516     # and consume...
517     }
518     }
519    
520     if (0x0041 <= $self->{next_input_character} and
521     $self->{next_input_character} <= 0x005A) { # A..Z
522     $self->{current_token} = {type => 'end tag',
523     tag_name => chr ($self->{next_input_character} + 0x0020)};
524     $self->{state} = 'tag name';
525     !!!next-input-character;
526     redo A;
527     } elsif (0x0061 <= $self->{next_input_character} and
528     $self->{next_input_character} <= 0x007A) { # a..z
529     $self->{current_token} = {type => 'end tag',
530     tag_name => chr ($self->{next_input_character})};
531     $self->{state} = 'tag name';
532     !!!next-input-character;
533     redo A;
534     } elsif ($self->{next_input_character} == 0x003E) { # >
535     !!!parse-error;
536     $self->{state} = 'data';
537     !!!next-input-character;
538     redo A;
539     } elsif ($self->{next_input_character} == -1) {
540     !!!parse-error;
541     $self->{state} = 'data';
542     # reconsume
543    
544     !!!emit ({type => 'character', data => '</'});
545    
546     redo A;
547     } else {
548     !!!parse-error;
549     $self->{state} = 'bogus comment';
550     ## $self->{next_input_character} is intentionally left as is
551     redo A;
552     }
553     } elsif ($self->{state} eq 'tag name') {
554     if ($self->{next_input_character} == 0x0009 or # HT
555     $self->{next_input_character} == 0x000A or # LF
556     $self->{next_input_character} == 0x000B or # VT
557     $self->{next_input_character} == 0x000C or # FF
558     $self->{next_input_character} == 0x0020) { # SP
559     $self->{state} = 'before attribute name';
560     !!!next-input-character;
561     redo A;
562     } elsif ($self->{next_input_character} == 0x003E) { # >
563     if ($self->{current_token}->{type} eq 'start tag') {
564     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
565     } elsif ($self->{current_token}->{type} eq 'end tag') {
566     $self->{content_model_flag} = 'PCDATA'; # MUST
567 wakaba 1.2 if ($self->{current_token}->{attributes}) {
568 wakaba 1.1 !!!parse-error;
569     }
570     } else {
571     die "$0: $self->{current_token}->{type}: Unknown token type";
572     }
573     $self->{state} = 'data';
574     !!!next-input-character;
575    
576     !!!emit ($self->{current_token}); # start tag or end tag
577     undef $self->{current_token};
578    
579     redo A;
580     } elsif (0x0041 <= $self->{next_input_character} and
581     $self->{next_input_character} <= 0x005A) { # A..Z
582     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
583     # start tag or end tag
584     ## Stay in this state
585     !!!next-input-character;
586     redo A;
587     } elsif ($self->{next_input_character} == 0x003C or # <
588     $self->{next_input_character} == -1) {
589     !!!parse-error;
590     if ($self->{current_token}->{type} eq 'start tag') {
591     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
592     } elsif ($self->{current_token}->{type} eq 'end tag') {
593     $self->{content_model_flag} = 'PCDATA'; # MUST
594 wakaba 1.2 if ($self->{current_token}->{attributes}) {
595 wakaba 1.1 !!!parse-error;
596     }
597     } else {
598     die "$0: $self->{current_token}->{type}: Unknown token type";
599     }
600     $self->{state} = 'data';
601     # reconsume
602    
603     !!!emit ($self->{current_token}); # start tag or end tag
604     undef $self->{current_token};
605    
606     redo A;
607     } elsif ($self->{next_input_character} == 0x002F) { # /
608     !!!next-input-character;
609     if ($self->{next_input_character} == 0x003E and # >
610     $self->{current_token}->{type} eq 'start tag' and
611     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
612     # permitted slash
613     #
614     } else {
615     !!!parse-error;
616     }
617     $self->{state} = 'before attribute name';
618     # next-input-character is already done
619     redo A;
620     } else {
621     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
622     # start tag or end tag
623     ## Stay in the state
624     !!!next-input-character;
625     redo A;
626     }
627     } elsif ($self->{state} eq 'before attribute name') {
628     if ($self->{next_input_character} == 0x0009 or # HT
629     $self->{next_input_character} == 0x000A or # LF
630     $self->{next_input_character} == 0x000B or # VT
631     $self->{next_input_character} == 0x000C or # FF
632     $self->{next_input_character} == 0x0020) { # SP
633     ## Stay in the state
634     !!!next-input-character;
635     redo A;
636     } elsif ($self->{next_input_character} == 0x003E) { # >
637     if ($self->{current_token}->{type} eq 'start tag') {
638     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
639     } elsif ($self->{current_token}->{type} eq 'end tag') {
640     $self->{content_model_flag} = 'PCDATA'; # MUST
641 wakaba 1.2 if ($self->{current_token}->{attributes}) {
642 wakaba 1.1 !!!parse-error;
643     }
644     } else {
645     die "$0: $self->{current_token}->{type}: Unknown token type";
646     }
647     $self->{state} = 'data';
648     !!!next-input-character;
649    
650     !!!emit ($self->{current_token}); # start tag or end tag
651     undef $self->{current_token};
652    
653     redo A;
654     } elsif (0x0041 <= $self->{next_input_character} and
655     $self->{next_input_character} <= 0x005A) { # A..Z
656     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
657     value => ''};
658     $self->{state} = 'attribute name';
659     !!!next-input-character;
660     redo A;
661     } elsif ($self->{next_input_character} == 0x002F) { # /
662     !!!next-input-character;
663     if ($self->{next_input_character} == 0x003E and # >
664     $self->{current_token}->{type} eq 'start tag' and
665     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
666     # permitted slash
667     #
668     } else {
669     !!!parse-error;
670     }
671     ## Stay in the state
672     # next-input-character is already done
673     redo A;
674     } elsif ($self->{next_input_character} == 0x003C or # <
675     $self->{next_input_character} == -1) {
676     !!!parse-error;
677     if ($self->{current_token}->{type} eq 'start tag') {
678     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
679     } elsif ($self->{current_token}->{type} eq 'end tag') {
680     $self->{content_model_flag} = 'PCDATA'; # MUST
681 wakaba 1.2 if ($self->{current_token}->{attributes}) {
682 wakaba 1.1 !!!parse-error;
683     }
684     } else {
685     die "$0: $self->{current_token}->{type}: Unknown token type";
686     }
687     $self->{state} = 'data';
688     # reconsume
689    
690     !!!emit ($self->{current_token}); # start tag or end tag
691     undef $self->{current_token};
692    
693     redo A;
694     } else {
695     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
696     value => ''};
697     $self->{state} = 'attribute name';
698     !!!next-input-character;
699     redo A;
700     }
701     } elsif ($self->{state} eq 'attribute name') {
702     my $before_leave = sub {
703 wakaba 1.2 if (exists $self->{current_token}->{attributes} # start tag or end tag
704 wakaba 1.1 ->{$self->{current_attribute}->{name}}) { # MUST
705     !!!parse-error;
706     ## Discard $self->{current_attribute} # MUST
707     } else {
708 wakaba 1.2 $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
709 wakaba 1.1 = $self->{current_attribute};
710     }
711     }; # $before_leave
712    
713     if ($self->{next_input_character} == 0x0009 or # HT
714     $self->{next_input_character} == 0x000A or # LF
715     $self->{next_input_character} == 0x000B or # VT
716     $self->{next_input_character} == 0x000C or # FF
717     $self->{next_input_character} == 0x0020) { # SP
718     $before_leave->();
719     $self->{state} = 'after attribute name';
720     !!!next-input-character;
721     redo A;
722     } elsif ($self->{next_input_character} == 0x003D) { # =
723     $before_leave->();
724     $self->{state} = 'before attribute value';
725     !!!next-input-character;
726     redo A;
727     } elsif ($self->{next_input_character} == 0x003E) { # >
728     $before_leave->();
729     if ($self->{current_token}->{type} eq 'start tag') {
730     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
731     } elsif ($self->{current_token}->{type} eq 'end tag') {
732     $self->{content_model_flag} = 'PCDATA'; # MUST
733 wakaba 1.2 if ($self->{current_token}->{attributes}) {
734 wakaba 1.1 !!!parse-error;
735     }
736     } else {
737     die "$0: $self->{current_token}->{type}: Unknown token type";
738     }
739     $self->{state} = 'data';
740     !!!next-input-character;
741    
742     !!!emit ($self->{current_token}); # start tag or end tag
743     undef $self->{current_token};
744    
745     redo A;
746     } elsif (0x0041 <= $self->{next_input_character} and
747     $self->{next_input_character} <= 0x005A) { # A..Z
748     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
749     ## Stay in the state
750     !!!next-input-character;
751     redo A;
752     } elsif ($self->{next_input_character} == 0x002F) { # /
753     $before_leave->();
754     !!!next-input-character;
755     if ($self->{next_input_character} == 0x003E and # >
756     $self->{current_token}->{type} eq 'start tag' and
757     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
758     # permitted slash
759     #
760     } else {
761     !!!parse-error;
762     }
763     $self->{state} = 'before attribute name';
764     # next-input-character is already done
765     redo A;
766     } elsif ($self->{next_input_character} == 0x003C or # <
767     $self->{next_input_character} == -1) {
768     !!!parse-error;
769     $before_leave->();
770     if ($self->{current_token}->{type} eq 'start tag') {
771     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
772     } elsif ($self->{current_token}->{type} eq 'end tag') {
773     $self->{content_model_flag} = 'PCDATA'; # MUST
774 wakaba 1.2 if ($self->{current_token}->{attributes}) {
775 wakaba 1.1 !!!parse-error;
776     }
777     } else {
778     die "$0: $self->{current_token}->{type}: Unknown token type";
779     }
780     $self->{state} = 'data';
781     # reconsume
782    
783     !!!emit ($self->{current_token}); # start tag or end tag
784     undef $self->{current_token};
785    
786     redo A;
787     } else {
788     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
789     ## Stay in the state
790     !!!next-input-character;
791     redo A;
792     }
793     } elsif ($self->{state} eq 'after attribute name') {
794     if ($self->{next_input_character} == 0x0009 or # HT
795     $self->{next_input_character} == 0x000A or # LF
796     $self->{next_input_character} == 0x000B or # VT
797     $self->{next_input_character} == 0x000C or # FF
798     $self->{next_input_character} == 0x0020) { # SP
799     ## Stay in the state
800     !!!next-input-character;
801     redo A;
802     } elsif ($self->{next_input_character} == 0x003D) { # =
803     $self->{state} = 'before attribute value';
804     !!!next-input-character;
805     redo A;
806     } elsif ($self->{next_input_character} == 0x003E) { # >
807     if ($self->{current_token}->{type} eq 'start tag') {
808     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
809     } elsif ($self->{current_token}->{type} eq 'end tag') {
810     $self->{content_model_flag} = 'PCDATA'; # MUST
811 wakaba 1.2 if ($self->{current_token}->{attributes}) {
812 wakaba 1.1 !!!parse-error;
813     }
814     } else {
815     die "$0: $self->{current_token}->{type}: Unknown token type";
816     }
817     $self->{state} = 'data';
818     !!!next-input-character;
819    
820     !!!emit ($self->{current_token}); # start tag or end tag
821     undef $self->{current_token};
822    
823     redo A;
824     } elsif (0x0041 <= $self->{next_input_character} and
825     $self->{next_input_character} <= 0x005A) { # A..Z
826     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
827     value => ''};
828     $self->{state} = 'attribute name';
829     !!!next-input-character;
830     redo A;
831     } elsif ($self->{next_input_character} == 0x002F) { # /
832     !!!next-input-character;
833     if ($self->{next_input_character} == 0x003E and # >
834     $self->{current_token}->{type} eq 'start tag' and
835     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
836     # permitted slash
837     #
838     } else {
839     !!!parse-error;
840     }
841     $self->{state} = 'before attribute name';
842     # next-input-character is already done
843     redo A;
844     } elsif ($self->{next_input_character} == 0x003C or # <
845     $self->{next_input_character} == -1) {
846     !!!parse-error;
847     if ($self->{current_token}->{type} eq 'start tag') {
848     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
849     } elsif ($self->{current_token}->{type} eq 'end tag') {
850     $self->{content_model_flag} = 'PCDATA'; # MUST
851 wakaba 1.2 if ($self->{current_token}->{attributes}) {
852 wakaba 1.1 !!!parse-error;
853     }
854     } else {
855     die "$0: $self->{current_token}->{type}: Unknown token type";
856     }
857     $self->{state} = 'data';
858     # reconsume
859    
860     !!!emit ($self->{current_token}); # start tag or end tag
861     undef $self->{current_token};
862    
863     redo A;
864     } else {
865     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
866     value => ''};
867     $self->{state} = 'attribute name';
868     !!!next-input-character;
869     redo A;
870     }
871     } elsif ($self->{state} eq 'before attribute value') {
872     if ($self->{next_input_character} == 0x0009 or # HT
873     $self->{next_input_character} == 0x000A or # LF
874     $self->{next_input_character} == 0x000B or # VT
875     $self->{next_input_character} == 0x000C or # FF
876     $self->{next_input_character} == 0x0020) { # SP
877     ## Stay in the state
878     !!!next-input-character;
879     redo A;
880     } elsif ($self->{next_input_character} == 0x0022) { # "
881     $self->{state} = 'attribute value (double-quoted)';
882     !!!next-input-character;
883     redo A;
884     } elsif ($self->{next_input_character} == 0x0026) { # &
885     $self->{state} = 'attribute value (unquoted)';
886     ## reconsume
887     redo A;
888     } elsif ($self->{next_input_character} == 0x0027) { # '
889     $self->{state} = 'attribute value (single-quoted)';
890     !!!next-input-character;
891     redo A;
892     } elsif ($self->{next_input_character} == 0x003E) { # >
893     if ($self->{current_token}->{type} eq 'start tag') {
894     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
895     } elsif ($self->{current_token}->{type} eq 'end tag') {
896     $self->{content_model_flag} = 'PCDATA'; # MUST
897 wakaba 1.2 if ($self->{current_token}->{attributes}) {
898 wakaba 1.1 !!!parse-error;
899     }
900     } else {
901     die "$0: $self->{current_token}->{type}: Unknown token type";
902     }
903     $self->{state} = 'data';
904     !!!next-input-character;
905    
906     !!!emit ($self->{current_token}); # start tag or end tag
907     undef $self->{current_token};
908    
909     redo A;
910     } elsif ($self->{next_input_character} == 0x003C or # <
911     $self->{next_input_character} == -1) {
912     !!!parse-error;
913     if ($self->{current_token}->{type} eq 'start tag') {
914     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
915     } elsif ($self->{current_token}->{type} eq 'end tag') {
916     $self->{content_model_flag} = 'PCDATA'; # MUST
917 wakaba 1.2 if ($self->{current_token}->{attributes}) {
918 wakaba 1.1 !!!parse-error;
919     }
920     } else {
921     die "$0: $self->{current_token}->{type}: Unknown token type";
922     }
923     $self->{state} = 'data';
924     ## reconsume
925    
926     !!!emit ($self->{current_token}); # start tag or end tag
927     undef $self->{current_token};
928    
929     redo A;
930     } else {
931     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
932     $self->{state} = 'attribute value (unquoted)';
933     !!!next-input-character;
934     redo A;
935     }
936     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
937     if ($self->{next_input_character} == 0x0022) { # "
938     $self->{state} = 'before attribute name';
939     !!!next-input-character;
940     redo A;
941     } elsif ($self->{next_input_character} == 0x0026) { # &
942     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
943     $self->{state} = 'entity in attribute value';
944     !!!next-input-character;
945     redo A;
946     } elsif ($self->{next_input_character} == -1) {
947     !!!parse-error;
948     if ($self->{current_token}->{type} eq 'start tag') {
949     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
950     } elsif ($self->{current_token}->{type} eq 'end tag') {
951     $self->{content_model_flag} = 'PCDATA'; # MUST
952 wakaba 1.2 if ($self->{current_token}->{attributes}) {
953 wakaba 1.1 !!!parse-error;
954     }
955     } else {
956     die "$0: $self->{current_token}->{type}: Unknown token type";
957     }
958     $self->{state} = 'data';
959     ## reconsume
960    
961     !!!emit ($self->{current_token}); # start tag or end tag
962     undef $self->{current_token};
963    
964     redo A;
965     } else {
966     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
967     ## Stay in the state
968     !!!next-input-character;
969     redo A;
970     }
971     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
972     if ($self->{next_input_character} == 0x0027) { # '
973     $self->{state} = 'before attribute name';
974     !!!next-input-character;
975     redo A;
976     } elsif ($self->{next_input_character} == 0x0026) { # &
977     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
978     $self->{state} = 'entity in attribute value';
979     !!!next-input-character;
980     redo A;
981     } elsif ($self->{next_input_character} == -1) {
982     !!!parse-error;
983     if ($self->{current_token}->{type} eq 'start tag') {
984     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
985     } elsif ($self->{current_token}->{type} eq 'end tag') {
986     $self->{content_model_flag} = 'PCDATA'; # MUST
987 wakaba 1.2 if ($self->{current_token}->{attributes}) {
988 wakaba 1.1 !!!parse-error;
989     }
990     } else {
991     die "$0: $self->{current_token}->{type}: Unknown token type";
992     }
993     $self->{state} = 'data';
994     ## reconsume
995    
996     !!!emit ($self->{current_token}); # start tag or end tag
997     undef $self->{current_token};
998    
999     redo A;
1000     } else {
1001     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1002     ## Stay in the state
1003     !!!next-input-character;
1004     redo A;
1005     }
1006     } elsif ($self->{state} eq 'attribute value (unquoted)') {
1007     if ($self->{next_input_character} == 0x0009 or # HT
1008     $self->{next_input_character} == 0x000A or # LF
1009     $self->{next_input_character} == 0x000B or # VT
1010     $self->{next_input_character} == 0x000C or # FF
1011     $self->{next_input_character} == 0x0020) { # SP
1012     $self->{state} = 'before attribute name';
1013     !!!next-input-character;
1014     redo A;
1015     } elsif ($self->{next_input_character} == 0x0026) { # &
1016     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
1017     $self->{state} = 'entity in attribute value';
1018     !!!next-input-character;
1019     redo A;
1020     } elsif ($self->{next_input_character} == 0x003E) { # >
1021     if ($self->{current_token}->{type} eq 'start tag') {
1022     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1023     } elsif ($self->{current_token}->{type} eq 'end tag') {
1024     $self->{content_model_flag} = 'PCDATA'; # MUST
1025 wakaba 1.2 if ($self->{current_token}->{attributes}) {
1026 wakaba 1.1 !!!parse-error;
1027     }
1028     } else {
1029     die "$0: $self->{current_token}->{type}: Unknown token type";
1030     }
1031     $self->{state} = 'data';
1032     !!!next-input-character;
1033    
1034     !!!emit ($self->{current_token}); # start tag or end tag
1035     undef $self->{current_token};
1036    
1037     redo A;
1038     } elsif ($self->{next_input_character} == 0x003C or # <
1039     $self->{next_input_character} == -1) {
1040     !!!parse-error;
1041     if ($self->{current_token}->{type} eq 'start tag') {
1042     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
1043     } elsif ($self->{current_token}->{type} eq 'end tag') {
1044     $self->{content_model_flag} = 'PCDATA'; # MUST
1045 wakaba 1.2 if ($self->{current_token}->{attributes}) {
1046 wakaba 1.1 !!!parse-error;
1047     }
1048     } else {
1049     die "$0: $self->{current_token}->{type}: Unknown token type";
1050     }
1051     $self->{state} = 'data';
1052     ## reconsume
1053    
1054     !!!emit ($self->{current_token}); # start tag or end tag
1055     undef $self->{current_token};
1056    
1057     redo A;
1058     } else {
1059     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
1060     ## Stay in the state
1061     !!!next-input-character;
1062     redo A;
1063     }
1064     } elsif ($self->{state} eq 'entity in attribute value') {
1065     my $token = $self->_tokenize_attempt_to_consume_an_entity;
1066    
1067     unless (defined $token) {
1068     $self->{current_attribute}->{value} .= '&';
1069     } else {
1070     $self->{current_attribute}->{value} .= $token->{data};
1071     ## ISSUE: spec says "append the returned character token to the current attribute's value"
1072     }
1073    
1074     $self->{state} = $self->{last_attribute_value_state};
1075     # next-input-character is already done
1076     redo A;
1077     } elsif ($self->{state} eq 'bogus comment') {
1078     ## (only happen if PCDATA state)
1079    
1080     my $token = {type => 'comment', data => ''};
1081    
1082     BC: {
1083     if ($self->{next_input_character} == 0x003E) { # >
1084     $self->{state} = 'data';
1085     !!!next-input-character;
1086    
1087     !!!emit ($token);
1088    
1089     redo A;
1090     } elsif ($self->{next_input_character} == -1) {
1091     $self->{state} = 'data';
1092     ## reconsume
1093    
1094     !!!emit ($token);
1095    
1096     redo A;
1097     } else {
1098     $token->{data} .= chr ($self->{next_input_character});
1099     !!!next-input-character;
1100     redo BC;
1101     }
1102     } # BC
1103     } elsif ($self->{state} eq 'markup declaration open') {
1104     ## (only happen if PCDATA state)
1105    
1106     my @next_char;
1107     push @next_char, $self->{next_input_character};
1108    
1109     if ($self->{next_input_character} == 0x002D) { # -
1110     !!!next-input-character;
1111     push @next_char, $self->{next_input_character};
1112     if ($self->{next_input_character} == 0x002D) { # -
1113     $self->{current_token} = {type => 'comment', data => ''};
1114     $self->{state} = 'comment';
1115     !!!next-input-character;
1116     redo A;
1117     }
1118     } elsif ($self->{next_input_character} == 0x0044 or # D
1119     $self->{next_input_character} == 0x0064) { # d
1120     !!!next-input-character;
1121     push @next_char, $self->{next_input_character};
1122     if ($self->{next_input_character} == 0x004F or # O
1123     $self->{next_input_character} == 0x006F) { # o
1124     !!!next-input-character;
1125     push @next_char, $self->{next_input_character};
1126     if ($self->{next_input_character} == 0x0043 or # C
1127     $self->{next_input_character} == 0x0063) { # c
1128     !!!next-input-character;
1129     push @next_char, $self->{next_input_character};
1130     if ($self->{next_input_character} == 0x0054 or # T
1131     $self->{next_input_character} == 0x0074) { # t
1132     !!!next-input-character;
1133     push @next_char, $self->{next_input_character};
1134     if ($self->{next_input_character} == 0x0059 or # Y
1135     $self->{next_input_character} == 0x0079) { # y
1136     !!!next-input-character;
1137     push @next_char, $self->{next_input_character};
1138     if ($self->{next_input_character} == 0x0050 or # P
1139     $self->{next_input_character} == 0x0070) { # p
1140     !!!next-input-character;
1141     push @next_char, $self->{next_input_character};
1142     if ($self->{next_input_character} == 0x0045 or # E
1143     $self->{next_input_character} == 0x0065) { # e
1144     ## ISSUE: What a stupid code this is!
1145     $self->{state} = 'DOCTYPE';
1146     !!!next-input-character;
1147     redo A;
1148     }
1149     }
1150     }
1151     }
1152     }
1153     }
1154     }
1155    
1156     !!!parse-error;
1157     $self->{next_input_character} = shift @next_char;
1158     !!!back-next-input-character (@next_char);
1159     $self->{state} = 'bogus comment';
1160     redo A;
1161    
1162     ## ISSUE: typos in spec: chacacters, is is a parse error
1163     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1164     } elsif ($self->{state} eq 'comment') {
1165     if ($self->{next_input_character} == 0x002D) { # -
1166     $self->{state} = 'comment dash';
1167     !!!next-input-character;
1168     redo A;
1169     } elsif ($self->{next_input_character} == -1) {
1170     !!!parse-error;
1171     $self->{state} = 'data';
1172     ## reconsume
1173    
1174     !!!emit ($self->{current_token}); # comment
1175     undef $self->{current_token};
1176    
1177     redo A;
1178     } else {
1179     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1180     ## Stay in the state
1181     !!!next-input-character;
1182     redo A;
1183     }
1184     } elsif ($self->{state} eq 'comment dash') {
1185     if ($self->{next_input_character} == 0x002D) { # -
1186     $self->{state} = 'comment end';
1187     !!!next-input-character;
1188     redo A;
1189     } elsif ($self->{next_input_character} == -1) {
1190     !!!parse-error;
1191     $self->{state} = 'data';
1192     ## reconsume
1193    
1194     !!!emit ($self->{current_token}); # comment
1195     undef $self->{current_token};
1196    
1197     redo A;
1198     } else {
1199     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1200     $self->{state} = 'comment';
1201     !!!next-input-character;
1202     redo A;
1203     }
1204     } elsif ($self->{state} eq 'comment end') {
1205     if ($self->{next_input_character} == 0x003E) { # >
1206     $self->{state} = 'data';
1207     !!!next-input-character;
1208    
1209     !!!emit ($self->{current_token}); # comment
1210     undef $self->{current_token};
1211    
1212     redo A;
1213     } elsif ($self->{next_input_character} == 0x002D) { # -
1214     !!!parse-error;
1215     $self->{current_token}->{data} .= '-'; # comment
1216     ## Stay in the state
1217     !!!next-input-character;
1218     redo A;
1219     } elsif ($self->{next_input_character} == -1) {
1220     !!!parse-error;
1221     $self->{state} = 'data';
1222     ## reconsume
1223    
1224     !!!emit ($self->{current_token}); # comment
1225     undef $self->{current_token};
1226    
1227     redo A;
1228     } else {
1229     !!!parse-error;
1230     $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1231     $self->{state} = 'comment';
1232     !!!next-input-character;
1233     redo A;
1234     }
1235     } elsif ($self->{state} eq 'DOCTYPE') {
1236     if ($self->{next_input_character} == 0x0009 or # HT
1237     $self->{next_input_character} == 0x000A or # LF
1238     $self->{next_input_character} == 0x000B or # VT
1239     $self->{next_input_character} == 0x000C or # FF
1240     $self->{next_input_character} == 0x0020) { # SP
1241     $self->{state} = 'before DOCTYPE name';
1242     !!!next-input-character;
1243     redo A;
1244     } else {
1245     !!!parse-error;
1246     $self->{state} = 'before DOCTYPE name';
1247     ## reconsume
1248     redo A;
1249     }
1250     } elsif ($self->{state} eq 'before DOCTYPE name') {
1251     if ($self->{next_input_character} == 0x0009 or # HT
1252     $self->{next_input_character} == 0x000A or # LF
1253     $self->{next_input_character} == 0x000B or # VT
1254     $self->{next_input_character} == 0x000C or # FF
1255     $self->{next_input_character} == 0x0020) { # SP
1256     ## Stay in the state
1257     !!!next-input-character;
1258     redo A;
1259     } elsif (0x0061 <= $self->{next_input_character} and
1260     $self->{next_input_character} <= 0x007A) { # a..z
1261     $self->{current_token} = {type => 'DOCTYPE',
1262     name => chr ($self->{next_input_character} - 0x0020),
1263     error => 1};
1264     $self->{state} = 'DOCTYPE name';
1265     !!!next-input-character;
1266     redo A;
1267     } elsif ($self->{next_input_character} == 0x003E) { # >
1268     !!!parse-error;
1269     $self->{state} = 'data';
1270     !!!next-input-character;
1271    
1272     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1273    
1274     redo A;
1275     } elsif ($self->{next_input_character} == -1) {
1276     !!!parse-error;
1277     $self->{state} = 'data';
1278     ## reconsume
1279    
1280     !!!emit ({type => 'DOCTYPE', name => '', error => 1});
1281    
1282     redo A;
1283     } else {
1284     $self->{current_token} = {type => 'DOCTYPE',
1285     name => chr ($self->{next_input_character}),
1286     error => 1};
1287     $self->{state} = 'DOCTYPE name';
1288     !!!next-input-character;
1289     redo A;
1290     }
1291     } elsif ($self->{state} eq 'DOCTYPE name') {
1292     if ($self->{next_input_character} == 0x0009 or # HT
1293     $self->{next_input_character} == 0x000A or # LF
1294     $self->{next_input_character} == 0x000B or # VT
1295     $self->{next_input_character} == 0x000C or # FF
1296     $self->{next_input_character} == 0x0020) { # SP
1297     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1298     $self->{state} = 'after DOCTYPE name';
1299     !!!next-input-character;
1300     redo A;
1301     } elsif ($self->{next_input_character} == 0x003E) { # >
1302     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1303     $self->{state} = 'data';
1304     !!!next-input-character;
1305    
1306     !!!emit ($self->{current_token}); # DOCTYPE
1307     undef $self->{current_token};
1308    
1309     redo A;
1310     } elsif (0x0061 <= $self->{next_input_character} and
1311     $self->{next_input_character} <= 0x007A) { # a..z
1312     $self->{current_token}->{name} .= chr ($self->{next_input_character} - 0x0020); # DOCTYPE
1313     #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1314     ## Stay in the state
1315     !!!next-input-character;
1316     redo A;
1317     } elsif ($self->{next_input_character} == -1) {
1318     !!!parse-error;
1319     $self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML'); # DOCTYPE
1320     $self->{state} = 'data';
1321     ## reconsume
1322    
1323     !!!emit ($self->{current_token});
1324     undef $self->{current_token};
1325    
1326     redo A;
1327     } else {
1328 wakaba 1.3 $self->{current_token}->{name}
1329     .= chr ($self->{next_input_character}); # DOCTYPE
1330 wakaba 1.1 #$self->{current_token}->{error} = ($self->{current_token}->{name} ne 'HTML');
1331     ## Stay in the state
1332     !!!next-input-character;
1333     redo A;
1334     }
1335     } elsif ($self->{state} eq 'after DOCTYPE name') {
1336     if ($self->{next_input_character} == 0x0009 or # HT
1337     $self->{next_input_character} == 0x000A or # LF
1338     $self->{next_input_character} == 0x000B or # VT
1339     $self->{next_input_character} == 0x000C or # FF
1340     $self->{next_input_character} == 0x0020) { # SP
1341     ## Stay in the state
1342     !!!next-input-character;
1343     redo A;
1344     } elsif ($self->{next_input_character} == 0x003E) { # >
1345     $self->{state} = 'data';
1346     !!!next-input-character;
1347    
1348     !!!emit ($self->{current_token}); # DOCTYPE
1349     undef $self->{current_token};
1350    
1351     redo A;
1352     } elsif ($self->{next_input_character} == -1) {
1353     !!!parse-error;
1354     $self->{state} = 'data';
1355     ## reconsume
1356    
1357     !!!emit ($self->{current_token}); # DOCTYPE
1358     undef $self->{current_token};
1359    
1360     redo A;
1361     } else {
1362     !!!parse-error;
1363     $self->{current_token}->{error} = 1; # DOCTYPE
1364     $self->{state} = 'bogus DOCTYPE';
1365     !!!next-input-character;
1366     redo A;
1367     }
1368     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1369     if ($self->{next_input_character} == 0x003E) { # >
1370     $self->{state} = 'data';
1371     !!!next-input-character;
1372    
1373     !!!emit ($self->{current_token}); # DOCTYPE
1374     undef $self->{current_token};
1375    
1376     redo A;
1377     } elsif ($self->{next_input_character} == -1) {
1378     !!!parse-error;
1379     $self->{state} = 'data';
1380     ## reconsume
1381    
1382     !!!emit ($self->{current_token}); # DOCTYPE
1383     undef $self->{current_token};
1384    
1385     redo A;
1386     } else {
1387     ## Stay in the state
1388     !!!next-input-character;
1389     redo A;
1390     }
1391     } else {
1392     die "$0: $self->{state}: Unknown state";
1393     }
1394     } # A
1395    
1396     die "$0: _get_next_token: unexpected case";
1397     } # _get_next_token
1398    
## Attempt to consume an entity (character reference).
##
## On entry, |$self->{next_input_character}| is the character that
## follows the "&".  Three forms are recognized:
##
##   - "#x" / "#X" followed by hexadecimal digits,
##   - "#" followed by decimal digits,
##   - a name listed in |$entity_char|.
##
## Returns a hash reference |{type => 'character', data => $string}|
## when a reference is recognized.  Returns |undef| when it is not;
## in that case the input is restored (or, for an unknown name, the
## consumed name characters are handed back as a character token) so
## that the caller can emit a literal "&" and carry on.
##
## A missing terminating ";" is a parse error but the reference is
## still accepted.
sub _tokenize_attempt_to_consume_an_entity ($) {
  my $self = shift;

  if ($self->{next_input_character} == 0x0023) { # #
    !!!next-input-character;
    my $num;
    if ($self->{next_input_character} == 0x0078 or # x
        $self->{next_input_character} == 0x0058) { # X
      ## Remember which of "x" / "X" was seen so that it can be put
      ## back verbatim if no hexadecimal digit follows.
      my $x_char = $self->{next_input_character};
      X: {
        !!!next-input-character;
        if (0x0030 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x0039) { # 0..9
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0030;
          redo X;
        } elsif (0x0061 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0066) { # a..f
          ## ISSUE: the spec says U+0078, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0060 + 9;
          redo X;
        } elsif (0x0041 <= $self->{next_input_character} and
                 $self->{next_input_character} <= 0x0046) { # A..F
          ## ISSUE: the spec says U+0058, which is apparently incorrect
          $num ||= 0;
          $num *= 0x10;
          $num += $self->{next_input_character} - 0x0040 + 9;
          redo X;
        } elsif (not defined $num) { # no hexadecimal digit
          !!!parse-error;
          ## Nothing is to be consumed: put back both the "x" and the
          ## non-digit character that followed it (in input order),
          ## then make "#" the next input character again.  The
          ## push-back has to happen before |next_input_character| is
          ## overwritten; otherwise the character after the "x" would
          ## be lost.
          !!!back-next-input-character ($x_char, $self->{next_input_character});
          $self->{next_input_character} = 0x0023; # #
          return undef;
        } elsif ($self->{next_input_character} == 0x003B) { # ;
          !!!next-input-character;
        } else {
          !!!parse-error;
        }

        ## TODO: check the definition for |a valid Unicode character|.
        if ($num > 1114111 or $num == 0) {
          $num = 0xFFFD; # REPLACEMENT CHARACTER
          ## ISSUE: Why this is not an error?
        }

        return {type => 'character', data => chr $num};
      } # X
    } elsif (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
      my $code = $self->{next_input_character} - 0x0030;
      !!!next-input-character;

      while (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039) { # 0..9
        $code *= 10;
        $code += $self->{next_input_character} - 0x0030;

        !!!next-input-character;
      }

      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error;
      }

      ## TODO: check the definition for |a valid Unicode character|.
      if ($code > 1114111 or $code == 0) {
        $code = 0xFFFD; # REPLACEMENT CHARACTER
        ## ISSUE: Why this is not an error?
      }

      return {type => 'character', data => chr $code};
    } else {
      ## "#" followed by neither "x" / "X" nor a digit: put the
      ## character back and make "#" the next input character again.
      !!!parse-error;
      !!!back-next-input-character ($self->{next_input_character});
      $self->{next_input_character} = 0x0023; # #
      return undef;
    }
  } elsif ((0x0041 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x005A) or
           (0x0061 <= $self->{next_input_character} and
            $self->{next_input_character} <= 0x007A)) {
    my $entity_name = chr $self->{next_input_character};
    !!!next-input-character;

    ## |$value| is the text to be returned or handed back: the
    ## replacement text of the most recent name found in
    ## |$entity_char|, followed by any name characters read after
    ## that match (or all characters read, if nothing has matched).
    my $value = $entity_name;
    my $match;

    while (length $entity_name < 10 and
           ## NOTE: Some number greater than the maximum length of entity name
           ((0x0041 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x005A) or
            (0x0061 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x007A) or
            (0x0030 <= $self->{next_input_character} and
             $self->{next_input_character} <= 0x0039))) {
      $entity_name .= chr $self->{next_input_character};
      if (defined $entity_char->{$entity_name}) {
        $value = $entity_char->{$entity_name};
        $match = 1;
      } else {
        $value .= chr $self->{next_input_character};
      }
      !!!next-input-character;
    }

    if ($match) {
      if ($self->{next_input_character} == 0x003B) { # ;
        !!!next-input-character;
      } else {
        !!!parse-error;
      }

      return {type => 'character', data => $value};
    } else {
      !!!parse-error;
      ## NOTE: No characters are consumed in the spec.
      ## The name characters already read are handed back as a
      ## character token instead of being un-read one by one.
      !!!back-token ({type => 'character', data => $value});
      return undef;
    }
  } else {
    ## no characters are consumed
    !!!parse-error;
    return undef;
  }
} # _tokenize_attempt_to_consume_an_entity
1529    
## Prepare the tree construction stage: load the DOM implementation,
## create the document that will receive the parsed tree, and switch
## off its strict error checking while the tree is being built.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;

  require What::NanoDOM;

  my $doc = What::NanoDOM::Document->new;
  $self->{document} = $doc;

  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  $doc->strict_error_checking (0);
} # _initialize_tree_constructor
1538    
## Finish the tree construction stage: switch the document's strict
## error checking back on.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;

  ## TODO: Turn mutation events on
  my $doc = $self->{document};
  $doc->strict_error_checking (1);
} # _terminate_tree_constructor
1544    
1545     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1546    
1547     sub _construct_tree ($) {
1548     my ($self) = @_;
1549    
1550     ## When an interactive UA renders the $self->{document} available
1551     ## to the user, or when it begins accepting user input, is
1552     ## not defined.
1553    
1554     ## Append a character: collect it and all subsequent consecutive
1555     ## characters and insert one Text node whose data is concatenation
1556     ## of all those characters. # MUST
1557    
1558     my $token;
1559     !!!next-token;
1560    
1561     my $phase = 'initial'; # MUST
1562    
1563     my $open_elements = [];
1564     my $active_formatting_elements = [];
1565     my $head_element;
1566     my $form_element;
1567     my $insertion_mode = 'before head';
1568    
## Reconstruct the active formatting elements.
##
## Takes one argument, a code reference that is called with each newly
## cloned element node in order to insert it into the tree.  Entries of
## |$active_formatting_elements| and |$open_elements| are array
## references whose first item is the node (or the string "#marker")
## and whose second item is the element name.
my $reconstruct_active_formatting_elements = sub { # MUST
  my ($insert) = @_;

  ## True if the given node is in the stack of open elements.
  my $is_open = sub {
    my $node = shift;
    for my $open (@$open_elements) {
      return 1 if $node eq $open->[0];
    }
    return 0;
  };

  ## Step 1: nothing to do if the list is empty.
  return unless @$active_formatting_elements;

  ## Step 3: start from the last entry in the list.
  my $i = -1;
  my $entry = $active_formatting_elements->[$i];

  ## Step 2: nothing to do if that entry is a marker or is still open.
  return if $entry->[0] eq '#marker';
  return if $is_open->($entry->[0]);

  ## Steps 4..7: walk backwards until the first entry of the list, a
  ## marker, or an open element is reached.
  REWIND: while (1) {
    ## Step 4: there is no earlier entry; start creating from here.
    last REWIND if $active_formatting_elements->[0]->[0] eq $entry->[0];

    ## Step 5
    $i--;
    $entry = $active_formatting_elements->[$i];

    ## Step 6: keep rewinding while the entry is neither a marker nor
    ## an open element.
    next REWIND
        unless ($entry->[0] eq '#marker' or $is_open->($entry->[0]));

    ## Step 7: advance to the entry after the marker / open element.
    $i++;
    $entry = $active_formatting_elements->[$i];
    last REWIND;
  } # REWIND

  ## Steps 8..11: clone every entry from here to the end of the list.
  CREATE: while (1) {
    ## Step 8
    my $clone = [$entry->[0]->clone_node (0), $entry->[1]];

    ## Step 9
    $insert->($clone->[0]);
    push @$open_elements, $clone;

    ## Step 10: the list entry now refers to the clone.
    $active_formatting_elements->[$i] = $open_elements->[-1];

    ## Step 11: finished once the clone is the last entry in the list.
    last CREATE if $clone->[0] eq $active_formatting_elements->[-1]->[0];

    ## Step 7'
    $i++;
    $entry = $active_formatting_elements->[$i];
  } # CREATE

  return;
}; # $reconstruct_active_formatting_elements
1639    
## Remove the last "#marker" entry of |$active_formatting_elements|
## together with every entry after it.  The list is left untouched if
## it contains no marker.
my $clear_up_to_marker = sub {
  my $i = $#$active_formatting_elements;
  while ($i >= 0) {
    if ($active_formatting_elements->[$i]->[0] eq '#marker') {
      ## Two-argument splice: drops everything from index $i onwards.
      splice @$active_formatting_elements, $i;
      return;
    }
    $i--;
  }
  return;
}; # $clear_up_to_marker
1648    
## Reset |$insertion_mode| according to the stack of open elements,
## examining the stack from its last entry towards its first.
my $reset_insertion_mode = sub {
  ## Steps 4..13: element name => insertion mode.
  my %mode_for = (
    select => 'in select',
    td => 'in cell',
    th => 'in cell',
    tr => 'in row',
    tbody => 'in table body',
    thead => 'in table head',
    tfoot => 'in table foot',
    caption => 'in caption',
    colgroup => 'in column group',
    table => 'in table',
    head => 'in body', # not in head!
    body => 'in body',
    frameset => 'in frameset',
  );

  ## Step 1
  my $is_last;

  ## Step 2
  my $depth = -1;
  my $current = $open_elements->[$depth];

  while (1) {
    ## Step 3: note when the first entry of the stack has been reached.
    if ($open_elements->[0]->[0] eq $current->[0]) {
      $is_last = 1;
    }
    ## TODO: the element whose inner_html is set is neither td nor th, then $node = the element

    ## Steps 4..13
    my $mapped = $mode_for{$current->[1]};
    if (defined $mapped) {
      $insertion_mode = $mapped;
      return;
    }

    ## Step 14
    if ($current->[1] eq 'html') {
      $insertion_mode = defined $head_element ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($is_last) {
      $insertion_mode = 'in body';
      return;
    }

    ## Steps 16 and 17: move one entry towards the first and repeat.
    $depth--;
    $current = $open_elements->[$depth];
  }
}; # $reset_insertion_mode
1701    
## Process a |style| start tag (the current |$token|).
##
## Creates the |style| element with the attributes of the start tag
## token, appends it to the |head| element (when the insertion mode is
## "in head" and a |head| element exists) or otherwise to the current
## node, and then collects the following character tokens, read with
## the content model flag set to CDATA, as the element's text.  The
## token after the text is expected to be the |style| end tag; anything
## else is a parse error.  In either case that token is dropped and the
## next one is fetched.
my $style_start_tag = sub {
  ## The token's attributes are passed along so that attributes on the
  ## start tag (e.g. |type| or |media|) end up on the element, in the
  ## same way as is done for |script|.
  my $style_el;
  !!!create-element ($style_el, 'style', $token->{attributes});
  ## $insertion_mode eq 'in head' and ... (always true)
  (($insertion_mode eq 'in head' and defined $head_element)
   ? $head_element : $open_elements->[-1]->[0])
      ->append_child ($style_el);
  $self->{content_model_flag} = 'CDATA';

  my $text = '';
  !!!next-token;
  while ($token->{type} eq 'character') {
    $text .= $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  if (length $text) {
    $style_el->manakai_append_text ($text);
  }

  $self->{content_model_flag} = 'PCDATA';

  if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
    ## Ignore the token
  } else {
    !!!parse-error;
    ## ISSUE: And ignore?
  }
  !!!next-token;
}; # $style_start_tag
1730    
## Process a |script| start tag (the current |$token|).
##
## Creates the |script| element with the token's attributes, collects
## the following character tokens (read with the content model flag
## set to CDATA) as its text, and only then appends the element to the
## |head| element or to the current node.  A token other than the
## |script| end tag after the text is a parse error; that token is
## dropped either way and the next one is fetched.
my $script_start_tag = sub {
  my $script_el;
  !!!create-element ($script_el, 'script', $token->{attributes});
  ## TODO: mark as "parser-inserted"

  $self->{content_model_flag} = 'CDATA';

  ## Gather the data of all consecutive character tokens.
  my @chunks;
  !!!next-token;
  while ($token->{type} eq 'character') {
    push @chunks, $token->{data};
    !!!next-token;
  } # stop if non-character token or tokenizer stops tokenising
  my $text = join '', @chunks;
  $script_el->manakai_append_text ($text) if length $text;

  $self->{content_model_flag} = 'PCDATA';

  unless ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'script') {
    !!!parse-error;
    ## ISSUE: And ignore?
    ## TODO: mark as "already executed"
  }
  ## (Otherwise the end tag token is simply ignored.)

  ## TODO: inner_html mode then mark as "already executed" and skip
  {
    ## TODO: $old_insertion_point = current insertion point
    ## TODO: insertion point = just before the next input character

    my $parent = ($insertion_mode eq 'in head' and defined $head_element)
        ? $head_element : $open_elements->[-1]->[0];
    $parent->append_child ($script_el);

    ## TODO: insertion point = $old_insertion_point (might be "undefined")

    ## TODO: if there is a script that will execute as soon as the parser resume, then...
  }

  !!!next-token;
}; # $script_start_tag
1774    
## Processes an end tag whose tag name denotes a formatting element
## (called from |$in_body| for |a|, |b|, |i|, ...).
##
## Argument: the tag name of the end tag being processed.
##
## Entries of |$open_elements| and |$active_formatting_elements| are
## array references of the form |[$node, $tag_name]|; a marker in the
## active list is |['#marker', '']|.
##
## The steps are numbered as in the HTML5 tree construction algorithm
## for this case.  The block |FET| is re-run (Step 14) until either the
## formatting element cannot be found any more or there is no furthest
## block (Step 3); both exits consume the current token.
my $formatting_end_tag = sub {
  my $tag_name = shift;

  FET: {
    ## Step 1
    ## Find the last entry with this tag name in the list of active
    ## formatting elements, not looking beyond the last marker.
    my $formatting_element;
    my $formatting_element_i_in_active;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
        $formatting_element = $active_formatting_elements->[$_];
        $formatting_element_i_in_active = $_;
        last AFE;
      } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
        last AFE;
      }
    } # AFE
    unless (defined $formatting_element) {
      !!!parse-error;
      ## Ignore the token
      !!!next-token;
      return;
    }
    ## has an element in scope
    my $in_scope = 1;
    my $formatting_element_i_in_open;
    INSCOPE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if ($node->[0] eq $formatting_element->[0]) {
        if ($in_scope) {
          $formatting_element_i_in_open = $_;
          last INSCOPE;
        } else { # in open elements but not in scope
          !!!parse-error;
          ## Ignore the token
          !!!next-token;
          return;
        }
      } elsif ({
                table => 1, caption => 1, td => 1, th => 1,
                button => 1, marquee => 1, object => 1, html => 1,
               }->{$node->[1]}) {
        $in_scope = 0;
      }
    } # INSCOPE
    unless (defined $formatting_element_i_in_open) {
      !!!parse-error;
      ## Remove the formatting element itself from the list of active
      ## formatting elements.  It is not necessarily the last entry, so
      ## its recorded index is used rather than |pop|.
      splice @$active_formatting_elements,
          $formatting_element_i_in_active, 1;
      !!!next-token; ## TODO: ok?
      return;
    }
    if (not $open_elements->[-1]->[0] eq $formatting_element->[0]) {
      !!!parse-error;
    }

    ## Step 2
    ## Walk from the end of the stack back to the formatting element;
    ## the last qualifying node seen (i.e. the one nearest to the
    ## formatting element) becomes the furthest block.
    my $furthest_block;
    my $furthest_block_i_in_open;
    OE: for (reverse 0..$#$open_elements) {
      my $node = $open_elements->[$_];
      if (not $formatting_category->{$node->[1]} and
          #not $phrasing_category->{$node->[1]} and
          ($special_category->{$node->[1]} or
           $scoping_category->{$node->[1]})) {
        $furthest_block = $node;
        $furthest_block_i_in_open = $_;
      } elsif ($node->[0] eq $formatting_element->[0]) {
        last OE;
      }
    } # OE

    ## Step 3
    unless (defined $furthest_block) { # MUST
      ## No furthest block: pop the formatting element and everything
      ## after it, and drop it from the active list.
      splice @$open_elements, $formatting_element_i_in_open;
      splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
      !!!next-token;
      return;
    }

    ## Step 4
    my $common_ancestor_node = $open_elements->[$formatting_element_i_in_open - 1];

    ## Step 5
    my $furthest_block_parent = $furthest_block->[0]->parent_node;
    if (defined $furthest_block_parent) {
      $furthest_block_parent->remove_child ($furthest_block->[0]);
    }

    ## Step 6
    ## The bookmark is kept as "the node after which the clone is to be
    ## inserted" in the list of active formatting elements.
    ## NOTE(review): if the formatting element is the first entry of the
    ## list, index -1 selects the last entry -- confirm this is intended.
    my $bookmark_prev_el
        = $active_formatting_elements->[$formatting_element_i_in_active - 1]
            ->[0];

    ## Step 7
    my $node = $furthest_block;
    my $node_i_in_open = $furthest_block_i_in_open;
    my $last_node = $furthest_block;
    S7: {
      ## Step 1
      $node_i_in_open--;
      $node = $open_elements->[$node_i_in_open];

      ## Step 2
      ## A node that is not in the list of active formatting elements is
      ## removed from the stack and the next node is examined.
      my $node_i_in_active;
      S7S2: {
        for (reverse 0..$#$active_formatting_elements) {
          if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
            $node_i_in_active = $_;
            last S7S2;
          }
        }
        splice @$open_elements, $node_i_in_open, 1;
        redo S7;
      } # S7S2

      ## Step 3
      last S7 if $node->[0] eq $formatting_element->[0];

      ## Step 4
      if ($last_node->[0] eq $furthest_block->[0]) {
        $bookmark_prev_el = $node->[0];
      }

      ## Step 5
      ## A node that already has children is replaced by a shallow clone
      ## in both lists.
      if ($node->[0]->has_child_nodes ()) {
        my $clone = [$node->[0]->clone_node (0), $node->[1]];
        $active_formatting_elements->[$node_i_in_active] = $clone;
        $open_elements->[$node_i_in_open] = $clone;
        $node = $clone;
      }

      ## Step 6
      $node->[0]->append_child ($last_node->[0]);

      ## Step 7
      $last_node = $node;

      ## Step 8
      redo S7;
    } # S7

    ## Step 8
    $common_ancestor_node->[0]->append_child ($last_node->[0]);

    ## Step 9
    my $clone = [$formatting_element->[0]->clone_node (0),
                 $formatting_element->[1]];

    ## Step 10
    ## The child list is copied first since it is modified while moving.
    my @cn = @{$furthest_block->[0]->child_nodes};
    $clone->[0]->append_child ($_) for @cn;

    ## Step 11
    $furthest_block->[0]->append_child ($clone->[0]);

    ## Step 12
    ## Remove the formatting element from the active list and insert the
    ## clone just after the bookmark.  |$i| is decremented when the
    ## removal happens before (at a smaller index than) the bookmark.
    my $i;
    AFE: for (reverse 0..$#$active_formatting_elements) {
      if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$active_formatting_elements, $_, 1;
        $i-- and last AFE if defined $i;
      } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
        $i = $_;
      }
    } # AFE
    splice @$active_formatting_elements, $i + 1, 0, $clone;

    ## Step 13
    ## Remove the formatting element from the stack of open elements and
    ## insert the clone immediately after the furthest block.
    undef $i;
    OE: for (reverse 0..$#$open_elements) {
      if ($open_elements->[$_]->[0] eq $formatting_element->[0]) {
        splice @$open_elements, $_, 1;
        $i-- and last OE if defined $i;
      } elsif ($open_elements->[$_]->[0] eq $furthest_block->[0]) {
        $i = $_;
      }
    } # OE
    ## Length 0: this is an insertion.  Replacing one entry here would
    ## drop the element that follows the furthest block from the stack.
    splice @$open_elements, $i + 1, 0, $clone;

    ## Step 14
    redo FET;
  } # FET
}; # $formatting_end_tag
1957    
## Insertion callback: appends the given node as the last child of the
## node held by the last entry of |$open_elements|.
## Argument: the node to insert.  Returns whatever |append_child| returns.
my $insert_to_current = sub {
  my ($new_child) = @_;
  my $current_entry = $open_elements->[-1];
  return $current_entry->[0]->append_child ($new_child);
}; # $insert_to_current
1961    
## Insertion callback used where content must not land inside a table
## structure.
##
## If the last entry of |$open_elements| is not a |table|, |tbody|,
## |tfoot|, |thead| or |tr|, the node is simply appended to it.
## Otherwise the nearest |table| on the stack is located and the node is
## inserted either just before that table in the table's parent element
## or, when the table has no element parent, appended to the stack entry
## preceding the table.  If no |table| is on the stack at all, the first
## stack entry is used as the parent.
## Argument: the node to insert.
my $insert_to_foster = sub {
  my ($child) = @_;

  my $current_is_table_part = {
    table => 1, tbody => 1, tfoot => 1,
    thead => 1, tr => 1,
  }->{$open_elements->[-1]->[1]};

  ## Ordinary case: append to the last open element.
  return $open_elements->[-1]->[0]->append_child ($child)
      unless $current_is_table_part;

  # MUST
  my ($foster_parent_element, $next_sibling);
  my $idx = $#$open_elements;
  while ($idx >= 0) {
    my $entry = $open_elements->[$idx];
    if ($entry->[1] eq 'table') {
      my $table_parent = $entry->[0]->parent_node;
      if (defined $table_parent and $table_parent->node_type == 1) {
        ## The table sits in an element: insert right before the table.
        $foster_parent_element = $table_parent;
        $next_sibling = $entry->[0];
      } else {
        ## Otherwise use the element preceding the table on the stack.
        $foster_parent_element = $open_elements->[$idx - 1]->[0];
      }
      last;
    }
    $idx--;
  }

  ## No table on the stack: fall back to the first stack entry.
  $foster_parent_element = $open_elements->[0]->[0]
      if not defined $foster_parent_element;

  ## An undefined |$next_sibling| is passed through unchanged.
  return $foster_parent_element->insert_before ($child, $next_sibling);
}; # $insert_to_foster
1992    
## Helper of |$in_body|: tests whether a |p| element is in scope.
## If so, the current token is pushed back, |$token| is replaced by an
## implied </p> end tag, and true is returned (the caller must then
## return so that the end tag is processed first).  Otherwise false.
my $in_body_imply_p_end_tag = sub {
  for my $node (reverse @$open_elements) {
    if ($node->[1] eq 'p') {
      !!!back-token;
      $token = {type => 'end tag', tag_name => 'p'};
      return 1;
    } elsif ({
              table => 1, caption => 1, td => 1, th => 1,
              button => 1, marquee => 1, object => 1, html => 1,
             }->{$node->[1]}) {
      ## Scope boundary reached without finding a |p|.
      return 0;
    }
  }
  return 0;
}; # $in_body_imply_p_end_tag

## Processes the current |$token| according to the rules of the
## "in body" insertion mode.  Only start tag and end tag tokens are
## handled here; for any other token type the code does nothing.
##
## Argument: |$insert|, a code reference that inserts a node at the
## appropriate place (e.g. |$insert_to_current| or |$insert_to_foster|).
##
## Every branch either consumes the token (|!!!next-token|) or replaces
## |$token| / the insertion mode so that the caller reprocesses it.
my $in_body = sub {
  my $insert = shift;
  if ($token->{type} eq 'start tag') {
    if ($token->{tag_name} eq 'script') {
      $script_start_tag->();
      return;
    } elsif ($token->{tag_name} eq 'style') {
      $style_start_tag->();
      return;
    } elsif ({
              base => 1, link => 1, meta => 1,
             }->{$token->{tag_name}}) {
      !!!parse-error;
      ## NOTE: This is an "as if in head" code clone
      my $el;
      !!!create-element ($el, $token->{tag_name}, $token->{attributes});
      if (defined $head_element) {
        $head_element->append_child ($el);
      } else {
        $insert->($el);
      }

      ## ISSUE: Issue on magical <base> in the spec

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'title') {
      ## NOTE: There is an "as if in head" code clone
      my $title_el;
      !!!create-element ($title_el, 'title', $token->{attributes});
      (defined $head_element ? $head_element : $open_elements->[-1]->[0])
          ->append_child ($title_el);
      $self->{content_model_flag} = 'RCDATA';

      my $text = '';
      !!!next-token;
      while ($token->{type} eq 'character') {
        $text .= $token->{data};
        !!!next-token;
      }
      if (length $text) {
        $title_el->manakai_append_text ($text);
      }

      $self->{content_model_flag} = 'PCDATA';

      if ($token->{type} eq 'end tag' and
          $token->{tag_name} eq 'title') {
        ## Ignore the token
      } else {
        !!!parse-error;
        ## ISSUE: And ignore?
      }
      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'body') {
      !!!parse-error;

      if (@$open_elements == 1 or
          $open_elements->[1]->[1] ne 'body') {
        ## Ignore the token
      } else {
        ## Copy only those attributes the existing |body| lacks.
        my $body_el = $open_elements->[1]->[0];
        for my $attr_name (keys %{$token->{attributes}}) {
          unless ($body_el->has_attribute_ns (undef, $attr_name)) {
            $body_el->set_attribute_ns
                (undef, [undef, $attr_name],
                 $token->{attributes}->{$attr_name}->{value});
          }
        }
      }
      !!!next-token;
      return;
    } elsif ({
              address => 1, blockquote => 1, center => 1, dir => 1,
              div => 1, dl => 1, fieldset => 1, listing => 1,
              menu => 1, ol => 1, p => 1, ul => 1,
              pre => 1,
             }->{$token->{tag_name}}) {
      ## has a p element in scope
      return if $in_body_imply_p_end_tag->();

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      if ($token->{tag_name} eq 'pre') {
        ## A newline immediately after <pre> is dropped.
        !!!next-token;
        if ($token->{type} eq 'character') {
          $token->{data} =~ s/^\x0A//;
          unless (length $token->{data}) {
            !!!next-token;
          }
        }
      } else {
        !!!next-token;
      }
      return;
    } elsif ($token->{tag_name} eq 'form') {
      if (defined $form_element) {
        !!!parse-error;
        ## Ignore the token
        ## The token has to be consumed here, as in every other
        ## "ignore the token" branch; formerly it was left in place.
        !!!next-token;
        return;
      } else {
        ## has a p element in scope
        return if $in_body_imply_p_end_tag->();

        !!!insert-element-t ($token->{tag_name}, $token->{attributes});
        $form_element = $open_elements->[-1]->[0];
        !!!next-token;
        return;
      }
    } elsif ($token->{tag_name} eq 'li') {
      ## has a p element in scope
      return if $in_body_imply_p_end_tag->();

      ## Step 1
      my $i = -1;
      my $node = $open_elements->[$i];
      LI: {
        ## Step 2
        if ($node->[1] eq 'li') {
          splice @$open_elements, $i;
          last LI;
        }

        ## Step 3
        if (not $formatting_category->{$node->[1]} and
            #not $phrasing_category->{$node->[1]} and
            ($special_category->{$node->[1]} or
             $scoping_category->{$node->[1]}) and
            $node->[1] ne 'address' and $node->[1] ne 'div') {
          last LI;
        }

        ## Step 4
        $i--;
        $node = $open_elements->[$i];
        redo LI;
      } # LI

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
      ## has a p element in scope
      return if $in_body_imply_p_end_tag->();

      ## Step 1
      my $i = -1;
      my $node = $open_elements->[$i];
      LI: {
        ## Step 2
        if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
          splice @$open_elements, $i;
          last LI;
        }

        ## Step 3
        if (not $formatting_category->{$node->[1]} and
            #not $phrasing_category->{$node->[1]} and
            ($special_category->{$node->[1]} or
             $scoping_category->{$node->[1]}) and
            $node->[1] ne 'address' and $node->[1] ne 'div') {
          last LI;
        }

        ## Step 4
        $i--;
        $node = $open_elements->[$i];
        redo LI;
      } # LI

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'plaintext') {
      ## has a p element in scope
      return if $in_body_imply_p_end_tag->();

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      $self->{content_model_flag} = 'PLAINTEXT';

      !!!next-token;
      return;
    } elsif ({
              h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
             }->{$token->{tag_name}}) {
      ## has a p element in scope
      return if $in_body_imply_p_end_tag->();

      ## has an element in scope
      ## (any heading element; it is closed with a parse error)
      my $i;
      INSCOPE: for (reverse 0..$#$open_elements) {
        my $node = $open_elements->[$_];
        if ({
             h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
            }->{$node->[1]}) {
          $i = $_;
          last INSCOPE;
        } elsif ({
                  table => 1, caption => 1, td => 1, th => 1,
                  button => 1, marquee => 1, object => 1, html => 1,
                 }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      if (defined $i) {
        !!!parse-error;
        splice @$open_elements, $i;
      }

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'a') {
      ## An |a| still in the list of active formatting elements (after
      ## the last marker) is closed first, as if </a> had been seen.
      AFE: for my $i (reverse 0..$#$active_formatting_elements) {
        my $node = $active_formatting_elements->[$i];
        if ($node->[1] eq 'a') {
          !!!parse-error ('a in a');

          !!!back-token;
          $token = {type => 'end tag', tag_name => 'a'};
          $formatting_end_tag->($token->{tag_name});

          ## Remove the old |a| from both lists if it is still there.
          AFE2: for (reverse 0..$#$active_formatting_elements) {
            if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
              splice @$active_formatting_elements, $_, 1;
              last AFE2;
            }
          } # AFE2
          OE: for (reverse 0..$#$open_elements) {
            if ($open_elements->[$_]->[0] eq $node->[0]) {
              splice @$open_elements, $_, 1;
              last OE;
            }
          } # OE
          last AFE;
        } elsif ($node->[0] eq '#marker') {
          last AFE;
        }
      } # AFE

      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, $open_elements->[-1];

      !!!next-token;
      return;
    } elsif ({
              b => 1, big => 1, em => 1, font => 1, i => 1,
              nobr => 1, s => 1, small => 1, strike => 1,
              strong => 1, tt => 1, u => 1,
             }->{$token->{tag_name}}) {
      ## NOTE: |strike| was formerly misspelled |strile| here, so that
      ## <strike> was handled as an unknown element.
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, $open_elements->[-1];

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'button') {
      ## has a button element in scope
      INSCOPE: for (reverse 0..$#$open_elements) {
        my $node = $open_elements->[$_];
        if ($node->[1] eq 'button') {
          !!!parse-error;
          !!!back-token;
          $token = {type => 'end tag', tag_name => 'button'};
          return;
        } elsif ({
                  table => 1, caption => 1, td => 1, th => 1,
                  button => 1, marquee => 1, object => 1, html => 1,
                 }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, ['#marker', ''];

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'marquee' or
             $token->{tag_name} eq 'object') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      push @$active_formatting_elements, ['#marker', ''];

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'xmp') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      $self->{content_model_flag} = 'CDATA';

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'table') {
      ## has a p element in scope
      return if $in_body_imply_p_end_tag->();

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      $insertion_mode = 'in table';

      !!!next-token;
      return;
    } elsif ({
              area => 1, basefont => 1, bgsound => 1, br => 1,
              embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
              image => 1,
             }->{$token->{tag_name}}) {
      if ($token->{tag_name} eq 'image') {
        !!!parse-error;
        $token->{tag_name} = 'img';
      }

      $reconstruct_active_formatting_elements->($insert_to_current);

      ## Inserted and immediately popped: these elements have no content.
      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      pop @$open_elements;

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'hr') {
      ## has a p element in scope
      return if $in_body_imply_p_end_tag->();

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      pop @$open_elements;

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'input') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});
      ## TODO: associate with $form_element if defined
      pop @$open_elements;

      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'isindex') {
      !!!parse-error;

      if (defined $form_element) {
        ## Ignore the token
        !!!next-token;
        return;
      } else {
        ## Replace the token by an equivalent sequence of tokens; the
        ## first one becomes the current token and the rest is queued.
        my $at = $token->{attributes};
        $at->{name} = {name => 'name', value => 'isindex'};
        my @tokens = (
          {type => 'start tag', tag_name => 'form'},
          {type => 'start tag', tag_name => 'hr'},
          {type => 'start tag', tag_name => 'p'},
          {type => 'start tag', tag_name => 'label'},
          {type => 'character',
           data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
          ## TODO: make this configurable
          {type => 'start tag', tag_name => 'input', attributes => $at},
          #{type => 'character', data => ''}, # SHOULD
          {type => 'end tag', tag_name => 'label'},
          {type => 'end tag', tag_name => 'p'},
          {type => 'start tag', tag_name => 'hr'},
          {type => 'end tag', tag_name => 'form'},
        );
        $token = shift @tokens;
        !!!back-token (@tokens);
        return;
      }
    } elsif ({
              textarea => 1,
              noembed => 1,
              noframes => 1,
              noscript => 0, ## TODO: 1 if scripting is enabled
             }->{$token->{tag_name}}) {
      ## NOTE(review): |iframe| is in the end tag list below but not
      ## here -- confirm against the spec whether it belongs here too.
      my $tag_name = $token->{tag_name};
      my $el;
      !!!create-element ($el, $token->{tag_name}, $token->{attributes});

      if ($token->{tag_name} eq 'textarea') {
        ## TODO: form_element if defined
        $self->{content_model_flag} = 'RCDATA';
      } else {
        $self->{content_model_flag} = 'CDATA';
      }

      $insert->($el);

      my $text = '';
      !!!next-token;
      while ($token->{type} eq 'character') {
        $text .= $token->{data};
        !!!next-token;
      }
      if (length $text) {
        $el->manakai_append_text ($text);
      }

      $self->{content_model_flag} = 'PCDATA';

      if ($token->{type} eq 'end tag' and
          $token->{tag_name} eq $tag_name) {
        ## Ignore the token
      } else {
        !!!parse-error;
        ## ISSUE: And ignore?
      }
      !!!next-token;
      return;
    } elsif ($token->{tag_name} eq 'select') {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      $insertion_mode = 'in select';
      !!!next-token;
      return;
    } elsif ({
              caption => 1, col => 1, colgroup => 1, frame => 1,
              frameset => 1, head => 1, option => 1, optgroup => 1,
              tbody => 1, td => 1, tfoot => 1, th => 1,
              thead => 1, tr => 1,
             }->{$token->{tag_name}}) {
      !!!parse-error;
      ## Ignore the token
      !!!next-token;
      return;

      ## ISSUE: An issue on HTML5 new elements in the spec.
    } else {
      $reconstruct_active_formatting_elements->($insert_to_current);

      !!!insert-element-t ($token->{tag_name}, $token->{attributes});

      !!!next-token;
      return;
    }
  } elsif ($token->{type} eq 'end tag') {
    if ($token->{tag_name} eq 'body') {
      if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
        ## ISSUE: There is an issue in the spec.
        if ($open_elements->[-1]->[1] ne 'body') {
          !!!parse-error;
        }
        $insertion_mode = 'after body';
        !!!next-token;
        return;
      } else {
        !!!parse-error;
        ## Ignore the token
        !!!next-token;
        return;
      }
    } elsif ($token->{tag_name} eq 'html') {
      if (@$open_elements > 1 and $open_elements->[1]->[1] eq 'body') {
        ## ISSUE: There is an issue in the spec.
        if ($open_elements->[-1]->[1] ne 'body') {
          !!!parse-error;
        }
        $insertion_mode = 'after body';
        ## reprocess
        return;
      } else {
        !!!parse-error;
        ## Ignore the token
        !!!next-token;
        return;
      }
    } elsif ({
              address => 1, blockquote => 1, center => 1, dir => 1,
              div => 1, dl => 1, fieldset => 1, listing => 1,
              menu => 1, ol => 1, pre => 1, ul => 1,
              form => 1,
              p => 1,
              dd => 1, dt => 1, li => 1,
              button => 1, marquee => 1, object => 1,
             }->{$token->{tag_name}}) {
      ## has an element in scope
      my $i;
      INSCOPE: for (reverse 0..$#$open_elements) {
        my $node = $open_elements->[$_];
        if ($node->[1] eq $token->{tag_name}) {
          ## generate implied end tags
          ## (except for an element with the same name as the token)
          if ({
               dd => ($token->{tag_name} ne 'dd'),
               dt => ($token->{tag_name} ne 'dt'),
               li => ($token->{tag_name} ne 'li'),
               p => ($token->{tag_name} ne 'p'),
               td => 1, th => 1, tr => 1,
              }->{$open_elements->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag',
                      tag_name => $open_elements->[-1]->[1]}; # MUST
            return;
          }
          $i = $_;
          last INSCOPE unless $token->{tag_name} eq 'p';
        } elsif ({
                  table => 1, caption => 1, td => 1, th => 1,
                  button => 1, marquee => 1, object => 1, html => 1,
                 }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
        !!!parse-error;
      }

      splice @$open_elements, $i if defined $i;
      undef $form_element if $token->{tag_name} eq 'form';
      $clear_up_to_marker->()
          if {
            button => 1, marquee => 1, object => 1,
          }->{$token->{tag_name}};
      !!!next-token;
      return;
    } elsif ({
              h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
             }->{$token->{tag_name}}) {
      ## has an element in scope
      my $i;
      INSCOPE: for (reverse 0..$#$open_elements) {
        my $node = $open_elements->[$_];
        if ({
             h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
            }->{$node->[1]}) {
          ## generate implied end tags
          if ({
               dd => 1, dt => 1, li => 1, p => 1,
               td => 1, th => 1, tr => 1,
              }->{$open_elements->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag',
                      tag_name => $open_elements->[-1]->[1]}; # MUST
            return;
          }
          $i = $_;
          last INSCOPE;
        } elsif ({
                  table => 1, caption => 1, td => 1, th => 1,
                  button => 1, marquee => 1, object => 1, html => 1,
                 }->{$node->[1]}) {
          last INSCOPE;
        }
      } # INSCOPE

      if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
        !!!parse-error;
      }

      splice @$open_elements, $i if defined $i;
      !!!next-token;
      return;
    } elsif ({
              a => 1,
              b => 1, big => 1, em => 1, font => 1, i => 1,
              nobr => 1, s => 1, small => 1, strike => 1,
              strong => 1, tt => 1, u => 1,
             }->{$token->{tag_name}}) {
      ## NOTE: |strike| was formerly misspelled |strile| here.
      $formatting_end_tag->($token->{tag_name});
      return;
    } elsif ({
              caption => 1, col => 1, colgroup => 1, frame => 1,
              frameset => 1, head => 1, option => 1, optgroup => 1,
              tbody => 1, td => 1, tfoot => 1, th => 1,
              thead => 1, tr => 1,
              area => 1, basefont => 1, bgsound => 1, br => 1,
              embed => 1, hr => 1, iframe => 1, image => 1,
              img => 1, input => 1, isindex=> 1, noembed => 1,
              noframes => 1, param => 1, select => 1, spacer => 1,
              table => 1, textarea => 1, wbr => 1,
              noscript => 0, ## TODO: if scripting is enabled
             }->{$token->{tag_name}}) {
      !!!parse-error;
      ## Ignore the token
      !!!next-token;
      return;

      ## ISSUE: Issue on HTML5 new elements in spec

    } else {
      ## Any other end tag: walk up the stack of open elements looking
      ## for an element with the same name.

      ## Step 1
      my $node_i = -1;
      my $node = $open_elements->[$node_i];

      ## Step 2
      S2: {
        if ($node->[1] eq $token->{tag_name}) {
          ## Step 1
          ## generate implied end tags
          if ({
               dd => 1, dt => 1, li => 1, p => 1,
               td => 1, th => 1, tr => 1,
              }->{$open_elements->[-1]->[1]}) {
            !!!back-token;
            $token = {type => 'end tag',
                      tag_name => $open_elements->[-1]->[1]}; # MUST
            return;
          }

          ## Step 2
          if ($token->{tag_name} ne $open_elements->[-1]->[1]) {
            !!!parse-error;
          }

          ## Step 3
          splice @$open_elements, $node_i;
          ## The end tag is now fully processed and must be consumed,
          ## just as the error branch below does; formerly the token was
          ## left in place after the matching element had been popped.
          !!!next-token;
          last S2;
        } else {
          ## Step 3
          if (not $formatting_category->{$node->[1]} and
              #not $phrasing_category->{$node->[1]} and
              ($special_category->{$node->[1]} or
               $scoping_category->{$node->[1]})) {
            !!!parse-error;
            ## Ignore the token
            !!!next-token;
            last S2;
          }
        }

        ## Step 4
        $node_i--;
        $node = $open_elements->[$node_i];

        ## Step 5;
        redo S2;
      } # S2
    }
  }
}; # $in_body
2722    
2723     B: {
2724     if ($phase eq 'initial') {
2725     if ($token->{type} eq 'DOCTYPE') {
2726     if ($token->{error}) {
2727     ## ISSUE: Spec currently left this case undefined.
2728 wakaba 1.6 !!!parse-error ('missing DOCTYPE');
2729 wakaba 1.2 }
2730     my $doctype = $self->{document}->create_document_type_definition
2731     ($token->{name});
2732     $self->{document}->append_child ($doctype);
2733     $phase = 'root element';
2734     !!!next-token;
2735     redo B;
2736     } elsif ({
2737     comment => 1,
2738     'start tag' => 1,
2739     'end tag' => 1,
2740     'end-of-file' => 1,
2741     }->{$token->{type}}) {
2742     ## ISSUE: Spec currently left this case undefined.
2743 wakaba 1.6 !!!parse-error ('missing DOCTYPE');
2744 wakaba 1.2 $phase = 'root element';
2745     ## reprocess
2746     redo B;
2747     } elsif ($token->{type} eq 'character') {
2748     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2749     $self->{document}->manakai_append_text ($1);
2750     ## ISSUE: DOM3 Core does not allow Document > Text
2751     unless (length $token->{data}) {
2752     ## Stay in the phase
2753     !!!next-token;
2754     redo B;
2755     }
2756     }
2757     ## ISSUE: Spec currently left this case undefined.
2758 wakaba 1.6 !!!parse-error ('missing DOCTYPE');
2759 wakaba 1.2 $phase = 'root element';
2760     ## reprocess
2761     redo B;
2762     } else {
2763     die "$0: $token->{type}: Unknown token";
2764     }
2765     } elsif ($phase eq 'root element') {
2766     if ($token->{type} eq 'DOCTYPE') {
2767     !!!parse-error;
2768     ## Ignore the token
2769     ## Stay in the phase
2770     !!!next-token;
2771     redo B;
2772     } elsif ($token->{type} eq 'comment') {
2773     my $comment = $self->{document}->create_comment ($token->{data});
2774     $self->{document}->append_child ($comment);
2775     ## Stay in the phase
2776     !!!next-token;
2777     redo B;
2778     } elsif ($token->{type} eq 'character') {
2779     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2780     $self->{document}->manakai_append_text ($1);
2781     ## ISSUE: DOM3 Core does not allow Document > Text
2782     unless (length $token->{data}) {
2783     ## Stay in the phase
2784     !!!next-token;
2785     redo B;
2786     }
2787     }
2788     #
2789     } elsif ({
2790     'start tag' => 1,
2791     'end tag' => 1,
2792     'end-of-file' => 1,
2793     }->{$token->{type}}) {
2794     ## ISSUE: There is an issue in the spec
2795     #
2796     } else {
2797     die "$0: $token->{type}: Unknown token";
2798     }
2799     my $root_element; !!!create-element ($root_element, 'html');
2800     $self->{document}->append_child ($root_element);
2801     $open_elements = [[$root_element, 'html']];
2802     $phase = 'main';
2803     ## reprocess
2804     redo B;
2805     } elsif ($phase eq 'main') {
2806     if ($token->{type} eq 'DOCTYPE') {
2807     !!!parse-error;
2808     ## Ignore the token
2809     ## Stay in the phase
2810     !!!next-token;
2811     redo B;
2812     } elsif ($token->{type} eq 'start tag' and
2813     $token->{tag_name} eq 'html') {
2814     ## TODO: unless it is the first start tag token, parse-error
2815     my $top_el = $open_elements->[0]->[0];
2816     for my $attr_name (keys %{$token->{attributes}}) {
2817     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
2818 wakaba 1.7 $top_el->set_attribute_ns
2819     (undef, [undef, $attr_name],
2820     $token->{attributes}->{$attr_name}->{value});
2821 wakaba 1.2 }
2822     }
2823     !!!next-token;
2824     redo B;
2825     } elsif ($token->{type} eq 'end-of-file') {
2826     ## Generate implied end tags
2827     if ({
2828     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
2829     }->{$open_elements->[-1]->[1]}) {
2830     !!!back-token;
2831     $token = {type => 'end tag', tag_name => $open_elements->[-1]->[1]};
2832     redo B;
2833     }
2834    
2835     if (@$open_elements > 2 or
2836     (@$open_elements == 2 and $open_elements->[1]->[1] ne 'body')) {
2837     !!!parse-error;
2838     } else {
2839     ## TODO: inner_html parser and @$open_elements > 1 and $open_elements->[1] ne 'body', then parse-error
2840     }
2841    
2842     ## Stop parsing
2843     last B;
2844    
2845     ## ISSUE: There is an issue in the spec.
2846     } else {
2847     if ($insertion_mode eq 'before head') {
2848     if ($token->{type} eq 'character') {
2849     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2850     $open_elements->[-1]->[0]->manakai_append_text ($1);
2851     unless (length $token->{data}) {
2852     !!!next-token;
2853     redo B;
2854     }
2855     }
2856     ## As if <head>
2857     !!!create-element ($head_element, 'head');
2858     $open_elements->[-1]->[0]->append_child ($head_element);
2859     push @$open_elements, [$head_element, 'head'];
2860     $insertion_mode = 'in head';
2861     ## reprocess
2862     redo B;
2863     } elsif ($token->{type} eq 'comment') {
2864     my $comment = $self->{document}->create_comment ($token->{data});
2865     $open_elements->[-1]->[0]->append_child ($comment);
2866     !!!next-token;
2867     redo B;
2868     } elsif ($token->{type} eq 'start tag') {
2869     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
2870     !!!create-element ($head_element, 'head', $attr);
2871     $open_elements->[-1]->[0]->append_child ($head_element);
2872     push @$open_elements, [$head_element, 'head'];
2873     $insertion_mode = 'in head';
2874     if ($token->{tag_name} eq 'head') {
2875     !!!next-token;
2876     #} elsif ({
2877     # base => 1, link => 1, meta => 1,
2878     # script => 1, style => 1, title => 1,
2879     # }->{$token->{tag_name}}) {
2880     # ## reprocess
2881     } else {
2882     ## reprocess
2883     }
2884     redo B;
2885     } elsif ($token->{type} eq 'end tag') {
2886     if ($token->{tag_name} eq 'html') {
2887     ## As if <head>
2888     !!!create-element ($head_element, 'head');
2889     $open_elements->[-1]->[0]->append_child ($head_element);
2890     push @$open_elements, [$head_element, 'head'];
2891     $insertion_mode = 'in head';
2892     ## reprocess
2893     redo B;
2894     } else {
2895     !!!parse-error;
2896     ## Ignore the token
2897 wakaba 1.6 !!!next-token;
2898 wakaba 1.2 redo B;
2899     }
2900     } else {
2901     die "$0: $token->{type}: Unknown type";
2902     }
2903     } elsif ($insertion_mode eq 'in head') {
2904     if ($token->{type} eq 'character') {
2905     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
2906     $open_elements->[-1]->[0]->manakai_append_text ($1);
2907     unless (length $token->{data}) {
2908     !!!next-token;
2909     redo B;
2910     }
2911     }
2912    
2913     #
2914     } elsif ($token->{type} eq 'comment') {
2915     my $comment = $self->{document}->create_comment ($token->{data});
2916     $open_elements->[-1]->[0]->append_child ($comment);
2917     !!!next-token;
2918     redo B;
2919     } elsif ($token->{type} eq 'start tag') {
2920     if ($token->{tag_name} eq 'title') {
2921 wakaba 1.7 ## NOTE: There is an "as if in head" code clone
2922     my $title_el;
2923     !!!create-element ($title_el, 'title', $token->{attributes});
2924 wakaba 1.2 (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2925     ->append_child ($title_el);
2926     $self->{content_model_flag} = 'RCDATA';
2927 wakaba 1.7
2928 wakaba 1.2 my $text = '';
2929     !!!next-token;
2930     while ($token->{type} eq 'character') {
2931     $text .= $token->{data};
2932     !!!next-token;
2933     }
2934     if (length $text) {
2935     $title_el->manakai_append_text ($text);
2936     }
2937    
2938     $self->{content_model_flag} = 'PCDATA';
2939    
2940     if ($token->{type} eq 'end tag' and
2941     $token->{tag_name} eq 'title') {
2942     ## Ignore the token
2943     } else {
2944     !!!parse-error;
2945     ## ISSUE: And ignore?
2946     }
2947     !!!next-token;
2948     redo B;
2949     } elsif ($token->{tag_name} eq 'style') {
2950     $style_start_tag->();
2951     redo B;
2952     } elsif ($token->{tag_name} eq 'script') {
2953     $script_start_tag->();
2954     redo B;
2955     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
2956     ## NOTE: There are "as if in head" code clones
2957     my $el;
2958     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2959     (defined $head_element ? $head_element : $open_elements->[-1]->[0])
2960     ->append_child ($el);
2961    
2962     ## ISSUE: Issue on magical <base> in the spec
2963    
2964     !!!next-token;
2965     redo B;
2966     } elsif ($token->{tag_name} eq 'head') {
2967     !!!parse-error;
2968     ## Ignore the token
2969     !!!next-token;
2970     redo B;
2971     } else {
2972     #
2973     }
2974     } elsif ($token->{type} eq 'end tag') {
2975     if ($token->{tag_name} eq 'head') {
2976     if ($open_elements->[-1]->[1] eq 'head') {
2977     pop @$open_elements;
2978     } else {
2979     !!!parse-error;
2980     }
2981     $insertion_mode = 'after head';
2982     !!!next-token;
2983     redo B;
2984     } elsif ($token->{tag_name} eq 'html') {
2985     #
2986     } else {
2987     !!!parse-error;
2988     ## Ignore the token
2989     !!!next-token;
2990     redo B;
2991     }
2992     } else {
2993     #
2994     }
2995    
2996     if ($open_elements->[-1]->[1] eq 'head') {
2997     ## As if </head>
2998     pop @$open_elements;
2999     }
3000     $insertion_mode = 'after head';
3001     ## reprocess
3002     redo B;
3003    
3004     ## ISSUE: An issue in the spec.
3005     } elsif ($insertion_mode eq 'after head') {
3006     if ($token->{type} eq 'character') {
3007     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3008     $open_elements->[-1]->[0]->manakai_append_text ($1);
3009     unless (length $token->{data}) {
3010     !!!next-token;
3011     redo B;
3012     }
3013     }
3014    
3015     #
3016     } elsif ($token->{type} eq 'comment') {
3017     my $comment = $self->{document}->create_comment ($token->{data});
3018     $open_elements->[-1]->[0]->append_child ($comment);
3019     !!!next-token;
3020     redo B;
3021     } elsif ($token->{type} eq 'start tag') {
3022     if ($token->{tag_name} eq 'body') {
3023     !!!insert-element ('body', $token->{attributes});
3024     $insertion_mode = 'in body';
3025     !!!next-token;
3026     redo B;
3027     } elsif ($token->{tag_name} eq 'frameset') {
3028     !!!insert-element ('frameset', $token->{attributes});
3029     $insertion_mode = 'in frameset';
3030     !!!next-token;
3031     redo B;
3032     } elsif ({
3033     base => 1, link => 1, meta => 1,
3034     script=> 1, style => 1, title => 1,
3035     }->{$token->{tag_name}}) {
3036     !!!parse-error;
3037     $insertion_mode = 'in head';
3038     ## reprocess
3039     redo B;
3040     } else {
3041     #
3042     }
3043     } else {
3044     #
3045     }
3046    
3047     ## As if <body>
3048     !!!insert-element ('body');
3049     $insertion_mode = 'in body';
3050     ## reprocess
3051     redo B;
3052     } elsif ($insertion_mode eq 'in body') {
3053     if ($token->{type} eq 'character') {
3054     ## NOTE: There is a code clone of "character in body".
3055 wakaba 1.7 $reconstruct_active_formatting_elements->($insert_to_current);
3056 wakaba 1.2
3057     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3058    
3059     !!!next-token;
3060     redo B;
3061     } elsif ($token->{type} eq 'comment') {
3062     ## NOTE: There is a code clone of "comment in body".
3063     my $comment = $self->{document}->create_comment ($token->{data});
3064     $open_elements->[-1]->[0]->append_child ($comment);
3065     !!!next-token;
3066     redo B;
3067     } else {
3068 wakaba 1.7 $in_body->($insert_to_current);
3069 wakaba 1.2 redo B;
3070     }
3071     } elsif ($insertion_mode eq 'in table') {
3072     if ($token->{type} eq 'character') {
3073 wakaba 1.7 ## NOTE: There are "character in table" code clones.
3074     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3075     $open_elements->[-1]->[0]->manakai_append_text ($1);
3076    
3077     unless (length $token->{data}) {
3078     !!!next-token;
3079     redo B;
3080     }
3081     }
3082 wakaba 1.2
3083 wakaba 1.7 ## As if in body, but insert into foster parent element
3084     ## ISSUE: Spec says that "whenever a node would be inserted
3085     ## into the current node" while characters might not be
3086     ## result in a new Text node.
3087     $reconstruct_active_formatting_elements->($insert_to_foster);
3088    
3089     if ({
3090     table => 1, tbody => 1, tfoot => 1,
3091     thead => 1, tr => 1,
3092     }->{$open_elements->[-1]->[1]}) {
3093     # MUST
3094     my $foster_parent_element;
3095     my $next_sibling;
3096     my $prev_sibling;
3097     OE: for (reverse 0..$#$open_elements) {
3098     if ($open_elements->[$_]->[1] eq 'table') {
3099     my $parent = $open_elements->[$_]->[0]->parent_node;
3100     if (defined $parent and $parent->node_type == 1) {
3101     $foster_parent_element = $parent;
3102     $next_sibling = $open_elements->[$_]->[0];
3103     $prev_sibling = $next_sibling->previous_sibling;
3104     } else {
3105     $foster_parent_element = $open_elements->[$_ - 1]->[0];
3106     $prev_sibling = $foster_parent_element->last_child;
3107     }
3108     last OE;
3109     }
3110     } # OE
3111     $foster_parent_element = $open_elements->[0]->[0] and
3112     $prev_sibling = $foster_parent_element->last_child
3113     unless defined $foster_parent_element;
3114     if (defined $prev_sibling and
3115     $prev_sibling->node_type == 3) {
3116     $prev_sibling->manakai_append_text ($token->{data});
3117     } else {
3118     $foster_parent_element->insert_before
3119     ($self->{document}->create_text_node ($token->{data}),
3120     $next_sibling);
3121     }
3122     } else {
3123     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3124     }
3125    
3126 wakaba 1.2 !!!next-token;
3127     redo B;
3128     } elsif ($token->{type} eq 'comment') {
3129     my $comment = $self->{document}->create_comment ($token->{data});
3130     $open_elements->[-1]->[0]->append_child ($comment);
3131     !!!next-token;
3132     redo B;
3133     } elsif ($token->{type} eq 'start tag') {
3134     if ({
3135     caption => 1,
3136     colgroup => 1,
3137     tbody => 1, tfoot => 1, thead => 1,
3138     }->{$token->{tag_name}}) {
3139     ## Clear back to table context
3140     while ($open_elements->[-1]->[1] ne 'table' and
3141     $open_elements->[-1]->[1] ne 'html') {
3142     !!!parse-error;
3143     pop @$open_elements;
3144     }
3145    
3146     push @$active_formatting_elements, ['#marker', '']
3147     if $token->{tag_name} eq 'caption';
3148    
3149     !!!insert-element ($token->{tag_name}, $token->{attributes});
3150     $insertion_mode = {
3151     caption => 'in caption',
3152     colgroup => 'in column group',
3153     tbody => 'in table body',
3154     tfoot => 'in table body',
3155     thead => 'in table body',
3156     }->{$token->{tag_name}};
3157     !!!next-token;
3158     redo B;
3159     } elsif ({
3160     col => 1,
3161     td => 1, th => 1, tr => 1,
3162     }->{$token->{tag_name}}) {
3163     ## Clear back to table context
3164     while ($open_elements->[-1]->[1] ne 'table' and
3165     $open_elements->[-1]->[1] ne 'html') {
3166     !!!parse-error;
3167     pop @$open_elements;
3168     }
3169    
3170     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3171     $insertion_mode = $token->{tag_name} eq 'col'
3172     ? 'in column group' : 'in table body';
3173     ## reprocess
3174     redo B;
3175     } elsif ($token->{tag_name} eq 'table') {
3176     ## NOTE: There are code clones for this "table in table"
3177     !!!parse-error;
3178    
3179     ## As if </table>
3180     ## have a table element in table scope
3181     my $i;
3182     INSCOPE: for (reverse 0..$#$open_elements) {
3183     my $node = $open_elements->[$_];
3184     if ($node->[1] eq 'table') {
3185     $i = $_;
3186     last INSCOPE;
3187     } elsif ({
3188     table => 1, html => 1,
3189     }->{$node->[1]}) {
3190     last INSCOPE;
3191     }
3192     } # INSCOPE
3193     unless (defined $i) {
3194     !!!parse-error;
3195     ## Ignore tokens </table><table>
3196     !!!next-token;
3197     redo B;
3198     }
3199    
3200     ## generate implied end tags
3201     if ({
3202     dd => 1, dt => 1, li => 1, p => 1,
3203     td => 1, th => 1, tr => 1,
3204     }->{$open_elements->[-1]->[1]}) {
3205     !!!back-token; # <table>
3206     $token = {type => 'end tag', tag_name => 'table'};
3207     !!!back-token;
3208     $token = {type => 'end tag',
3209     tag_name => $open_elements->[-1]->[1]}; # MUST
3210     redo B;
3211     }
3212    
3213     if ($open_elements->[-1]->[1] ne 'table') {
3214     !!!parse-error;
3215     }
3216    
3217     splice @$open_elements, $i;
3218    
3219     $reset_insertion_mode->();
3220    
3221     ## reprocess
3222     redo B;
3223     } else {
3224     #
3225     }
3226     } elsif ($token->{type} eq 'end tag') {
3227     if ($token->{tag_name} eq 'table') {
3228     ## have a table element in table scope
3229     my $i;
3230     INSCOPE: for (reverse 0..$#$open_elements) {
3231     my $node = $open_elements->[$_];
3232     if ($node->[1] eq $token->{tag_name}) {
3233     $i = $_;
3234     last INSCOPE;
3235     } elsif ({
3236     table => 1, html => 1,
3237     }->{$node->[1]}) {
3238     last INSCOPE;
3239     }
3240     } # INSCOPE
3241     unless (defined $i) {
3242     !!!parse-error;
3243     ## Ignore the token
3244     !!!next-token;
3245     redo B;
3246     }
3247    
3248     ## generate implied end tags
3249     if ({
3250     dd => 1, dt => 1, li => 1, p => 1,
3251     td => 1, th => 1, tr => 1,
3252     }->{$open_elements->[-1]->[1]}) {
3253     !!!back-token;
3254     $token = {type => 'end tag',
3255     tag_name => $open_elements->[-1]->[1]}; # MUST
3256     redo B;
3257     }
3258    
3259     if ($open_elements->[-1]->[1] ne 'table') {
3260     !!!parse-error;
3261     }
3262    
3263     splice @$open_elements, $i;
3264    
3265     $reset_insertion_mode->();
3266    
3267     !!!next-token;
3268     redo B;
3269     } elsif ({
3270     body => 1, caption => 1, col => 1, colgroup => 1,
3271     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3272     thead => 1, tr => 1,
3273     }->{$token->{tag_name}}) {
3274     !!!parse-error;
3275     ## Ignore the token
3276     !!!next-token;
3277     redo B;
3278     } else {
3279     #
3280     }
3281     } else {
3282     #
3283     }
3284    
3285     !!!parse-error;
3286 wakaba 1.7 $in_body->($insert_to_foster);
3287 wakaba 1.2 redo B;
3288     } elsif ($insertion_mode eq 'in caption') {
3289 wakaba 1.6 if ($token->{type} eq 'character') {
3290     ## NOTE: This is a code clone of "character in body".
3291 wakaba 1.7 $reconstruct_active_formatting_elements->($insert_to_current);
3292 wakaba 1.6
3293     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3294    
3295     !!!next-token;
3296     redo B;
3297     } elsif ($token->{type} eq 'comment') {
3298     ## NOTE: This is a code clone of "comment in body".
3299     my $comment = $self->{document}->create_comment ($token->{data});
3300     $open_elements->[-1]->[0]->append_child ($comment);
3301     !!!next-token;
3302     redo B;
3303     } elsif ($token->{type} eq 'start tag') {
3304 wakaba 1.2 if ({
3305     caption => 1, col => 1, colgroup => 1, tbody => 1,
3306     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3307     }->{$token->{tag_name}}) {
3308     !!!parse-error;
3309    
3310     ## As if </caption>
3311     ## have a table element in table scope
3312     my $i;
3313     INSCOPE: for (reverse 0..$#$open_elements) {
3314     my $node = $open_elements->[$_];
3315     if ($node->[1] eq 'caption') {
3316     $i = $_;
3317     last INSCOPE;
3318     } elsif ({
3319     table => 1, html => 1,
3320     }->{$node->[1]}) {
3321     last INSCOPE;
3322     }
3323     } # INSCOPE
3324     unless (defined $i) {
3325     !!!parse-error;
3326     ## Ignore the token
3327     !!!next-token;
3328     redo B;
3329     }
3330    
3331     ## generate implied end tags
3332     if ({
3333     dd => 1, dt => 1, li => 1, p => 1,
3334     td => 1, th => 1, tr => 1,
3335     }->{$open_elements->[-1]->[1]}) {
3336     !!!back-token; # <?>
3337     $token = {type => 'end tag', tag_name => 'caption'};
3338     !!!back-token;
3339     $token = {type => 'end tag',
3340     tag_name => $open_elements->[-1]->[1]}; # MUST
3341     redo B;
3342     }
3343    
3344     if ($open_elements->[-1]->[1] ne 'caption') {
3345     !!!parse-error;
3346     }
3347    
3348     splice @$open_elements, $i;
3349    
3350     $clear_up_to_marker->();
3351    
3352     $insertion_mode = 'in table';
3353    
3354     ## reprocess
3355     redo B;
3356     } else {
3357     #
3358     }
3359     } elsif ($token->{type} eq 'end tag') {
3360     if ($token->{tag_name} eq 'caption') {
3361     ## have a table element in table scope
3362     my $i;
3363     INSCOPE: for (reverse 0..$#$open_elements) {
3364     my $node = $open_elements->[$_];
3365     if ($node->[1] eq $token->{tag_name}) {
3366     $i = $_;
3367     last INSCOPE;
3368     } elsif ({
3369     table => 1, html => 1,
3370     }->{$node->[1]}) {
3371     last INSCOPE;
3372     }
3373     } # INSCOPE
3374     unless (defined $i) {
3375     !!!parse-error;
3376     ## Ignore the token
3377     !!!next-token;
3378     redo B;
3379     }
3380    
3381     ## generate implied end tags
3382     if ({
3383     dd => 1, dt => 1, li => 1, p => 1,
3384     td => 1, th => 1, tr => 1,
3385     }->{$open_elements->[-1]->[1]}) {
3386     !!!back-token;
3387     $token = {type => 'end tag',
3388     tag_name => $open_elements->[-1]->[1]}; # MUST
3389     redo B;
3390     }
3391    
3392     if ($open_elements->[-1]->[1] ne 'caption') {
3393     !!!parse-error;
3394     }
3395    
3396     splice @$open_elements, $i;
3397    
3398     $clear_up_to_marker->();
3399    
3400     $insertion_mode = 'in table';
3401    
3402     !!!next-token;
3403     redo B;
3404     } elsif ($token->{tag_name} eq 'table') {
3405     !!!parse-error;
3406    
3407     ## As if </caption>
3408     ## have a table element in table scope
3409     my $i;
3410     INSCOPE: for (reverse 0..$#$open_elements) {
3411     my $node = $open_elements->[$_];
3412     if ($node->[1] eq 'caption') {
3413     $i = $_;
3414     last INSCOPE;
3415     } elsif ({
3416     table => 1, html => 1,
3417     }->{$node->[1]}) {
3418     last INSCOPE;
3419     }
3420     } # INSCOPE
3421     unless (defined $i) {
3422     !!!parse-error;
3423     ## Ignore the token
3424     !!!next-token;
3425     redo B;
3426     }
3427    
3428     ## generate implied end tags
3429     if ({
3430     dd => 1, dt => 1, li => 1, p => 1,
3431     td => 1, th => 1, tr => 1,
3432     }->{$open_elements->[-1]->[1]}) {
3433     !!!back-token; # </table>
3434     $token = {type => 'end tag', tag_name => 'caption'};
3435     !!!back-token;
3436     $token = {type => 'end tag',
3437     tag_name => $open_elements->[-1]->[1]}; # MUST
3438     redo B;
3439     }
3440    
3441     if ($open_elements->[-1]->[1] ne 'caption') {
3442     !!!parse-error;
3443     }
3444    
3445     splice @$open_elements, $i;
3446    
3447     $clear_up_to_marker->();
3448    
3449     $insertion_mode = 'in table';
3450    
3451     ## reprocess
3452     redo B;
3453     } elsif ({
3454     body => 1, col => 1, colgroup => 1,
3455     html => 1, tbody => 1, td => 1, tfoot => 1,
3456     th => 1, thead => 1, tr => 1,
3457     }->{$token->{tag_name}}) {
3458     !!!parse-error;
3459     !!!next-token; ## Ignore the token: it must be consumed here, otherwise the redo below re-enters this same branch with the same token and never terminates
3460     redo B;
3461     } else {
3462     #
3463     }
3464     } else {
3465     #
3466     }
3467    
3468 wakaba 1.7 $in_body->($insert_to_current);
3469 wakaba 1.2 redo B;
3470     } elsif ($insertion_mode eq 'in column group') {
3471     if ($token->{type} eq 'character') {
3472     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3473     $open_elements->[-1]->[0]->manakai_append_text ($1);
3474     unless (length $token->{data}) {
3475     !!!next-token;
3476     redo B;
3477     }
3478     }
3479    
3480     #
3481     } elsif ($token->{type} eq 'comment') {
3482     my $comment = $self->{document}->create_comment ($token->{data});
3483     $open_elements->[-1]->[0]->append_child ($comment);
3484     !!!next-token;
3485     redo B;
3486     } elsif ($token->{type} eq 'start tag') {
3487     if ($token->{tag_name} eq 'col') {
3488     !!!insert-element ($token->{tag_name}, $token->{attributes});
3489     pop @$open_elements;
3490     !!!next-token;
3491     redo B;
3492     } else {
3493     #
3494     }
3495     } elsif ($token->{type} eq 'end tag') {
3496     if ($token->{tag_name} eq 'colgroup') {
3497     if ($open_elements->[-1]->[1] eq 'html') {
3498     !!!parse-error;
3499     ## Ignore the token
3500     !!!next-token;
3501     redo B;
3502     } else {
3503     pop @$open_elements; # colgroup
3504     $insertion_mode = 'in table';
3505     !!!next-token;
3506     redo B;
3507     }
3508     } elsif ($token->{tag_name} eq 'col') {
3509     !!!parse-error;
3510     ## Ignore the token
3511     !!!next-token;
3512     redo B;
3513     } else {
3514     #
3515     }
3516     } else {
3517     #
3518     }
3519    
3520     ## As if </colgroup>
3521     if ($open_elements->[-1]->[1] eq 'html') {
3522     !!!parse-error;
3523     ## Ignore the token
3524     !!!next-token;
3525     redo B;
3526     } else {
3527     pop @$open_elements; # colgroup
3528     $insertion_mode = 'in table';
3529     ## reprocess
3530     redo B;
3531     }
3532     } elsif ($insertion_mode eq 'in table body') {
3533     if ($token->{type} eq 'character') {
3534 wakaba 1.7 ## NOTE: This is a "character in table" code clone.
3535     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3536     $open_elements->[-1]->[0]->manakai_append_text ($1);
3537    
3538     unless (length $token->{data}) {
3539     !!!next-token;
3540     redo B;
3541     }
3542     }
3543 wakaba 1.2
3544 wakaba 1.7 ## As if in body, but insert into foster parent element
3545     ## ISSUE: Spec says that "whenever a node would be inserted
3546     ## into the current node" while characters might not be
3547     ## result in a new Text node.
3548     $reconstruct_active_formatting_elements->($insert_to_foster);
3549 wakaba 1.2
3550 wakaba 1.7 if ({
3551     table => 1, tbody => 1, tfoot => 1,
3552     thead => 1, tr => 1,
3553     }->{$open_elements->[-1]->[1]}) {
3554     # MUST
3555     my $foster_parent_element;
3556     my $next_sibling;
3557     my $prev_sibling;
3558     OE: for (reverse 0..$#$open_elements) {
3559     if ($open_elements->[$_]->[1] eq 'table') {
3560     my $parent = $open_elements->[$_]->[0]->parent_node;
3561     if (defined $parent and $parent->node_type == 1) {
3562     $foster_parent_element = $parent;
3563     $next_sibling = $open_elements->[$_]->[0];
3564     $prev_sibling = $next_sibling->previous_sibling;
3565     } else {
3566     $foster_parent_element = $open_elements->[$_ - 1]->[0];
3567     $prev_sibling = $foster_parent_element->last_child;
3568     }
3569     last OE;
3570     }
3571     } # OE
3572     $foster_parent_element = $open_elements->[0]->[0] and
3573     $prev_sibling = $foster_parent_element->last_child
3574     unless defined $foster_parent_element;
3575     if (defined $prev_sibling and
3576     $prev_sibling->node_type == 3) {
3577     $prev_sibling->manakai_append_text ($token->{data});
3578     } else {
3579     $foster_parent_element->insert_before
3580     ($self->{document}->create_text_node ($token->{data}),
3581     $next_sibling);
3582     }
3583     } else {
3584     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3585     }
3586    
3587 wakaba 1.2 !!!next-token;
3588     redo B;
3589     } elsif ($token->{type} eq 'comment') {
3590     ## Copied from 'in table'
3591     my $comment = $self->{document}->create_comment ($token->{data});
3592     $open_elements->[-1]->[0]->append_child ($comment);
3593     !!!next-token;
3594     redo B;
3595     } elsif ($token->{type} eq 'start tag') {
3596     if ({
3597     tr => 1,
3598     th => 1, td => 1,
3599     }->{$token->{tag_name}}) {
3600     ## Clear back to table body context
3601     while (not {
3602     tbody => 1, tfoot => 1, thead => 1, html => 1,
3603     }->{$open_elements->[-1]->[1]}) {
3604     !!!parse-error;
3605     pop @$open_elements;
3606     }
3607    
3608     $insertion_mode = 'in row';
3609     if ($token->{tag_name} eq 'tr') {
3610     !!!insert-element ($token->{tag_name}, $token->{attributes});
3611     !!!next-token;
3612     } else {
3613     !!!insert-element ('tr');
3614     ## reprocess
3615     }
3616     redo B;
3617     } elsif ({
3618     caption => 1, col => 1, colgroup => 1,
3619     tbody => 1, tfoot => 1, thead => 1,
3620     }->{$token->{tag_name}}) {
3621     ## have an element in table scope
3622     my $i;
3623     INSCOPE: for (reverse 0..$#$open_elements) {
3624     my $node = $open_elements->[$_];
3625     if ({
3626     tbody => 1, thead => 1, tfoot => 1,
3627     }->{$node->[1]}) {
3628     $i = $_;
3629     last INSCOPE;
3630     } elsif ({
3631     table => 1, html => 1,
3632     }->{$node->[1]}) {
3633     last INSCOPE;
3634     }
3635     } # INSCOPE
3636     unless (defined $i) {
3637     !!!parse-error;
3638     ## Ignore the token
3639     !!!next-token;
3640     redo B;
3641     }
3642    
3643     ## Clear back to table body context
3644     while (not {
3645     tbody => 1, tfoot => 1, thead => 1, html => 1,
3646     }->{$open_elements->[-1]->[1]}) {
3647     !!!parse-error;
3648     pop @$open_elements;
3649     }
3650    
3651     ## As if <{current node}>
3652     ## have an element in table scope
3653     ## true by definition
3654    
3655     ## Clear back to table body context
3656     ## nop by definition
3657    
3658     pop @$open_elements;
3659     $insertion_mode = 'in table';
3660     ## reprocess
3661     redo B;
3662     } elsif ($token->{tag_name} eq 'table') {
3663     ## NOTE: This is a code clone of "table in table"
3664     !!!parse-error;
3665    
3666     ## As if </table>
3667     ## have a table element in table scope
3668     my $i;
3669     INSCOPE: for (reverse 0..$#$open_elements) {
3670     my $node = $open_elements->[$_];
3671     if ($node->[1] eq 'table') {
3672     $i = $_;
3673     last INSCOPE;
3674     } elsif ({
3675     table => 1, html => 1,
3676     }->{$node->[1]}) {
3677     last INSCOPE;
3678     }
3679     } # INSCOPE
3680     unless (defined $i) {
3681     !!!parse-error;
3682     ## Ignore tokens </table><table>
3683     !!!next-token;
3684     redo B;
3685     }
3686    
3687     ## generate implied end tags
3688     if ({
3689     dd => 1, dt => 1, li => 1, p => 1,
3690     td => 1, th => 1, tr => 1,
3691     }->{$open_elements->[-1]->[1]}) {
3692     !!!back-token; # <table>
3693     $token = {type => 'end tag', tag_name => 'table'};
3694     !!!back-token;
3695     $token = {type => 'end tag',
3696     tag_name => $open_elements->[-1]->[1]}; # MUST
3697     redo B;
3698     }
3699    
3700     if ($open_elements->[-1]->[1] ne 'table') {
3701     !!!parse-error;
3702     }
3703    
3704     splice @$open_elements, $i;
3705    
3706     $reset_insertion_mode->();
3707    
3708     ## reprocess
3709     redo B;
3710     } else {
3711     #
3712     }
3713     } elsif ($token->{type} eq 'end tag') {
3714     if ({
3715     tbody => 1, tfoot => 1, thead => 1,
3716     }->{$token->{tag_name}}) {
3717     ## have an element in table scope
3718     my $i;
3719     INSCOPE: for (reverse 0..$#$open_elements) {
3720     my $node = $open_elements->[$_];
3721     if ($node->[1] eq $token->{tag_name}) {
3722     $i = $_;
3723     last INSCOPE;
3724     } elsif ({
3725     table => 1, html => 1,
3726     }->{$node->[1]}) {
3727     last INSCOPE;
3728     }
3729     } # INSCOPE
3730     unless (defined $i) {
3731     !!!parse-error;
3732     ## Ignore the token
3733     !!!next-token;
3734     redo B;
3735     }
3736    
3737     ## Clear back to table body context
3738     while (not {
3739     tbody => 1, tfoot => 1, thead => 1, html => 1,
3740     }->{$open_elements->[-1]->[1]}) {
3741     !!!parse-error;
3742     pop @$open_elements;
3743     }
3744    
3745     pop @$open_elements;
3746     $insertion_mode = 'in table';
3747     !!!next-token;
3748     redo B;
3749     } elsif ($token->{tag_name} eq 'table') {
3750     ## have an element in table scope
3751     my $i;
3752     INSCOPE: for (reverse 0..$#$open_elements) {
3753     my $node = $open_elements->[$_];
3754     if ({
3755     tbody => 1, thead => 1, tfoot => 1,
3756     }->{$node->[1]}) {
3757     $i = $_;
3758     last INSCOPE;
3759     } elsif ({
3760     table => 1, html => 1,
3761     }->{$node->[1]}) {
3762     last INSCOPE;
3763     }
3764     } # INSCOPE
3765     unless (defined $i) {
3766     !!!parse-error;
3767     ## Ignore the token
3768     !!!next-token;
3769     redo B;
3770     }
3771    
3772     ## Clear back to table body context
3773     while (not {
3774     tbody => 1, tfoot => 1, thead => 1, html => 1,
3775     }->{$open_elements->[-1]->[1]}) {
3776     !!!parse-error;
3777     pop @$open_elements;
3778     }
3779    
3780     ## As if <{current node}>
3781     ## have an element in table scope
3782     ## true by definition
3783    
3784     ## Clear back to table body context
3785     ## nop by definition
3786    
3787     pop @$open_elements;
3788     $insertion_mode = 'in table';
3789     ## reprocess
3790     redo B;
3791     } elsif ({
3792     body => 1, caption => 1, col => 1, colgroup => 1,
3793     html => 1, td => 1, th => 1, tr => 1,
3794     }->{$token->{tag_name}}) {
3795     !!!parse-error;
3796     ## Ignore the token
3797     !!!next-token;
3798     redo B;
3799     } else {
3800     #
3801     }
3802     } else {
3803     #
3804     }
3805    
3806     ## As if in table
3807     !!!parse-error;
3808 wakaba 1.7 $in_body->($insert_to_foster);
3809 wakaba 1.2 redo B;
3810     } elsif ($insertion_mode eq 'in row') {
3811     if ($token->{type} eq 'character') {
3812 wakaba 1.7 ## NOTE: This is a "character in table" code clone.
3813     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3814     $open_elements->[-1]->[0]->manakai_append_text ($1);
3815    
3816     unless (length $token->{data}) {
3817     !!!next-token;
3818     redo B;
3819     }
3820     }
3821 wakaba 1.2
3822 wakaba 1.7 ## As if in body, but insert into foster parent element
3823     ## ISSUE: Spec says that "whenever a node would be inserted
3824     ## into the current node" while characters might not be
3825     ## result in a new Text node.
3826     $reconstruct_active_formatting_elements->($insert_to_foster);
3827    
3828     if ({
3829     table => 1, tbody => 1, tfoot => 1,
3830     thead => 1, tr => 1,
3831     }->{$open_elements->[-1]->[1]}) {
3832     # MUST
3833     my $foster_parent_element;
3834     my $next_sibling;
3835     my $prev_sibling;
3836     OE: for (reverse 0..$#$open_elements) {
3837     if ($open_elements->[$_]->[1] eq 'table') {
3838     my $parent = $open_elements->[$_]->[0]->parent_node;
3839     if (defined $parent and $parent->node_type == 1) {
3840     $foster_parent_element = $parent;
3841     $next_sibling = $open_elements->[$_]->[0];
3842     $prev_sibling = $next_sibling->previous_sibling;
3843     } else {
3844     $foster_parent_element = $open_elements->[$_ - 1]->[0];
3845     $prev_sibling = $foster_parent_element->last_child;
3846     }
3847     last OE;
3848     }
3849     } # OE
3850     $foster_parent_element = $open_elements->[0]->[0] and
3851     $prev_sibling = $foster_parent_element->last_child
3852     unless defined $foster_parent_element;
3853     if (defined $prev_sibling and
3854     $prev_sibling->node_type == 3) {
3855     $prev_sibling->manakai_append_text ($token->{data});
3856     } else {
3857     $foster_parent_element->insert_before
3858     ($self->{document}->create_text_node ($token->{data}),
3859     $next_sibling);
3860     }
3861     } else {
3862     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
3863     }
3864    
3865 wakaba 1.2 !!!next-token;
3866     redo B;
3867     } elsif ($token->{type} eq 'comment') {
3868     ## Copied from 'in table'
3869     my $comment = $self->{document}->create_comment ($token->{data});
3870     $open_elements->[-1]->[0]->append_child ($comment);
3871     !!!next-token;
3872     redo B;
3873     } elsif ($token->{type} eq 'start tag') {
3874     if ($token->{tag_name} eq 'th' or
3875     $token->{tag_name} eq 'td') {
3876     ## Clear back to table row context
3877     while (not {
3878 wakaba 1.6 tr => 1, html => 1,
3879 wakaba 1.2 }->{$open_elements->[-1]->[1]}) {
3880     !!!parse-error;
3881     pop @$open_elements;
3882     }
3883    
3884     !!!insert-element ($token->{tag_name}, $token->{attributes});
3885     $insertion_mode = 'in cell';
3886    
3887     push @$active_formatting_elements, ['#marker', ''];
3888    
3889     !!!next-token;
3890     redo B;
3891     } elsif ({
3892     caption => 1, col => 1, colgroup => 1,
3893     tbody => 1, tfoot => 1, thead => 1, tr => 1,
3894     }->{$token->{tag_name}}) {
3895     ## As if </tr>
3896     ## have an element in table scope
3897     my $i;
3898     INSCOPE: for (reverse 0..$#$open_elements) {
3899     my $node = $open_elements->[$_];
3900     if ($node->[1] eq 'tr') {
3901     $i = $_;
3902     last INSCOPE;
3903     } elsif ({
3904     table => 1, html => 1,
3905     }->{$node->[1]}) {
3906     last INSCOPE;
3907     }
3908     } # INSCOPE
3909     unless (defined $i) {
3910     !!!parse-error;
3911     ## Ignore the token
3912     !!!next-token;
3913     redo B;
3914     }
3915    
3916     ## Clear back to table row context
3917     while (not {
3918     tr => 1, html => 1,
3919     }->{$open_elements->[-1]->[1]}) {
3920     !!!parse-error;
3921     pop @$open_elements;
3922     }
3923    
3924     pop @$open_elements; # tr
3925     $insertion_mode = 'in table body';
3926     ## reprocess
3927     redo B;
3928     } elsif ($token->{tag_name} eq 'table') {
3929     ## NOTE: This is a code clone of "table in table"
3930     !!!parse-error;
3931    
3932     ## As if </table>
3933     ## have a table element in table scope
3934     my $i;
3935     INSCOPE: for (reverse 0..$#$open_elements) {
3936     my $node = $open_elements->[$_];
3937     if ($node->[1] eq 'table') {
3938     $i = $_;
3939     last INSCOPE;
3940     } elsif ({
3941     table => 1, html => 1,
3942     }->{$node->[1]}) {
3943     last INSCOPE;
3944     }
3945     } # INSCOPE
3946     unless (defined $i) {
3947     !!!parse-error;
3948     ## Ignore tokens </table><table>
3949     !!!next-token;
3950     redo B;
3951     }
3952    
3953     ## generate implied end tags
3954     if ({
3955     dd => 1, dt => 1, li => 1, p => 1,
3956     td => 1, th => 1, tr => 1,
3957     }->{$open_elements->[-1]->[1]}) {
3958     !!!back-token; # <table>
3959     $token = {type => 'end tag', tag_name => 'table'};
3960     !!!back-token;
3961     $token = {type => 'end tag',
3962     tag_name => $open_elements->[-1]->[1]}; # MUST
3963     redo B;
3964     }
3965    
3966     if ($open_elements->[-1]->[1] ne 'table') {
3967     !!!parse-error;
3968     }
3969    
3970     splice @$open_elements, $i;
3971    
3972     $reset_insertion_mode->();
3973    
3974     ## reprocess
3975     redo B;
3976     } else {
3977     #
3978     }
3979     } elsif ($token->{type} eq 'end tag') {
3980     if ($token->{tag_name} eq 'tr') {
3981     ## have an element in table scope
3982     my $i;
3983     INSCOPE: for (reverse 0..$#$open_elements) {
3984     my $node = $open_elements->[$_];
3985     if ($node->[1] eq $token->{tag_name}) {
3986     $i = $_;
3987     last INSCOPE;
3988     } elsif ({
3989     table => 1, html => 1,
3990     }->{$node->[1]}) {
3991     last INSCOPE;
3992     }
3993     } # INSCOPE
3994     unless (defined $i) {
3995     !!!parse-error;
3996     ## Ignore the token
3997     !!!next-token;
3998     redo B;
3999     }
4000    
4001     ## Clear back to table row context
4002     while (not {
4003     tr => 1, html => 1,
4004     }->{$open_elements->[-1]->[1]}) {
4005     !!!parse-error;
4006     pop @$open_elements;
4007     }
4008    
4009     pop @$open_elements; # tr
4010     $insertion_mode = 'in table body';
4011     !!!next-token;
4012     redo B;
4013     } elsif ($token->{tag_name} eq 'table') {
4014     ## As if </tr>
4015     ## have an element in table scope
4016     my $i;
4017     INSCOPE: for (reverse 0..$#$open_elements) {
4018     my $node = $open_elements->[$_];
4019     if ($node->[1] eq 'tr') {
4020     $i = $_;
4021     last INSCOPE;
4022     } elsif ({
4023     table => 1, html => 1,
4024     }->{$node->[1]}) {
4025     last INSCOPE;
4026     }
4027     } # INSCOPE
4028     unless (defined $i) {
4029     !!!parse-error;
4030     ## Ignore the token
4031     !!!next-token;
4032     redo B;
4033     }
4034    
4035     ## Clear back to table row context
4036     while (not {
4037     tr => 1, html => 1,
4038     }->{$open_elements->[-1]->[1]}) {
4039     !!!parse-error;
4040     pop @$open_elements;
4041     }
4042    
4043     pop @$open_elements; # tr
4044     $insertion_mode = 'in table body';
4045     ## reprocess
4046     redo B;
4047     } elsif ({
4048     tbody => 1, tfoot => 1, thead => 1,
4049     }->{$token->{tag_name}}) {
4050     ## have an element in table scope
4051     my $i;
4052     INSCOPE: for (reverse 0..$#$open_elements) {
4053     my $node = $open_elements->[$_];
4054     if ($node->[1] eq $token->{tag_name}) {
4055     $i = $_;
4056     last INSCOPE;
4057     } elsif ({
4058     table => 1, html => 1,
4059     }->{$node->[1]}) {
4060     last INSCOPE;
4061     }
4062     } # INSCOPE
4063     unless (defined $i) {
4064     !!!parse-error;
4065     ## Ignore the token
4066     !!!next-token;
4067     redo B;
4068     }
4069    
4070     ## As if </tr>
4071     ## have an element in table scope
4072     my $i;
4073     INSCOPE: for (reverse 0..$#$open_elements) {
4074     my $node = $open_elements->[$_];
4075     if ($node->[1] eq 'tr') {
4076     $i = $_;
4077     last INSCOPE;
4078     } elsif ({
4079     table => 1, html => 1,
4080     }->{$node->[1]}) {
4081     last INSCOPE;
4082     }
4083     } # INSCOPE
4084     unless (defined $i) {
4085     !!!parse-error;
4086     ## Ignore the token
4087     !!!next-token;
4088     redo B;
4089     }
4090    
4091     ## Clear back to table row context
4092     while (not {
4093     tr => 1, html => 1,
4094     }->{$open_elements->[-1]->[1]}) {
4095     !!!parse-error;
4096     pop @$open_elements;
4097     }
4098    
4099     pop @$open_elements; # tr
4100     $insertion_mode = 'in table body';
4101     ## reprocess
4102     redo B;
4103     } elsif ({
4104     body => 1, caption => 1, col => 1,
4105     colgroup => 1, html => 1, td => 1, th => 1,
4106     }->{$token->{tag_name}}) {
4107     !!!parse-error;
4108     ## Ignore the token
4109     !!!next-token;
4110     redo B;
4111     } else {
4112     #
4113     }
4114     } else {
4115     #
4116     }
4117    
4118     ## As if in table
4119     !!!parse-error;
4120 wakaba 1.7 $in_body->($insert_to_foster);
4121 wakaba 1.2 redo B;
4122     } elsif ($insertion_mode eq 'in cell') {
4123     if ($token->{type} eq 'character') {
4124     ## NOTE: This is a code clone of "character in body".
4125 wakaba 1.7 $reconstruct_active_formatting_elements->($insert_to_current);
4126 wakaba 1.2
4127     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4128    
4129     !!!next-token;
4130     redo B;
4131     } elsif ($token->{type} eq 'comment') {
4132     ## NOTE: This is a code clone of "comment in body".
4133     my $comment = $self->{document}->create_comment ($token->{data});
4134     $open_elements->[-1]->[0]->append_child ($comment);
4135     !!!next-token;
4136     redo B;
4137     } elsif ($token->{type} eq 'start tag') {
4138     if ({
4139     caption => 1, col => 1, colgroup => 1,
4140     tbody => 1, td => 1, tfoot => 1, th => 1,
4141     thead => 1, tr => 1,
4142     }->{$token->{tag_name}}) {
4143     ## have an element in table scope
4144     my $tn;
4145     INSCOPE: for (reverse 0..$#$open_elements) {
4146     my $node = $open_elements->[$_];
4147     if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4148     $tn = $node->[1];
4149     last INSCOPE;
4150     } elsif ({
4151     table => 1, html => 1,
4152     }->{$node->[1]}) {
4153     last INSCOPE;
4154     }
4155     } # INSCOPE
4156     unless (defined $tn) {
4157     !!!parse-error;
4158     ## Ignore the token
4159     !!!next-token;
4160     redo B;
4161     }
4162    
4163     ## Close the cell
4164     !!!back-token; # <?>
4165     $token = {type => 'end tag', tag_name => $tn};
4166     redo B;
4167     } else {
4168     #
4169     }
4170     } elsif ($token->{type} eq 'end tag') {
4171 wakaba 1.6 if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4172 wakaba 1.2 ## have an element in table scope
4173     my $i;
4174     INSCOPE: for (reverse 0..$#$open_elements) {
4175     my $node = $open_elements->[$_];
4176     if ($node->[1] eq $token->{tag_name}) {
4177     $i = $_;
4178     last INSCOPE;
4179     } elsif ({
4180     table => 1, html => 1,
4181     }->{$node->[1]}) {
4182     last INSCOPE;
4183     }
4184     } # INSCOPE
4185     unless (defined $i) {
4186     !!!parse-error;
4187     ## Ignore the token
4188     !!!next-token;
4189     redo B;
4190     }
4191    
4192     ## generate implied end tags
4193     if ({
4194     dd => 1, dt => 1, li => 1, p => 1,
4195     td => ($token->{tag_name} eq 'th'),
4196     th => ($token->{tag_name} eq 'td'),
4197     tr => 1,
4198     }->{$open_elements->[-1]->[1]}) {
4199     !!!back-token;
4200     $token = {type => 'end tag',
4201     tag_name => $open_elements->[-1]->[1]}; # MUST
4202     redo B;
4203     }
4204    
4205     if ($open_elements->[-1]->[1] ne $token->{tag_name}) {
4206     !!!parse-error;
4207     }
4208    
4209     splice @$open_elements, $i;
4210    
4211     $clear_up_to_marker->();
4212    
4213     $insertion_mode = 'in row';
4214    
4215     !!!next-token;
4216     redo B;
4217     } elsif ({
4218     body => 1, caption => 1, col => 1,
4219     colgroup => 1, html => 1,
4220     }->{$token->{tag_name}}) {
4221     !!!parse-error;
4222     ## Ignore the token
4223     !!!next-token;
4224     redo B;
4225     } elsif ({
4226     table => 1, tbody => 1, tfoot => 1,
4227     thead => 1, tr => 1,
4228     }->{$token->{tag_name}}) {
4229     ## have an element in table scope
4230     my $i;
4231     my $tn;
4232     INSCOPE: for (reverse 0..$#$open_elements) {
4233     my $node = $open_elements->[$_];
4234     if ($node->[1] eq $token->{tag_name}) {
4235     $i = $_;
4236     last INSCOPE;
4237     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4238     $tn = $node->[1];
4239     ## NOTE: There is exactly one |td| or |th| element
4240     ## in scope in the stack of open elements by definition.
4241     } elsif ({
4242     table => 1, html => 1,
4243     }->{$node->[1]}) {
4244     last INSCOPE;
4245     }
4246     } # INSCOPE
4247     unless (defined $i) {
4248     !!!parse-error;
4249     ## Ignore the token
4250     !!!next-token;
4251     redo B;
4252     }
4253    
4254     ## Close the cell
4255     !!!back-token; # </?>
4256     $token = {type => 'end tag', tag_name => $tn};
4257     redo B;
4258     } else {
4259     #
4260     }
4261     } else {
4262     #
4263     }
4264    
4265 wakaba 1.7 $in_body->($insert_to_current);
4266 wakaba 1.2 redo B;
4267     } elsif ($insertion_mode eq 'in select') {
4268     if ($token->{type} eq 'character') {
4269     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4270     !!!next-token;
4271     redo B;
4272     } elsif ($token->{type} eq 'comment') {
4273     my $comment = $self->{document}->create_comment ($token->{data});
4274     $open_elements->[-1]->[0]->append_child ($comment);
4275     !!!next-token;
4276     redo B;
4277     } elsif ($token->{type} eq 'start tag') {
4278     if ($token->{tag_name} eq 'option') {
4279     if ($open_elements->[-1]->[1] eq 'option') {
4280     ## As if </option>
4281     pop @$open_elements;
4282     }
4283    
4284     !!!insert-element ($token->{tag_name}, $token->{attributes});
4285     !!!next-token;
4286     redo B;
4287     } elsif ($token->{tag_name} eq 'optgroup') {
4288     if ($open_elements->[-1]->[1] eq 'option') {
4289     ## As if </option>
4290     pop @$open_elements;
4291     }
4292    
4293     if ($open_elements->[-1]->[1] eq 'optgroup') {
4294     ## As if </optgroup>
4295     pop @$open_elements;
4296     }
4297    
4298     !!!insert-element ($token->{tag_name}, $token->{attributes});
4299     !!!next-token;
4300     redo B;
4301     } elsif ($token->{tag_name} eq 'select') {
4302     !!!parse-error;
4303     ## As if </select> instead
4304     ## have an element in table scope
4305     my $i;
4306     INSCOPE: for (reverse 0..$#$open_elements) {
4307     my $node = $open_elements->[$_];
4308     if ($node->[1] eq $token->{tag_name}) {
4309     $i = $_;
4310     last INSCOPE;
4311     } elsif ({
4312     table => 1, html => 1,
4313     }->{$node->[1]}) {
4314     last INSCOPE;
4315     }
4316     } # INSCOPE
4317     unless (defined $i) {
4318     !!!parse-error;
4319     ## Ignore the token
4320     !!!next-token;
4321     redo B;
4322     }
4323    
4324     splice @$open_elements, $i;
4325    
4326     $reset_insertion_mode->();
4327    
4328     !!!next-token;
4329     redo B;
4330     } else {
4331     #
4332     }
4333     } elsif ($token->{type} eq 'end tag') {
4334     if ($token->{tag_name} eq 'optgroup') {
4335     if ($open_elements->[-1]->[1] eq 'option' and
4336     $open_elements->[-2]->[1] eq 'optgroup') {
4337     ## As if </option>
4338     splice @$open_elements, -2;
4339     } elsif ($open_elements->[-1]->[1] eq 'optgroup') {
4340     pop @$open_elements;
4341     } else {
4342     !!!parse-error;
4343     ## Ignore the token
4344     }
4345     !!!next-token;
4346     redo B;
4347     } elsif ($token->{tag_name} eq 'option') {
4348     if ($open_elements->[-1]->[1] eq 'option') {
4349     pop @$open_elements;
4350     } else {
4351     !!!parse-error;
4352     ## Ignore the token
4353     }
4354     !!!next-token;
4355     redo B;
4356     } elsif ($token->{tag_name} eq 'select') {
4357     ## have an element in table scope
4358     my $i;
4359     INSCOPE: for (reverse 0..$#$open_elements) {
4360     my $node = $open_elements->[$_];
4361     if ($node->[1] eq $token->{tag_name}) {
4362     $i = $_;
4363     last INSCOPE;
4364     } elsif ({
4365     table => 1, html => 1,
4366     }->{$node->[1]}) {
4367     last INSCOPE;
4368     }
4369     } # INSCOPE
4370     unless (defined $i) {
4371     !!!parse-error;
4372     ## Ignore the token
4373     !!!next-token;
4374     redo B;
4375     }
4376    
4377     splice @$open_elements, $i;
4378    
4379     $reset_insertion_mode->();
4380    
4381     !!!next-token;
4382     redo B;
4383     } elsif ({
4384     caption => 1, table => 1, tbody => 1,
4385     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4386     }->{$token->{tag_name}}) {
4387     !!!parse-error;
4388    
4389     ## have an element in table scope
4390     my $i;
4391     INSCOPE: for (reverse 0..$#$open_elements) {
4392     my $node = $open_elements->[$_];
4393     if ($node->[1] eq $token->{tag_name}) {
4394     $i = $_;
4395     last INSCOPE;
4396     } elsif ({
4397     table => 1, html => 1,
4398     }->{$node->[1]}) {
4399     last INSCOPE;
4400     }
4401     } # INSCOPE
4402     unless (defined $i) {
4403     ## Ignore the token
4404     !!!next-token;
4405     redo B;
4406     }
4407    
4408     ## As if </select>
4409     ## have an element in table scope
4410     undef $i;
4411     INSCOPE: for (reverse 0..$#$open_elements) {
4412     my $node = $open_elements->[$_];
4413     if ($node->[1] eq 'select') {
4414     $i = $_;
4415     last INSCOPE;
4416     } elsif ({
4417     table => 1, html => 1,
4418     }->{$node->[1]}) {
4419     last INSCOPE;
4420     }
4421     } # INSCOPE
4422     unless (defined $i) {
4423     !!!parse-error;
4424     ## Ignore the </select> token
4425     !!!next-token; ## TODO: ok?
4426     redo B;
4427     }
4428    
4429     splice @$open_elements, $i;
4430    
4431     $reset_insertion_mode->();
4432    
4433     ## reprocess
4434     redo B;
4435     } else {
4436     #
4437     }
4438     } else {
4439     #
4440     }
4441    
4442     !!!parse-error;
4443     ## Ignore the token
4444 wakaba 1.7 !!!next-token;
4445 wakaba 1.2 redo B;
4446     } elsif ($insertion_mode eq 'after body') {
4447     if ($token->{type} eq 'character') {
4448     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4449     ## As if in body
4450 wakaba 1.7 $reconstruct_active_formatting_elements->($insert_to_current);
4451 wakaba 1.2
4452     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4453    
4454     unless (length $token->{data}) {
4455     !!!next-token;
4456     redo B;
4457     }
4458     }
4459    
4460     #
4461     } elsif ($token->{type} eq 'comment') {
4462     my $comment = $self->{document}->create_comment ($token->{data});
4463     $open_elements->[0]->[0]->append_child ($comment);
4464     !!!next-token;
4465     redo B;
4466     } elsif ($token->{type} eq 'end tag') {
4467 wakaba 1.6 if ($token->{tag_name} eq 'html') {
4468 wakaba 1.2 ## TODO: if inner_html, parse-error, ignore the token; otherwise,
4469    
4470     $phase = 'trailing end';
4471     !!!next-token;
4472     redo B;
4473     } else {
4474     #
4475     }
4476     } else {
4477     #
4478     }
4479    
4480     !!!parse-error;
4481     $insertion_mode = 'in body';
4482     ## reprocess
4483     redo B;
4484     } elsif ($insertion_mode eq 'in frameset') {
4485     if ($token->{type} eq 'character') {
4486     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4487     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4488    
4489     unless (length $token->{data}) {
4490     !!!next-token;
4491     redo B;
4492     }
4493     }
4494    
4495     #
4496     } elsif ($token->{type} eq 'comment') {
4497     my $comment = $self->{document}->create_comment ($token->{data});
4498     $open_elements->[-1]->[0]->append_child ($comment);
4499     !!!next-token;
4500     redo B;
4501     } elsif ($token->{type} eq 'start tag') {
4502     if ($token->{tag_name} eq 'frameset') {
4503     !!!insert-element ($token->{tag_name}, $token->{attributes});
4504     !!!next-token;
4505     redo B;
4506     } elsif ($token->{tag_name} eq 'frame') {
4507     !!!insert-element ($token->{tag_name}, $token->{attributes});
4508     pop @$open_elements;
4509     !!!next-token;
4510     redo B;
4511     } elsif ($token->{tag_name} eq 'noframes') {
4512 wakaba 1.7 $in_body->($insert_to_current);
4513 wakaba 1.2 redo B;
4514     } else {
4515     #
4516     }
4517     } elsif ($token->{type} eq 'end tag') {
4518     if ($token->{tag_name} eq 'frameset') {
4519     if ($open_elements->[-1]->[1] eq 'html' and
4520     @$open_elements == 1) {
4521     !!!parse-error;
4522     ## Ignore the token
4523     !!!next-token;
4524     } else {
4525     pop @$open_elements;
4526     !!!next-token;
4527     }
4528    
4529     ## if not inner_html and
4530     if ($open_elements->[-1]->[1] ne 'frameset') {
4531     $insertion_mode = 'after frameset';
4532     }
4533     redo B;
4534     } else {
4535     #
4536     }
4537     } else {
4538     #
4539     }
4540    
4541     !!!parse-error;
4542     ## Ignore the token
4543     !!!next-token;
4544     redo B;
4545     } elsif ($insertion_mode eq 'after frameset') {
4546     if ($token->{type} eq 'character') {
4547     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4548     $open_elements->[-1]->[0]->manakai_append_text ($token->{data});
4549    
4550     unless (length $token->{data}) {
4551     !!!next-token;
4552     redo B;
4553     }
4554     }
4555    
4556     #
4557     } elsif ($token->{type} eq 'comment') {
4558     my $comment = $self->{document}->create_comment ($token->{data});
4559     $open_elements->[-1]->[0]->append_child ($comment);
4560     !!!next-token;
4561     redo B;
4562     } elsif ($token->{type} eq 'start tag') {
4563     if ($token->{tag_name} eq 'noframes') {
4564 wakaba 1.7 $in_body->($insert_to_current);
4565 wakaba 1.2 redo B;
4566     } else {
4567     #
4568     }
4569     } elsif ($token->{type} eq 'end tag') {
4570     if ($token->{tag_name} eq 'html') {
4571     $phase = 'trailing end';
4572     !!!next-token;
4573     redo B;
4574     } else {
4575     #
4576     }
4577     } else {
4578     #
4579     }
4580    
4581     !!!parse-error;
4582     ## Ignore the token
4583     !!!next-token;
4584     redo B;
4585    
4586     ## ISSUE: An issue in spec there
4587     } else {
4588     die "$0: $insertion_mode: Unknown insertion mode";
4589     }
4590     }
4591     } elsif ($phase eq 'trailing end') {
4592     ## states in the main stage is preserved yet # MUST
4593    
4594     if ($token->{type} eq 'DOCTYPE') {
4595     !!!parse-error;
4596     ## Ignore the token
4597     !!!next-token;
4598     redo B;
4599     } elsif ($token->{type} eq 'comment') {
4600     my $comment = $self->{document}->create_comment ($token->{data});
4601     $self->{document}->append_child ($comment);
4602     !!!next-token;
4603     redo B;
4604     } elsif ($token->{type} eq 'character') {
4605     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4606 wakaba 1.7 my $data = $1;
4607 wakaba 1.2 ## As if in the main phase.
4608     ## NOTE: The insertion mode in the main phase
4609     ## just before the phase has been changed to the trailing
4610     ## end phase is either "after body" or "after frameset".
4611 wakaba 1.7 $reconstruct_active_formatting_elements->($insert_to_current)
4612 wakaba 1.2 if $phase eq 'main';
4613    
4614 wakaba 1.7 $open_elements->[-1]->[0]->manakai_append_text ($data);
4615 wakaba 1.2
4616     unless (length $token->{data}) {
4617     !!!next-token;
4618     redo B;
4619     }
4620     }
4621    
4622     !!!parse-error;
4623     $phase = 'main';
4624     ## reprocess
4625     redo B;
4626     } elsif ($token->{type} eq 'start tag' or
4627     $token->{type} eq 'end tag') {
4628     !!!parse-error;
4629     $phase = 'main';
4630     ## reprocess
4631     redo B;
4632     } elsif ($token->{type} eq 'end-of-file') {
4633     ## Stop parsing
4634     last B;
4635     } else {
4636     die "$0: $token->{type}: Unknown token";
4637     }
4638     }
4639     } # B
4640    
4641     ## Stop parsing # MUST
4642    
4643     ## TODO: script stuffs
4644     } # _construct_tree
4645    
## Serialize the children of |$node| into an HTML fragment string
## (the |innerHTML| getter algorithm).
##
## Arguments:
##   $class    - Invocant; not used by the algorithm itself.
##   $node     - DOM node whose child nodes are serialized.  The node
##               itself is not part of the output, but it and its
##               ancestors decide whether text is emitted unescaped.
##   $on_error - Code reference called with any child node whose node
##               type cannot be serialized.
##
## Returns a reference to the serialized string.
sub inner_html ($$$) {
  my ($class, $node, $on_error) = @_;

  ## Elements whose text content is serialized verbatim (no escaping).
  my %cdata_element = map { $_ => 1 } qw(
    style script xmp iframe noembed noframes noscript
  );

  ## Elements that never have an end tag or serialized children.
  my %void_element = map { $_ => 1 } qw(
    area base basefont bgsound br col embed frame hr
    img input link meta param spacer wbr
  );

  ## Step 1
  my $s = '';

  ## If |$node| or any of its ancestors is an XHTML-namespace CDATA-like
  ## element, text nodes are emitted as is from the very beginning.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## A null namespace simply does not match (and must not warn).
      my $ns = $parent->namespace_uri;
      if (defined $ns and
          $ns eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a work list in document order.  Besides node objects it
  ## holds plain strings: end tags to be emitted literally and the
  ## 'cdata-out' sentinel that turns the unescaped mode off again.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="';
        my $attr_value = $attr->value;
        ## escape ("&" first, so that later references are not re-escaped)
        $attr_value =~ s/&/&amp;/g;
        $attr_value =~ s/</&lt;/g;
        $attr_value =~ s/>/&gt;/g;
        $attr_value =~ s/"/&quot;/g;
        $s .= $attr_value . '"';
      }
      $s .= '>';

      next C if $void_element{$tag_name};

      if (not $in_cdata and $cdata_element{$tag_name}) {
        ## Queued first so that it ends up right after the end tag.
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATASection
      if ($in_cdata) {
        $s .= $child->data;
      } else {
        my $value = $child->data;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $s .= $value;
      }
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The children replace the entity reference in place.  They have
      ## to go to the FRONT of the work list; appending them to the end
      ## would emit them after all following siblings and after the end
      ## tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child);
    }
  } # C

  ## Step 3
  return \$s;
} # inner_html
4742 wakaba 1.1 1;
4743 wakaba 1.7 # $Date: 2007/04/30 14:12:02 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24