/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Contents of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log


Revision 1.21 - (hide annotations) (download) (as text)
Sat Jun 23 14:34:39 2007 UTC (17 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.20: +6 -5 lines
File MIME type: application/x-wais-source
++ whatpm/Whatpm/ChangeLog	23 Jun 2007 14:34:34 -0000
	* HTML.pm.src: HTML5 revision 918 (</head>, </body>, </html>).

2007-06-23  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.2 package Whatpm::HTML;
2 wakaba 1.1 use strict;
3 wakaba 1.21 our $VERSION=do{my @r=(q$Revision: 1.20 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
5 wakaba 1.18 ## ISSUE:
6     ## var doc = implementation.createDocument (null, null, null);
7     ## doc.write ('');
8     ## alert (doc.compatMode);
9 wakaba 1.1
## Start tags with one of these names may be written with a "/" directly
## before the closing ">"; for any other tag the tokenizer reports a
## |nestc| parse error when it sees the "/".
my $permitted_slash_tag_name = {
  map { ($_ => 1) } qw(
    base link meta hr br img embed param area col input
  )
};
23    
## Replacement table for the code points 0x80..0x9F.  The key is the
## original code point and the value is the code point to use instead;
## entries without a replacement of their own map to U+FFFD.
## NOTE(review): presumably consulted when resolving numeric character
## references; the code that reads this table is outside this section.
my $c1_entity_char = do {
  my @replacement = (
    ## 0x80 .. 0x87
    0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    ## 0x88 .. 0x8F
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
    ## 0x90 .. 0x97
    0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    ## 0x98 .. 0x9F
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
  );
  my %char_for;
  @char_for{0x80 .. 0x9F} = @replacement;
  \%char_for;
}; # $c1_entity_char
58 wakaba 1.1
## Element names that belong to the "special" category.
## (The code that consults this table is outside this section.)
my $special_category = {
  map { ($_ => 1) } qw(
    address area base basefont bgsound
    blockquote body br center col colgroup
    dd dir div dl dt embed fieldset
    form frame frameset h1 h2 h3
    h4 h5 h6 head hr iframe image
    img input isindex li link listing
    menu meta noembed noframes noscript
    ol optgroup option p param plaintext
    pre script select spacer style tbody
    textarea tfoot thead title tr ul wbr
  )
};
## Element names that belong to the "scoping" category.
## (The code that consults this table is outside this section.)
my $scoping_category = {
  map { ($_ => 1) } qw(
    button caption html marquee object table td th
  )
};
## Element names that belong to the "formatting" category.
## (The code that consults this table is outside this section.)
##
## The |strike| entry used to be misspelled |strile|, which left the
## |strike| element out of the category.
my $formatting_category = {
  a => 1, b => 1, big => 1, em => 1, font => 1, i => 1, nobr => 1,
  s => 1, small => 1, strike => 1, strong => 1, tt => 1, u => 1,
};
# $phrasing_category: all other elements
80    
## Parse a string and return the document.
##
##   $doc = Whatpm::HTML->parse_string ($string, $document, $onerror);
##
## $string   - The text to parse.  It is read through a reference to the
##             caller's argument, one character at a time.
## $document - Stored as the parser's |document| and returned once the
##             tree construction methods have run.
## $onerror  - Optional code reference called for each parse error with
##             the error's own arguments followed by |line| and |column|.
##             When it is false, errors are reported with |warn|.
##
## A fresh parser is created with |new| for every call.
sub parse_string ($$$;$) {
  my $self = shift->new;
  my $input = \$_[0];
  $self->{document} = $_[1];

  ## NOTE: |set_inner_html| copies most of this method's code

  my $pos = 0;     # index of the next unread character in $$input
  my $line = 1;
  my $column = 0;

  $self->{set_next_input_character} = sub {
    my $self = shift;

    ## Remember the character being replaced: newest first, oldest dropped.
    pop @{$self->{prev_input_character}};
    unshift @{$self->{prev_input_character}}, $self->{next_input_character};

    if ($pos >= length $$input) {
      $self->{next_input_character} = -1; # end of input
      return;
    }

    $self->{next_input_character} = ord substr ($$input, $pos, 1);
    $pos++;
    $column++;

    my $c = $self->{next_input_character};
    if ($c == 0x000D) { # CR
      ## CR and CR LF are both reported as a single LF.
      $pos++ if substr ($$input, $pos, 1) eq "\x0A";
      $self->{next_input_character} = 0x000A; # LF # MUST
      $line++;
      $column = 0;
    } elsif ($c == 0x000A) { # LF
      $line++;
      $column = 0;
    } elsif ($c > 0x10FFFF) {
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    } elsif ($c == 0x0000) { # NULL
      !!!parse-error (type => 'NULL');
      $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
    }
  };
  $self->{prev_input_character} = [-1, -1, -1];
  $self->{next_input_character} = -1;

  my $onerror = $_[2];
  $onerror ||= sub {
    my (%opt) = @_;
    warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
  };
  ## Every reported error gets the current input position appended.
  $self->{parse_error} = sub {
    $onerror->(@_, line => $line, column => $column);
  };

  $self->_initialize_tokenizer;
  $self->_initialize_tree_constructor;
  $self->_construct_tree;
  $self->_terminate_tree_constructor;

  return $self->{document};
} # parse_string
134    
## Create a bare parser object.
##
## The object starts out with two placeholder callbacks:
##
## set_next_input_character - sets the next input character to -1.
## parse_error              - does nothing.
##
## |parse_string| replaces both of them.
##
## The input callback takes the parser as its first argument, the same
## way the callback installed by |parse_string| does.  It must not close
## over |$self|: a code reference stored in the object that also captures
## the object forms a reference cycle, and the object would never be freed.
sub new ($) {
  my $class = shift;
  my $self = bless {}, $class;
  $self->{set_next_input_character} = sub {
    my $parser = shift;
    $parser->{next_input_character} = -1;
  };
  $self->{parse_error} = sub {
    #
  };
  return $self;
} # new
146    
147     ## Implementations MUST act as if they used the state machine in the spec.
148    
## Put the tokenizer into its starting condition and read the first input
## character.
sub _initialize_tokenizer ($) {
  my $self = shift;

  ## Both of these MUST have these values at the start.
  $self->{state} = 'data';
  $self->{content_model_flag} = 'PCDATA';

  ## Nothing is in progress yet.  |current_token| later holds a start tag,
  ## end tag, comment, or DOCTYPE token.
  $self->{$_} = undef
      for qw(current_token current_attribute
             last_emitted_start_tag_name last_attribute_value_state);

  ## NOTE(review): |char| is reset before the first character is read, as
  ## in the original order; whether the macro below reads it cannot be
  ## seen from this section.
  $self->{char} = [];
  ## Sets $self->{next_input_character}.
  !!!next-input-character;
  $self->{token} = [];
  ## $self->{escape} is not touched here.
} # _initialize_tokenizer
163    
164     ## A token has:
165     ## ->{type} eq 'DOCTYPE', 'start tag', 'end tag', 'comment',
166     ## 'character', or 'end-of-file'
167 wakaba 1.18 ## ->{name} (DOCTYPE, start tag (tag name), end tag (tag name))
168     ## ->{public_identifier} (DOCTYPE)
169     ## ->{system_identifier} (DOCTYPE)
170     ## ->{correct} == 1 or 0 (DOCTYPE)
171 wakaba 1.1 ## ->{attributes} isa HASH (start tag, end tag)
172     ## ->{data} (comment, character)
173    
174     ## Emitted token MUST immediately be handled by the tree construction state.
175    
176     ## Before each step, UA MAY check to see if either one of the scripts in
177     ## "list of scripts that will execute as soon as possible" or the first
178     ## script in the "list of scripts that will execute asynchronously",
179     ## has completed loading. If one has, then it MUST be executed
180     ## and removed from the list.
181    
182     sub _get_next_token ($) {
183     my $self = shift;
184     if (@{$self->{token}}) {
185     return shift @{$self->{token}};
186     }
187    
188     A: {
189     if ($self->{state} eq 'data') {
190     if ($self->{next_input_character} == 0x0026) { # &
191     if ($self->{content_model_flag} eq 'PCDATA' or
192     $self->{content_model_flag} eq 'RCDATA') {
193     $self->{state} = 'entity data';
194     !!!next-input-character;
195     redo A;
196     } else {
197     #
198     }
199 wakaba 1.13 } elsif ($self->{next_input_character} == 0x002D) { # -
200     if ($self->{content_model_flag} eq 'RCDATA' or
201     $self->{content_model_flag} eq 'CDATA') {
202     unless ($self->{escape}) {
203     if ($self->{prev_input_character}->[0] == 0x002D and # -
204     $self->{prev_input_character}->[1] == 0x0021 and # !
205     $self->{prev_input_character}->[2] == 0x003C) { # <
206     $self->{escape} = 1;
207     }
208     }
209     }
210    
211     #
212 wakaba 1.1 } elsif ($self->{next_input_character} == 0x003C) { # <
213 wakaba 1.13 if ($self->{content_model_flag} eq 'PCDATA' or
214     (($self->{content_model_flag} eq 'CDATA' or
215     $self->{content_model_flag} eq 'RCDATA') and
216     not $self->{escape})) {
217 wakaba 1.1 $self->{state} = 'tag open';
218     !!!next-input-character;
219     redo A;
220     } else {
221     #
222     }
223 wakaba 1.13 } elsif ($self->{next_input_character} == 0x003E) { # >
224     if ($self->{escape} and
225     ($self->{content_model_flag} eq 'RCDATA' or
226     $self->{content_model_flag} eq 'CDATA')) {
227     if ($self->{prev_input_character}->[0] == 0x002D and # -
228     $self->{prev_input_character}->[1] == 0x002D) { # -
229     delete $self->{escape};
230     }
231     }
232    
233     #
234 wakaba 1.1 } elsif ($self->{next_input_character} == -1) {
235     !!!emit ({type => 'end-of-file'});
236     last A; ## TODO: ok?
237     }
238     # Anything else
239     my $token = {type => 'character',
240     data => chr $self->{next_input_character}};
241     ## Stay in the data state
242     !!!next-input-character;
243    
244     !!!emit ($token);
245    
246     redo A;
247     } elsif ($self->{state} eq 'entity data') {
248     ## (cannot happen in CDATA state)
249    
250     my $token = $self->_tokenize_attempt_to_consume_an_entity;
251    
252     $self->{state} = 'data';
253     # next-input-character is already done
254    
255     unless (defined $token) {
256     !!!emit ({type => 'character', data => '&'});
257     } else {
258     !!!emit ($token);
259     }
260    
261     redo A;
262     } elsif ($self->{state} eq 'tag open') {
263     if ($self->{content_model_flag} eq 'RCDATA' or
264     $self->{content_model_flag} eq 'CDATA') {
265     if ($self->{next_input_character} == 0x002F) { # /
266     !!!next-input-character;
267     $self->{state} = 'close tag open';
268     redo A;
269     } else {
270     ## reconsume
271     $self->{state} = 'data';
272    
273     !!!emit ({type => 'character', data => '<'});
274    
275     redo A;
276     }
277     } elsif ($self->{content_model_flag} eq 'PCDATA') {
278     if ($self->{next_input_character} == 0x0021) { # !
279     $self->{state} = 'markup declaration open';
280     !!!next-input-character;
281     redo A;
282     } elsif ($self->{next_input_character} == 0x002F) { # /
283     $self->{state} = 'close tag open';
284     !!!next-input-character;
285     redo A;
286     } elsif (0x0041 <= $self->{next_input_character} and
287     $self->{next_input_character} <= 0x005A) { # A..Z
288     $self->{current_token}
289     = {type => 'start tag',
290     tag_name => chr ($self->{next_input_character} + 0x0020)};
291     $self->{state} = 'tag name';
292     !!!next-input-character;
293     redo A;
294     } elsif (0x0061 <= $self->{next_input_character} and
295     $self->{next_input_character} <= 0x007A) { # a..z
296     $self->{current_token} = {type => 'start tag',
297     tag_name => chr ($self->{next_input_character})};
298     $self->{state} = 'tag name';
299     !!!next-input-character;
300     redo A;
301     } elsif ($self->{next_input_character} == 0x003E) { # >
302 wakaba 1.3 !!!parse-error (type => 'empty start tag');
303 wakaba 1.1 $self->{state} = 'data';
304     !!!next-input-character;
305    
306     !!!emit ({type => 'character', data => '<>'});
307    
308     redo A;
309     } elsif ($self->{next_input_character} == 0x003F) { # ?
310 wakaba 1.3 !!!parse-error (type => 'pio');
311 wakaba 1.1 $self->{state} = 'bogus comment';
312     ## $self->{next_input_character} is intentionally left as is
313     redo A;
314     } else {
315 wakaba 1.3 !!!parse-error (type => 'bare stago');
316 wakaba 1.1 $self->{state} = 'data';
317     ## reconsume
318    
319     !!!emit ({type => 'character', data => '<'});
320    
321     redo A;
322     }
323     } else {
324     die "$0: $self->{content_model_flag}: Unknown content model flag";
325     }
326     } elsif ($self->{state} eq 'close tag open') {
327     if ($self->{content_model_flag} eq 'RCDATA' or
328     $self->{content_model_flag} eq 'CDATA') {
329     my @next_char;
330     TAGNAME: for (my $i = 0; $i < length $self->{last_emitted_start_tag_name}; $i++) {
331     push @next_char, $self->{next_input_character};
332     my $c = ord substr ($self->{last_emitted_start_tag_name}, $i, 1);
333     my $C = 0x0061 <= $c && $c <= 0x007A ? $c - 0x0020 : $c;
334     if ($self->{next_input_character} == $c or $self->{next_input_character} == $C) {
335     !!!next-input-character;
336     next TAGNAME;
337     } else {
338     $self->{next_input_character} = shift @next_char; # reconsume
339     !!!back-next-input-character (@next_char);
340     $self->{state} = 'data';
341    
342     !!!emit ({type => 'character', data => '</'});
343    
344     redo A;
345     }
346     }
347     push @next_char, $self->{next_input_character};
348    
349     unless ($self->{next_input_character} == 0x0009 or # HT
350     $self->{next_input_character} == 0x000A or # LF
351     $self->{next_input_character} == 0x000B or # VT
352     $self->{next_input_character} == 0x000C or # FF
353     $self->{next_input_character} == 0x0020 or # SP
354     $self->{next_input_character} == 0x003E or # >
355     $self->{next_input_character} == 0x002F or # /
356     $self->{next_input_character} == -1) {
357     $self->{next_input_character} = shift @next_char; # reconsume
358     !!!back-next-input-character (@next_char);
359     $self->{state} = 'data';
360    
361     !!!emit ({type => 'character', data => '</'});
362    
363     redo A;
364     } else {
365     $self->{next_input_character} = shift @next_char;
366     !!!back-next-input-character (@next_char);
367     # and consume...
368     }
369     }
370    
371     if (0x0041 <= $self->{next_input_character} and
372     $self->{next_input_character} <= 0x005A) { # A..Z
373     $self->{current_token} = {type => 'end tag',
374     tag_name => chr ($self->{next_input_character} + 0x0020)};
375     $self->{state} = 'tag name';
376     !!!next-input-character;
377     redo A;
378     } elsif (0x0061 <= $self->{next_input_character} and
379     $self->{next_input_character} <= 0x007A) { # a..z
380     $self->{current_token} = {type => 'end tag',
381     tag_name => chr ($self->{next_input_character})};
382     $self->{state} = 'tag name';
383     !!!next-input-character;
384     redo A;
385     } elsif ($self->{next_input_character} == 0x003E) { # >
386 wakaba 1.3 !!!parse-error (type => 'empty end tag');
387 wakaba 1.1 $self->{state} = 'data';
388     !!!next-input-character;
389     redo A;
390     } elsif ($self->{next_input_character} == -1) {
391 wakaba 1.3 !!!parse-error (type => 'bare etago');
392 wakaba 1.1 $self->{state} = 'data';
393     # reconsume
394    
395     !!!emit ({type => 'character', data => '</'});
396    
397     redo A;
398     } else {
399 wakaba 1.3 !!!parse-error (type => 'bogus end tag');
400 wakaba 1.1 $self->{state} = 'bogus comment';
401     ## $self->{next_input_character} is intentionally left as is
402     redo A;
403     }
404     } elsif ($self->{state} eq 'tag name') {
405     if ($self->{next_input_character} == 0x0009 or # HT
406     $self->{next_input_character} == 0x000A or # LF
407     $self->{next_input_character} == 0x000B or # VT
408     $self->{next_input_character} == 0x000C or # FF
409     $self->{next_input_character} == 0x0020) { # SP
410     $self->{state} = 'before attribute name';
411     !!!next-input-character;
412     redo A;
413     } elsif ($self->{next_input_character} == 0x003E) { # >
414     if ($self->{current_token}->{type} eq 'start tag') {
415     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
416     } elsif ($self->{current_token}->{type} eq 'end tag') {
417     $self->{content_model_flag} = 'PCDATA'; # MUST
418     if ($self->{current_token}->{attributes}) {
419 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
420 wakaba 1.1 }
421     } else {
422     die "$0: $self->{current_token}->{type}: Unknown token type";
423     }
424     $self->{state} = 'data';
425     !!!next-input-character;
426    
427     !!!emit ($self->{current_token}); # start tag or end tag
428     undef $self->{current_token};
429    
430     redo A;
431     } elsif (0x0041 <= $self->{next_input_character} and
432     $self->{next_input_character} <= 0x005A) { # A..Z
433     $self->{current_token}->{tag_name} .= chr ($self->{next_input_character} + 0x0020);
434     # start tag or end tag
435     ## Stay in this state
436     !!!next-input-character;
437     redo A;
438 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
439 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
440 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
441     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
442     } elsif ($self->{current_token}->{type} eq 'end tag') {
443     $self->{content_model_flag} = 'PCDATA'; # MUST
444     if ($self->{current_token}->{attributes}) {
445 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
446 wakaba 1.1 }
447     } else {
448     die "$0: $self->{current_token}->{type}: Unknown token type";
449     }
450     $self->{state} = 'data';
451     # reconsume
452    
453     !!!emit ($self->{current_token}); # start tag or end tag
454     undef $self->{current_token};
455    
456     redo A;
457     } elsif ($self->{next_input_character} == 0x002F) { # /
458     !!!next-input-character;
459     if ($self->{next_input_character} == 0x003E and # >
460     $self->{current_token}->{type} eq 'start tag' and
461     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
462     # permitted slash
463     #
464     } else {
465 wakaba 1.3 !!!parse-error (type => 'nestc');
466 wakaba 1.1 }
467     $self->{state} = 'before attribute name';
468     # next-input-character is already done
469     redo A;
470     } else {
471     $self->{current_token}->{tag_name} .= chr $self->{next_input_character};
472     # start tag or end tag
473     ## Stay in the state
474     !!!next-input-character;
475     redo A;
476     }
477     } elsif ($self->{state} eq 'before attribute name') {
478     if ($self->{next_input_character} == 0x0009 or # HT
479     $self->{next_input_character} == 0x000A or # LF
480     $self->{next_input_character} == 0x000B or # VT
481     $self->{next_input_character} == 0x000C or # FF
482     $self->{next_input_character} == 0x0020) { # SP
483     ## Stay in the state
484     !!!next-input-character;
485     redo A;
486     } elsif ($self->{next_input_character} == 0x003E) { # >
487     if ($self->{current_token}->{type} eq 'start tag') {
488     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
489     } elsif ($self->{current_token}->{type} eq 'end tag') {
490     $self->{content_model_flag} = 'PCDATA'; # MUST
491     if ($self->{current_token}->{attributes}) {
492 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
493 wakaba 1.1 }
494     } else {
495     die "$0: $self->{current_token}->{type}: Unknown token type";
496     }
497     $self->{state} = 'data';
498     !!!next-input-character;
499    
500     !!!emit ($self->{current_token}); # start tag or end tag
501     undef $self->{current_token};
502    
503     redo A;
504     } elsif (0x0041 <= $self->{next_input_character} and
505     $self->{next_input_character} <= 0x005A) { # A..Z
506     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
507     value => ''};
508     $self->{state} = 'attribute name';
509     !!!next-input-character;
510     redo A;
511     } elsif ($self->{next_input_character} == 0x002F) { # /
512     !!!next-input-character;
513     if ($self->{next_input_character} == 0x003E and # >
514     $self->{current_token}->{type} eq 'start tag' and
515     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
516     # permitted slash
517     #
518     } else {
519 wakaba 1.3 !!!parse-error (type => 'nestc');
520 wakaba 1.1 }
521     ## Stay in the state
522     # next-input-character is already done
523     redo A;
524 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
525 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
526 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
527     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
528     } elsif ($self->{current_token}->{type} eq 'end tag') {
529     $self->{content_model_flag} = 'PCDATA'; # MUST
530     if ($self->{current_token}->{attributes}) {
531 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
532 wakaba 1.1 }
533     } else {
534     die "$0: $self->{current_token}->{type}: Unknown token type";
535     }
536     $self->{state} = 'data';
537     # reconsume
538    
539     !!!emit ($self->{current_token}); # start tag or end tag
540     undef $self->{current_token};
541    
542     redo A;
543     } else {
544     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
545     value => ''};
546     $self->{state} = 'attribute name';
547     !!!next-input-character;
548     redo A;
549     }
550     } elsif ($self->{state} eq 'attribute name') {
551     my $before_leave = sub {
552     if (exists $self->{current_token}->{attributes} # start tag or end tag
553     ->{$self->{current_attribute}->{name}}) { # MUST
554 wakaba 1.3 !!!parse-error (type => 'dupulicate attribute');
555 wakaba 1.1 ## Discard $self->{current_attribute} # MUST
556     } else {
557     $self->{current_token}->{attributes}->{$self->{current_attribute}->{name}}
558     = $self->{current_attribute};
559     }
560     }; # $before_leave
561    
562     if ($self->{next_input_character} == 0x0009 or # HT
563     $self->{next_input_character} == 0x000A or # LF
564     $self->{next_input_character} == 0x000B or # VT
565     $self->{next_input_character} == 0x000C or # FF
566     $self->{next_input_character} == 0x0020) { # SP
567     $before_leave->();
568     $self->{state} = 'after attribute name';
569     !!!next-input-character;
570     redo A;
571     } elsif ($self->{next_input_character} == 0x003D) { # =
572     $before_leave->();
573     $self->{state} = 'before attribute value';
574     !!!next-input-character;
575     redo A;
576     } elsif ($self->{next_input_character} == 0x003E) { # >
577     $before_leave->();
578     if ($self->{current_token}->{type} eq 'start tag') {
579     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
580     } elsif ($self->{current_token}->{type} eq 'end tag') {
581     $self->{content_model_flag} = 'PCDATA'; # MUST
582     if ($self->{current_token}->{attributes}) {
583 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
584 wakaba 1.1 }
585     } else {
586     die "$0: $self->{current_token}->{type}: Unknown token type";
587     }
588     $self->{state} = 'data';
589     !!!next-input-character;
590    
591     !!!emit ($self->{current_token}); # start tag or end tag
592     undef $self->{current_token};
593    
594     redo A;
595     } elsif (0x0041 <= $self->{next_input_character} and
596     $self->{next_input_character} <= 0x005A) { # A..Z
597     $self->{current_attribute}->{name} .= chr ($self->{next_input_character} + 0x0020);
598     ## Stay in the state
599     !!!next-input-character;
600     redo A;
601     } elsif ($self->{next_input_character} == 0x002F) { # /
602     $before_leave->();
603     !!!next-input-character;
604     if ($self->{next_input_character} == 0x003E and # >
605     $self->{current_token}->{type} eq 'start tag' and
606     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
607     # permitted slash
608     #
609     } else {
610 wakaba 1.3 !!!parse-error (type => 'nestc');
611 wakaba 1.1 }
612     $self->{state} = 'before attribute name';
613     # next-input-character is already done
614     redo A;
615 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
616 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
617 wakaba 1.1 $before_leave->();
618     if ($self->{current_token}->{type} eq 'start tag') {
619     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
620     } elsif ($self->{current_token}->{type} eq 'end tag') {
621     $self->{content_model_flag} = 'PCDATA'; # MUST
622     if ($self->{current_token}->{attributes}) {
623 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
624 wakaba 1.1 }
625     } else {
626     die "$0: $self->{current_token}->{type}: Unknown token type";
627     }
628     $self->{state} = 'data';
629     # reconsume
630    
631     !!!emit ($self->{current_token}); # start tag or end tag
632     undef $self->{current_token};
633    
634     redo A;
635     } else {
636     $self->{current_attribute}->{name} .= chr ($self->{next_input_character});
637     ## Stay in the state
638     !!!next-input-character;
639     redo A;
640     }
641     } elsif ($self->{state} eq 'after attribute name') {
642     if ($self->{next_input_character} == 0x0009 or # HT
643     $self->{next_input_character} == 0x000A or # LF
644     $self->{next_input_character} == 0x000B or # VT
645     $self->{next_input_character} == 0x000C or # FF
646     $self->{next_input_character} == 0x0020) { # SP
647     ## Stay in the state
648     !!!next-input-character;
649     redo A;
650     } elsif ($self->{next_input_character} == 0x003D) { # =
651     $self->{state} = 'before attribute value';
652     !!!next-input-character;
653     redo A;
654     } elsif ($self->{next_input_character} == 0x003E) { # >
655     if ($self->{current_token}->{type} eq 'start tag') {
656     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
657     } elsif ($self->{current_token}->{type} eq 'end tag') {
658     $self->{content_model_flag} = 'PCDATA'; # MUST
659     if ($self->{current_token}->{attributes}) {
660 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
661 wakaba 1.1 }
662     } else {
663     die "$0: $self->{current_token}->{type}: Unknown token type";
664     }
665     $self->{state} = 'data';
666     !!!next-input-character;
667    
668     !!!emit ($self->{current_token}); # start tag or end tag
669     undef $self->{current_token};
670    
671     redo A;
672     } elsif (0x0041 <= $self->{next_input_character} and
673     $self->{next_input_character} <= 0x005A) { # A..Z
674     $self->{current_attribute} = {name => chr ($self->{next_input_character} + 0x0020),
675     value => ''};
676     $self->{state} = 'attribute name';
677     !!!next-input-character;
678     redo A;
679     } elsif ($self->{next_input_character} == 0x002F) { # /
680     !!!next-input-character;
681     if ($self->{next_input_character} == 0x003E and # >
682     $self->{current_token}->{type} eq 'start tag' and
683     $permitted_slash_tag_name->{$self->{current_token}->{tag_name}}) {
684     # permitted slash
685     #
686     } else {
687 wakaba 1.3 !!!parse-error (type => 'nestc');
688 wakaba 1.1 }
689     $self->{state} = 'before attribute name';
690     # next-input-character is already done
691     redo A;
692 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
693 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
694 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
695     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
696     } elsif ($self->{current_token}->{type} eq 'end tag') {
697     $self->{content_model_flag} = 'PCDATA'; # MUST
698     if ($self->{current_token}->{attributes}) {
699 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
700 wakaba 1.1 }
701     } else {
702     die "$0: $self->{current_token}->{type}: Unknown token type";
703     }
704     $self->{state} = 'data';
705     # reconsume
706    
707     !!!emit ($self->{current_token}); # start tag or end tag
708     undef $self->{current_token};
709    
710     redo A;
711     } else {
712     $self->{current_attribute} = {name => chr ($self->{next_input_character}),
713     value => ''};
714     $self->{state} = 'attribute name';
715     !!!next-input-character;
716     redo A;
717     }
718     } elsif ($self->{state} eq 'before attribute value') {
719     if ($self->{next_input_character} == 0x0009 or # HT
720     $self->{next_input_character} == 0x000A or # LF
721     $self->{next_input_character} == 0x000B or # VT
722     $self->{next_input_character} == 0x000C or # FF
723     $self->{next_input_character} == 0x0020) { # SP
724     ## Stay in the state
725     !!!next-input-character;
726     redo A;
727     } elsif ($self->{next_input_character} == 0x0022) { # "
728     $self->{state} = 'attribute value (double-quoted)';
729     !!!next-input-character;
730     redo A;
731     } elsif ($self->{next_input_character} == 0x0026) { # &
732     $self->{state} = 'attribute value (unquoted)';
733     ## reconsume
734     redo A;
735     } elsif ($self->{next_input_character} == 0x0027) { # '
736     $self->{state} = 'attribute value (single-quoted)';
737     !!!next-input-character;
738     redo A;
739     } elsif ($self->{next_input_character} == 0x003E) { # >
740     if ($self->{current_token}->{type} eq 'start tag') {
741     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
742     } elsif ($self->{current_token}->{type} eq 'end tag') {
743     $self->{content_model_flag} = 'PCDATA'; # MUST
744     if ($self->{current_token}->{attributes}) {
745 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
746 wakaba 1.1 }
747     } else {
748     die "$0: $self->{current_token}->{type}: Unknown token type";
749     }
750     $self->{state} = 'data';
751     !!!next-input-character;
752    
753     !!!emit ($self->{current_token}); # start tag or end tag
754     undef $self->{current_token};
755    
756     redo A;
757 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
758 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
759 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
760     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
761     } elsif ($self->{current_token}->{type} eq 'end tag') {
762     $self->{content_model_flag} = 'PCDATA'; # MUST
763     if ($self->{current_token}->{attributes}) {
764 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
765 wakaba 1.1 }
766     } else {
767     die "$0: $self->{current_token}->{type}: Unknown token type";
768     }
769     $self->{state} = 'data';
770     ## reconsume
771    
772     !!!emit ($self->{current_token}); # start tag or end tag
773     undef $self->{current_token};
774    
775     redo A;
776     } else {
777     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
778     $self->{state} = 'attribute value (unquoted)';
779     !!!next-input-character;
780     redo A;
781     }
782     } elsif ($self->{state} eq 'attribute value (double-quoted)') {
783     if ($self->{next_input_character} == 0x0022) { # "
784     $self->{state} = 'before attribute name';
785     !!!next-input-character;
786     redo A;
787     } elsif ($self->{next_input_character} == 0x0026) { # &
788     $self->{last_attribute_value_state} = 'attribute value (double-quoted)';
789     $self->{state} = 'entity in attribute value';
790     !!!next-input-character;
791     redo A;
792     } elsif ($self->{next_input_character} == -1) {
793 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
794 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
795     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
796     } elsif ($self->{current_token}->{type} eq 'end tag') {
797     $self->{content_model_flag} = 'PCDATA'; # MUST
798     if ($self->{current_token}->{attributes}) {
799 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
800 wakaba 1.1 }
801     } else {
802     die "$0: $self->{current_token}->{type}: Unknown token type";
803     }
804     $self->{state} = 'data';
805     ## reconsume
806    
807     !!!emit ($self->{current_token}); # start tag or end tag
808     undef $self->{current_token};
809    
810     redo A;
811     } else {
812     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
813     ## Stay in the state
814     !!!next-input-character;
815     redo A;
816     }
817     } elsif ($self->{state} eq 'attribute value (single-quoted)') {
818     if ($self->{next_input_character} == 0x0027) { # '
819     $self->{state} = 'before attribute name';
820     !!!next-input-character;
821     redo A;
822     } elsif ($self->{next_input_character} == 0x0026) { # &
823     $self->{last_attribute_value_state} = 'attribute value (single-quoted)';
824     $self->{state} = 'entity in attribute value';
825     !!!next-input-character;
826     redo A;
827     } elsif ($self->{next_input_character} == -1) {
828 wakaba 1.3 !!!parse-error (type => 'unclosed attribute value');
829 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
830     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
831     } elsif ($self->{current_token}->{type} eq 'end tag') {
832     $self->{content_model_flag} = 'PCDATA'; # MUST
833     if ($self->{current_token}->{attributes}) {
834 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
835 wakaba 1.1 }
836     } else {
837     die "$0: $self->{current_token}->{type}: Unknown token type";
838     }
839     $self->{state} = 'data';
840     ## reconsume
841    
842     !!!emit ($self->{current_token}); # start tag or end tag
843     undef $self->{current_token};
844    
845     redo A;
846     } else {
847     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
848     ## Stay in the state
849     !!!next-input-character;
850     redo A;
851     }
852     } elsif ($self->{state} eq 'attribute value (unquoted)') {
853     if ($self->{next_input_character} == 0x0009 or # HT
854     $self->{next_input_character} == 0x000A or # LF
855     $self->{next_input_character} == 0x000B or # HT
856     $self->{next_input_character} == 0x000C or # FF
857     $self->{next_input_character} == 0x0020) { # SP
858     $self->{state} = 'before attribute name';
859     !!!next-input-character;
860     redo A;
861     } elsif ($self->{next_input_character} == 0x0026) { # &
862     $self->{last_attribute_value_state} = 'attribute value (unquoted)';
863     $self->{state} = 'entity in attribute value';
864     !!!next-input-character;
865     redo A;
866     } elsif ($self->{next_input_character} == 0x003E) { # >
867     if ($self->{current_token}->{type} eq 'start tag') {
868     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
869     } elsif ($self->{current_token}->{type} eq 'end tag') {
870     $self->{content_model_flag} = 'PCDATA'; # MUST
871     if ($self->{current_token}->{attributes}) {
872 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
873 wakaba 1.1 }
874     } else {
875     die "$0: $self->{current_token}->{type}: Unknown token type";
876     }
877     $self->{state} = 'data';
878     !!!next-input-character;
879    
880     !!!emit ($self->{current_token}); # start tag or end tag
881     undef $self->{current_token};
882    
883     redo A;
884 wakaba 1.17 } elsif ($self->{next_input_character} == -1) {
885 wakaba 1.3 !!!parse-error (type => 'unclosed tag');
886 wakaba 1.1 if ($self->{current_token}->{type} eq 'start tag') {
887     $self->{last_emitted_start_tag_name} = $self->{current_token}->{tag_name};
888     } elsif ($self->{current_token}->{type} eq 'end tag') {
889     $self->{content_model_flag} = 'PCDATA'; # MUST
890     if ($self->{current_token}->{attributes}) {
891 wakaba 1.3 !!!parse-error (type => 'end tag attribute');
892 wakaba 1.1 }
893     } else {
894     die "$0: $self->{current_token}->{type}: Unknown token type";
895     }
896     $self->{state} = 'data';
897     ## reconsume
898    
899     !!!emit ($self->{current_token}); # start tag or end tag
900     undef $self->{current_token};
901    
902     redo A;
903     } else {
904     $self->{current_attribute}->{value} .= chr ($self->{next_input_character});
905     ## Stay in the state
906     !!!next-input-character;
907     redo A;
908     }
909     } elsif ($self->{state} eq 'entity in attribute value') {
910     my $token = $self->_tokenize_attempt_to_consume_an_entity;
911    
912     unless (defined $token) {
913     $self->{current_attribute}->{value} .= '&';
914     } else {
915     $self->{current_attribute}->{value} .= $token->{data};
916     ## ISSUE: spec says "append the returned character token to the current attribute's value"
917     }
918    
919     $self->{state} = $self->{last_attribute_value_state};
920     # next-input-character is already done
921     redo A;
922     } elsif ($self->{state} eq 'bogus comment') {
923     ## (only happen if PCDATA state)
924    
925     my $token = {type => 'comment', data => ''};
926    
927     BC: {
928     if ($self->{next_input_character} == 0x003E) { # >
929     $self->{state} = 'data';
930     !!!next-input-character;
931    
932     !!!emit ($token);
933    
934     redo A;
935     } elsif ($self->{next_input_character} == -1) {
936     $self->{state} = 'data';
937     ## reconsume
938    
939     !!!emit ($token);
940    
941     redo A;
942     } else {
943     $token->{data} .= chr ($self->{next_input_character});
944     !!!next-input-character;
945     redo BC;
946     }
947     } # BC
948     } elsif ($self->{state} eq 'markup declaration open') {
949     ## (only happen if PCDATA state)
950    
951     my @next_char;
952     push @next_char, $self->{next_input_character};
953    
954     if ($self->{next_input_character} == 0x002D) { # -
955     !!!next-input-character;
956     push @next_char, $self->{next_input_character};
957     if ($self->{next_input_character} == 0x002D) { # -
958     $self->{current_token} = {type => 'comment', data => ''};
959     $self->{state} = 'comment';
960     !!!next-input-character;
961     redo A;
962     }
963     } elsif ($self->{next_input_character} == 0x0044 or # D
964     $self->{next_input_character} == 0x0064) { # d
965     !!!next-input-character;
966     push @next_char, $self->{next_input_character};
967     if ($self->{next_input_character} == 0x004F or # O
968     $self->{next_input_character} == 0x006F) { # o
969     !!!next-input-character;
970     push @next_char, $self->{next_input_character};
971     if ($self->{next_input_character} == 0x0043 or # C
972     $self->{next_input_character} == 0x0063) { # c
973     !!!next-input-character;
974     push @next_char, $self->{next_input_character};
975     if ($self->{next_input_character} == 0x0054 or # T
976     $self->{next_input_character} == 0x0074) { # t
977     !!!next-input-character;
978     push @next_char, $self->{next_input_character};
979     if ($self->{next_input_character} == 0x0059 or # Y
980     $self->{next_input_character} == 0x0079) { # y
981     !!!next-input-character;
982     push @next_char, $self->{next_input_character};
983     if ($self->{next_input_character} == 0x0050 or # P
984     $self->{next_input_character} == 0x0070) { # p
985     !!!next-input-character;
986     push @next_char, $self->{next_input_character};
987     if ($self->{next_input_character} == 0x0045 or # E
988     $self->{next_input_character} == 0x0065) { # e
989     ## ISSUE: What a stupid code this is!
990     $self->{state} = 'DOCTYPE';
991     !!!next-input-character;
992     redo A;
993     }
994     }
995     }
996     }
997     }
998     }
999     }
1000    
1001 wakaba 1.3 !!!parse-error (type => 'bogus comment open');
1002 wakaba 1.1 $self->{next_input_character} = shift @next_char;
1003     !!!back-next-input-character (@next_char);
1004     $self->{state} = 'bogus comment';
1005     redo A;
1006    
1007     ## ISSUE: typos in spec: chacacters, is is a parse error
1008     ## ISSUE: spec is somewhat unclear on "is the first character that will be in the comment"; what is "that will be in the comment" is what the algorithm defines, isn't it?
1009     } elsif ($self->{state} eq 'comment') {
1010     if ($self->{next_input_character} == 0x002D) { # -
1011     $self->{state} = 'comment dash';
1012     !!!next-input-character;
1013     redo A;
1014     } elsif ($self->{next_input_character} == -1) {
1015 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1016 wakaba 1.1 $self->{state} = 'data';
1017     ## reconsume
1018    
1019     !!!emit ($self->{current_token}); # comment
1020     undef $self->{current_token};
1021    
1022     redo A;
1023     } else {
1024     $self->{current_token}->{data} .= chr ($self->{next_input_character}); # comment
1025     ## Stay in the state
1026     !!!next-input-character;
1027     redo A;
1028     }
1029     } elsif ($self->{state} eq 'comment dash') {
1030     if ($self->{next_input_character} == 0x002D) { # -
1031     $self->{state} = 'comment end';
1032     !!!next-input-character;
1033     redo A;
1034     } elsif ($self->{next_input_character} == -1) {
1035 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1036 wakaba 1.1 $self->{state} = 'data';
1037     ## reconsume
1038    
1039     !!!emit ($self->{current_token}); # comment
1040     undef $self->{current_token};
1041    
1042     redo A;
1043     } else {
1044     $self->{current_token}->{data} .= '-' . chr ($self->{next_input_character}); # comment
1045     $self->{state} = 'comment';
1046     !!!next-input-character;
1047     redo A;
1048     }
1049     } elsif ($self->{state} eq 'comment end') {
1050     if ($self->{next_input_character} == 0x003E) { # >
1051     $self->{state} = 'data';
1052     !!!next-input-character;
1053    
1054     !!!emit ($self->{current_token}); # comment
1055     undef $self->{current_token};
1056    
1057     redo A;
1058     } elsif ($self->{next_input_character} == 0x002D) { # -
1059 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1060 wakaba 1.1 $self->{current_token}->{data} .= '-'; # comment
1061     ## Stay in the state
1062     !!!next-input-character;
1063     redo A;
1064     } elsif ($self->{next_input_character} == -1) {
1065 wakaba 1.3 !!!parse-error (type => 'unclosed comment');
1066 wakaba 1.1 $self->{state} = 'data';
1067     ## reconsume
1068    
1069     !!!emit ($self->{current_token}); # comment
1070     undef $self->{current_token};
1071    
1072     redo A;
1073     } else {
1074 wakaba 1.3 !!!parse-error (type => 'dash in comment');
1075 wakaba 1.1 $self->{current_token}->{data} .= '--' . chr ($self->{next_input_character}); # comment
1076     $self->{state} = 'comment';
1077     !!!next-input-character;
1078     redo A;
1079     }
1080     } elsif ($self->{state} eq 'DOCTYPE') {
1081     if ($self->{next_input_character} == 0x0009 or # HT
1082     $self->{next_input_character} == 0x000A or # LF
1083     $self->{next_input_character} == 0x000B or # VT
1084     $self->{next_input_character} == 0x000C or # FF
1085     $self->{next_input_character} == 0x0020) { # SP
1086     $self->{state} = 'before DOCTYPE name';
1087     !!!next-input-character;
1088     redo A;
1089     } else {
1090 wakaba 1.3 !!!parse-error (type => 'no space before DOCTYPE name');
1091 wakaba 1.1 $self->{state} = 'before DOCTYPE name';
1092     ## reconsume
1093     redo A;
1094     }
1095     } elsif ($self->{state} eq 'before DOCTYPE name') {
1096     if ($self->{next_input_character} == 0x0009 or # HT
1097     $self->{next_input_character} == 0x000A or # LF
1098     $self->{next_input_character} == 0x000B or # VT
1099     $self->{next_input_character} == 0x000C or # FF
1100     $self->{next_input_character} == 0x0020) { # SP
1101     ## Stay in the state
1102     !!!next-input-character;
1103     redo A;
1104     } elsif ($self->{next_input_character} == 0x003E) { # >
1105 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1106 wakaba 1.1 $self->{state} = 'data';
1107     !!!next-input-character;
1108    
1109 wakaba 1.18 !!!emit ({type => 'DOCTYPE'}); # incorrect
1110 wakaba 1.1
1111     redo A;
1112     } elsif ($self->{next_input_character} == -1) {
1113 wakaba 1.3 !!!parse-error (type => 'no DOCTYPE name');
1114 wakaba 1.1 $self->{state} = 'data';
1115     ## reconsume
1116    
1117 wakaba 1.18 !!!emit ({type => 'DOCTYPE'}); # incorrect
1118 wakaba 1.1
1119     redo A;
1120     } else {
1121 wakaba 1.18 $self->{current_token}
1122     = {type => 'DOCTYPE',
1123     name => chr ($self->{next_input_character}),
1124     correct => 1};
1125 wakaba 1.4 ## ISSUE: "Set the token's name name to the" in the spec
1126 wakaba 1.1 $self->{state} = 'DOCTYPE name';
1127     !!!next-input-character;
1128     redo A;
1129     }
1130     } elsif ($self->{state} eq 'DOCTYPE name') {
1131 wakaba 1.18 ## ISSUE: Redundant "First," in the spec.
1132 wakaba 1.1 if ($self->{next_input_character} == 0x0009 or # HT
1133     $self->{next_input_character} == 0x000A or # LF
1134     $self->{next_input_character} == 0x000B or # VT
1135     $self->{next_input_character} == 0x000C or # FF
1136     $self->{next_input_character} == 0x0020) { # SP
1137     $self->{state} = 'after DOCTYPE name';
1138     !!!next-input-character;
1139     redo A;
1140     } elsif ($self->{next_input_character} == 0x003E) { # >
1141     $self->{state} = 'data';
1142     !!!next-input-character;
1143    
1144     !!!emit ($self->{current_token}); # DOCTYPE
1145     undef $self->{current_token};
1146    
1147     redo A;
1148     } elsif ($self->{next_input_character} == -1) {
1149 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1150 wakaba 1.1 $self->{state} = 'data';
1151     ## reconsume
1152    
1153 wakaba 1.18 delete $self->{current_token}->{correct};
1154     !!!emit ($self->{current_token}); # DOCTYPE
1155 wakaba 1.1 undef $self->{current_token};
1156    
1157     redo A;
1158     } else {
1159     $self->{current_token}->{name}
1160     .= chr ($self->{next_input_character}); # DOCTYPE
1161     ## Stay in the state
1162     !!!next-input-character;
1163     redo A;
1164     }
1165     } elsif ($self->{state} eq 'after DOCTYPE name') {
1166     if ($self->{next_input_character} == 0x0009 or # HT
1167     $self->{next_input_character} == 0x000A or # LF
1168     $self->{next_input_character} == 0x000B or # VT
1169     $self->{next_input_character} == 0x000C or # FF
1170     $self->{next_input_character} == 0x0020) { # SP
1171     ## Stay in the state
1172     !!!next-input-character;
1173     redo A;
1174     } elsif ($self->{next_input_character} == 0x003E) { # >
1175     $self->{state} = 'data';
1176     !!!next-input-character;
1177    
1178     !!!emit ($self->{current_token}); # DOCTYPE
1179     undef $self->{current_token};
1180    
1181     redo A;
1182     } elsif ($self->{next_input_character} == -1) {
1183 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1184 wakaba 1.1 $self->{state} = 'data';
1185     ## reconsume
1186    
1187 wakaba 1.18 delete $self->{current_token}->{correct};
1188     !!!emit ($self->{current_token}); # DOCTYPE
1189     undef $self->{current_token};
1190    
1191     redo A;
1192     } elsif ($self->{next_input_character} == 0x0050 or # P
1193     $self->{next_input_character} == 0x0070) { # p
1194     !!!next-input-character;
1195     if ($self->{next_input_character} == 0x0055 or # U
1196     $self->{next_input_character} == 0x0075) { # u
1197     !!!next-input-character;
1198     if ($self->{next_input_character} == 0x0042 or # B
1199     $self->{next_input_character} == 0x0062) { # b
1200     !!!next-input-character;
1201     if ($self->{next_input_character} == 0x004C or # L
1202     $self->{next_input_character} == 0x006C) { # l
1203     !!!next-input-character;
1204     if ($self->{next_input_character} == 0x0049 or # I
1205     $self->{next_input_character} == 0x0069) { # i
1206     !!!next-input-character;
1207     if ($self->{next_input_character} == 0x0043 or # C
1208     $self->{next_input_character} == 0x0063) { # c
1209     $self->{state} = 'before DOCTYPE public identifier';
1210     !!!next-input-character;
1211     redo A;
1212     }
1213     }
1214     }
1215     }
1216     }
1217    
1218     #
1219     } elsif ($self->{next_input_character} == 0x0053 or # S
1220     $self->{next_input_character} == 0x0073) { # s
1221     !!!next-input-character;
1222     if ($self->{next_input_character} == 0x0059 or # Y
1223     $self->{next_input_character} == 0x0079) { # y
1224     !!!next-input-character;
1225     if ($self->{next_input_character} == 0x0053 or # S
1226     $self->{next_input_character} == 0x0073) { # s
1227     !!!next-input-character;
1228     if ($self->{next_input_character} == 0x0054 or # T
1229     $self->{next_input_character} == 0x0074) { # t
1230     !!!next-input-character;
1231     if ($self->{next_input_character} == 0x0045 or # E
1232     $self->{next_input_character} == 0x0065) { # e
1233     !!!next-input-character;
1234     if ($self->{next_input_character} == 0x004D or # M
1235     $self->{next_input_character} == 0x006D) { # m
1236     $self->{state} = 'before DOCTYPE system identifier';
1237     !!!next-input-character;
1238     redo A;
1239     }
1240     }
1241     }
1242     }
1243     }
1244    
1245     #
1246     } else {
1247     !!!next-input-character;
1248     #
1249     }
1250    
1251     !!!parse-error (type => 'string after DOCTYPE name');
1252     $self->{state} = 'bogus DOCTYPE';
1253     # next-input-character is already done
1254     redo A;
1255     } elsif ($self->{state} eq 'before DOCTYPE public identifier') {
1256     if ({
1257     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1258     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1259     }->{$self->{next_input_character}}) {
1260     ## Stay in the state
1261     !!!next-input-character;
1262     redo A;
1263     } elsif ($self->{next_input_character} eq 0x0022) { # "
1264     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1265     $self->{state} = 'DOCTYPE public identifier (double-quoted)';
1266     !!!next-input-character;
1267     redo A;
1268     } elsif ($self->{next_input_character} eq 0x0027) { # '
1269     $self->{current_token}->{public_identifier} = ''; # DOCTYPE
1270     $self->{state} = 'DOCTYPE public identifier (single-quoted)';
1271     !!!next-input-character;
1272     redo A;
1273     } elsif ($self->{next_input_character} eq 0x003E) { # >
1274     !!!parse-error (type => 'no PUBLIC literal');
1275    
1276     $self->{state} = 'data';
1277     !!!next-input-character;
1278    
1279     delete $self->{current_token}->{correct};
1280     !!!emit ($self->{current_token}); # DOCTYPE
1281     undef $self->{current_token};
1282    
1283     redo A;
1284     } elsif ($self->{next_input_character} == -1) {
1285     !!!parse-error (type => 'unclosed DOCTYPE');
1286    
1287     $self->{state} = 'data';
1288     ## reconsume
1289    
1290     delete $self->{current_token}->{correct};
1291     !!!emit ($self->{current_token}); # DOCTYPE
1292     undef $self->{current_token};
1293    
1294     redo A;
1295     } else {
1296     !!!parse-error (type => 'string after PUBLIC');
1297     $self->{state} = 'bogus DOCTYPE';
1298     !!!next-input-character;
1299     redo A;
1300     }
1301     } elsif ($self->{state} eq 'DOCTYPE public identifier (double-quoted)') {
1302     if ($self->{next_input_character} == 0x0022) { # "
1303     $self->{state} = 'after DOCTYPE public identifier';
1304     !!!next-input-character;
1305     redo A;
1306     } elsif ($self->{next_input_character} == -1) {
1307     !!!parse-error (type => 'unclosed PUBLIC literal');
1308    
1309     $self->{state} = 'data';
1310     ## reconsume
1311    
1312     delete $self->{current_token}->{correct};
1313     !!!emit ($self->{current_token}); # DOCTYPE
1314     undef $self->{current_token};
1315    
1316     redo A;
1317     } else {
1318     $self->{current_token}->{public_identifier} # DOCTYPE
1319     .= chr $self->{next_input_character};
1320     ## Stay in the state
1321     !!!next-input-character;
1322     redo A;
1323     }
1324     } elsif ($self->{state} eq 'DOCTYPE public identifier (single-quoted)') {
1325     if ($self->{next_input_character} == 0x0027) { # '
1326     $self->{state} = 'after DOCTYPE public identifier';
1327     !!!next-input-character;
1328     redo A;
1329     } elsif ($self->{next_input_character} == -1) {
1330     !!!parse-error (type => 'unclosed PUBLIC literal');
1331    
1332     $self->{state} = 'data';
1333     ## reconsume
1334    
1335     delete $self->{current_token}->{correct};
1336     !!!emit ($self->{current_token}); # DOCTYPE
1337     undef $self->{current_token};
1338    
1339     redo A;
1340     } else {
1341     $self->{current_token}->{public_identifier} # DOCTYPE
1342     .= chr $self->{next_input_character};
1343     ## Stay in the state
1344     !!!next-input-character;
1345     redo A;
1346     }
1347     } elsif ($self->{state} eq 'after DOCTYPE public identifier') {
1348     if ({
1349     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1350     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1351     }->{$self->{next_input_character}}) {
1352     ## Stay in the state
1353     !!!next-input-character;
1354     redo A;
1355     } elsif ($self->{next_input_character} == 0x0022) { # "
1356     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1357     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1358     !!!next-input-character;
1359     redo A;
1360     } elsif ($self->{next_input_character} == 0x0027) { # '
1361     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1362     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1363     !!!next-input-character;
1364     redo A;
1365     } elsif ($self->{next_input_character} == 0x003E) { # >
1366     $self->{state} = 'data';
1367     !!!next-input-character;
1368    
1369     !!!emit ($self->{current_token}); # DOCTYPE
1370     undef $self->{current_token};
1371    
1372     redo A;
1373     } elsif ($self->{next_input_character} == -1) {
1374     !!!parse-error (type => 'unclosed DOCTYPE');
1375    
1376     $self->{state} = 'data';
1377     ## reconsume
1378    
1379     delete $self->{current_token}->{correct};
1380     !!!emit ($self->{current_token}); # DOCTYPE
1381     undef $self->{current_token};
1382    
1383     redo A;
1384     } else {
1385     !!!parse-error (type => 'string after PUBLIC literal');
1386     $self->{state} = 'bogus DOCTYPE';
1387     !!!next-input-character;
1388     redo A;
1389     }
1390     } elsif ($self->{state} eq 'before DOCTYPE system identifier') {
1391     if ({
1392     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1393     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1394     }->{$self->{next_input_character}}) {
1395     ## Stay in the state
1396     !!!next-input-character;
1397     redo A;
1398     } elsif ($self->{next_input_character} == 0x0022) { # "
1399     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1400     $self->{state} = 'DOCTYPE system identifier (double-quoted)';
1401     !!!next-input-character;
1402     redo A;
1403     } elsif ($self->{next_input_character} == 0x0027) { # '
1404     $self->{current_token}->{system_identifier} = ''; # DOCTYPE
1405     $self->{state} = 'DOCTYPE system identifier (single-quoted)';
1406     !!!next-input-character;
1407     redo A;
1408     } elsif ($self->{next_input_character} == 0x003E) { # >
1409     !!!parse-error (type => 'no SYSTEM literal');
1410     $self->{state} = 'data';
1411     !!!next-input-character;
1412    
1413     delete $self->{current_token}->{correct};
1414     !!!emit ($self->{current_token}); # DOCTYPE
1415     undef $self->{current_token};
1416    
1417     redo A;
1418     } elsif ($self->{next_input_character} == -1) {
1419     !!!parse-error (type => 'unclosed DOCTYPE');
1420    
1421     $self->{state} = 'data';
1422     ## reconsume
1423    
1424     delete $self->{current_token}->{correct};
1425     !!!emit ($self->{current_token}); # DOCTYPE
1426     undef $self->{current_token};
1427    
1428     redo A;
1429     } else {
1430     !!!parse-error (type => 'string after PUBLIC literal');
1431     $self->{state} = 'bogus DOCTYPE';
1432     !!!next-input-character;
1433     redo A;
1434     }
1435     } elsif ($self->{state} eq 'DOCTYPE system identifier (double-quoted)') {
1436     if ($self->{next_input_character} == 0x0022) { # "
1437     $self->{state} = 'after DOCTYPE system identifier';
1438     !!!next-input-character;
1439     redo A;
1440     } elsif ($self->{next_input_character} == -1) {
1441     !!!parse-error (type => 'unclosed SYSTEM literal');
1442    
1443     $self->{state} = 'data';
1444     ## reconsume
1445    
1446     delete $self->{current_token}->{correct};
1447     !!!emit ($self->{current_token}); # DOCTYPE
1448     undef $self->{current_token};
1449    
1450     redo A;
1451     } else {
1452     $self->{current_token}->{system_identifier} # DOCTYPE
1453     .= chr $self->{next_input_character};
1454     ## Stay in the state
1455     !!!next-input-character;
1456     redo A;
1457     }
1458     } elsif ($self->{state} eq 'DOCTYPE system identifier (single-quoted)') {
1459     if ($self->{next_input_character} == 0x0027) { # '
1460     $self->{state} = 'after DOCTYPE system identifier';
1461     !!!next-input-character;
1462     redo A;
1463     } elsif ($self->{next_input_character} == -1) {
1464     !!!parse-error (type => 'unclosed SYSTEM literal');
1465    
1466     $self->{state} = 'data';
1467     ## reconsume
1468    
1469     delete $self->{current_token}->{correct};
1470 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1471     undef $self->{current_token};
1472    
1473     redo A;
1474     } else {
1475 wakaba 1.18 $self->{current_token}->{system_identifier} # DOCTYPE
1476     .= chr $self->{next_input_character};
1477     ## Stay in the state
1478     !!!next-input-character;
1479     redo A;
1480     }
1481     } elsif ($self->{state} eq 'after DOCTYPE system identifier') {
1482     if ({
1483     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,
1484     #0x000D => 1, # HT, LF, VT, FF, SP, CR
1485     }->{$self->{next_input_character}}) {
1486     ## Stay in the state
1487     !!!next-input-character;
1488     redo A;
1489     } elsif ($self->{next_input_character} == 0x003E) { # >
1490     $self->{state} = 'data';
1491     !!!next-input-character;
1492    
1493     !!!emit ($self->{current_token}); # DOCTYPE
1494     undef $self->{current_token};
1495    
1496     redo A;
1497     } elsif ($self->{next_input_character} == -1) {
1498     !!!parse-error (type => 'unclosed DOCTYPE');
1499    
1500     $self->{state} = 'data';
1501     ## reconsume
1502    
1503     delete $self->{current_token}->{correct};
1504     !!!emit ($self->{current_token}); # DOCTYPE
1505     undef $self->{current_token};
1506    
1507     redo A;
1508     } else {
1509     !!!parse-error (type => 'string after SYSTEM literal');
1510 wakaba 1.1 $self->{state} = 'bogus DOCTYPE';
1511     !!!next-input-character;
1512     redo A;
1513     }
1514     } elsif ($self->{state} eq 'bogus DOCTYPE') {
1515     if ($self->{next_input_character} == 0x003E) { # >
1516     $self->{state} = 'data';
1517     !!!next-input-character;
1518    
1519 wakaba 1.18 delete $self->{current_token}->{correct};
1520 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1521     undef $self->{current_token};
1522    
1523     redo A;
1524     } elsif ($self->{next_input_character} == -1) {
1525 wakaba 1.3 !!!parse-error (type => 'unclosed DOCTYPE');
1526 wakaba 1.1 $self->{state} = 'data';
1527     ## reconsume
1528    
1529 wakaba 1.18 delete $self->{current_token}->{correct};
1530 wakaba 1.1 !!!emit ($self->{current_token}); # DOCTYPE
1531     undef $self->{current_token};
1532    
1533     redo A;
1534     } else {
1535     ## Stay in the state
1536     !!!next-input-character;
1537     redo A;
1538     }
1539     } else {
1540     die "$0: $self->{state}: Unknown state";
1541     }
1542     } # A
1543    
1544     die "$0: _get_next_token: unexpected case";
1545     } # _get_next_token
1546    
1547     sub _tokenize_attempt_to_consume_an_entity ($) {
1548     my $self = shift;
1549 wakaba 1.20
1550     if ({
1551     0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,
1552     0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, & # 0x000D # CR
1553     }->{$self->{next_input_character}}) {
1554     ## Don't consume
1555     ## No error
1556     return undef;
1557     } elsif ($self->{next_input_character} == 0x0023) { # #
1558 wakaba 1.1 !!!next-input-character;
1559     if ($self->{next_input_character} == 0x0078 or # x
1560     $self->{next_input_character} == 0x0058) { # X
1561 wakaba 1.4 my $num;
1562 wakaba 1.1 X: {
1563     my $x_char = $self->{next_input_character};
1564     !!!next-input-character;
1565     if (0x0030 <= $self->{next_input_character} and
1566     $self->{next_input_character} <= 0x0039) { # 0..9
1567     $num ||= 0;
1568     $num *= 0x10;
1569     $num += $self->{next_input_character} - 0x0030;
1570     redo X;
1571     } elsif (0x0061 <= $self->{next_input_character} and
1572     $self->{next_input_character} <= 0x0066) { # a..f
1573     ## ISSUE: the spec says U+0078, which is apparently incorrect
1574     $num ||= 0;
1575     $num *= 0x10;
1576     $num += $self->{next_input_character} - 0x0060 + 9;
1577     redo X;
1578     } elsif (0x0041 <= $self->{next_input_character} and
1579     $self->{next_input_character} <= 0x0046) { # A..F
1580     ## ISSUE: the spec says U+0058, which is apparently incorrect
1581     $num ||= 0;
1582     $num *= 0x10;
1583     $num += $self->{next_input_character} - 0x0040 + 9;
1584     redo X;
1585     } elsif (not defined $num) { # no hexadecimal digit
1586 wakaba 1.3 !!!parse-error (type => 'bare hcro');
1587 wakaba 1.1 $self->{next_input_character} = 0x0023; # #
1588     !!!back-next-input-character ($x_char);
1589     return undef;
1590     } elsif ($self->{next_input_character} == 0x003B) { # ;
1591     !!!next-input-character;
1592     } else {
1593 wakaba 1.3 !!!parse-error (type => 'no refc');
1594 wakaba 1.1 }
1595    
1596     ## TODO: check the definition for |a valid Unicode character|.
1597 wakaba 1.4 ## <http://lists.whatwg.org/pipermail/whatwg-whatwg.org/2006-December/thread.html#8189>
1598 wakaba 1.1 if ($num > 1114111 or $num == 0) {
1599     $num = 0xFFFD; # REPLACEMENT CHARACTER
1600     ## ISSUE: Why is this not an error?
1601 wakaba 1.4 } elsif (0x80 <= $num and $num <= 0x9F) {
1602 wakaba 1.8 !!!parse-error (type => sprintf 'c1 entity:U+%04X', $num);
1603 wakaba 1.4 $num = $c1_entity_char->{$num};
1604 wakaba 1.1 }
1605    
1606     return {type => 'character', data => chr $num};
1607     } # X
1608     } elsif (0x0030 <= $self->{next_input_character} and
1609     $self->{next_input_character} <= 0x0039) { # 0..9
1610     my $code = $self->{next_input_character} - 0x0030;
1611     !!!next-input-character;
1612    
1613     while (0x0030 <= $self->{next_input_character} and
1614     $self->{next_input_character} <= 0x0039) { # 0..9
1615     $code *= 10;
1616     $code += $self->{next_input_character} - 0x0030;
1617    
1618     !!!next-input-character;
1619     }
1620    
1621     if ($self->{next_input_character} == 0x003B) { # ;
1622     !!!next-input-character;
1623     } else {
1624 wakaba 1.3 !!!parse-error (type => 'no refc');
1625 wakaba 1.1 }
1626    
1627     ## TODO: check the definition for |a valid Unicode character|.
1628     if ($code > 1114111 or $code == 0) {
1629     $code = 0xFFFD; # REPLACEMENT CHARACTER
1630     ## ISSUE: Why is this not an error?
1631 wakaba 1.4 } elsif (0x80 <= $code and $code <= 0x9F) {
1632 wakaba 1.8 !!!parse-error (type => sprintf 'c1 entity:U+%04X', $code);
1633 wakaba 1.4 $code = $c1_entity_char->{$code};
1634 wakaba 1.1 }
1635    
1636     return {type => 'character', data => chr $code};
1637     } else {
1638 wakaba 1.3 !!!parse-error (type => 'bare nero');
1639 wakaba 1.1 !!!back-next-input-character ($self->{next_input_character});
1640     $self->{next_input_character} = 0x0023; # #
1641     return undef;
1642     }
1643     } elsif ((0x0041 <= $self->{next_input_character} and
1644     $self->{next_input_character} <= 0x005A) or
1645     (0x0061 <= $self->{next_input_character} and
1646     $self->{next_input_character} <= 0x007A)) {
1647     my $entity_name = chr $self->{next_input_character};
1648     !!!next-input-character;
1649    
1650     my $value = $entity_name;
1651     my $match;
1652 wakaba 1.16 require Whatpm::_NamedEntityList;
1653     our $EntityChar;
1654 wakaba 1.1
1655     while (length $entity_name < 10 and
1656     ## NOTE: Some number greater than the maximum length of entity name
1657 wakaba 1.16 ((0x0041 <= $self->{next_input_character} and # a
1658     $self->{next_input_character} <= 0x005A) or # x
1659     (0x0061 <= $self->{next_input_character} and # a
1660     $self->{next_input_character} <= 0x007A) or # z
1661     (0x0030 <= $self->{next_input_character} and # 0
1662     $self->{next_input_character} <= 0x0039) or # 9
1663     $self->{next_input_character} == 0x003B)) { # ;
1664 wakaba 1.1 $entity_name .= chr $self->{next_input_character};
1665 wakaba 1.16 if (defined $EntityChar->{$entity_name}) {
1666     $value = $EntityChar->{$entity_name};
1667     if ($self->{next_input_character} == 0x003B) { # ;
1668     $match = 1;
1669     !!!next-input-character;
1670     last;
1671     } else {
1672     $match = -1;
1673     }
1674 wakaba 1.1 } else {
1675     $value .= chr $self->{next_input_character};
1676     }
1677     !!!next-input-character;
1678     }
1679    
1680 wakaba 1.16 if ($match > 0) {
1681     return {type => 'character', data => $value};
1682     } elsif ($match < 0) {
1683     !!!parse-error (type => 'refc');
1684 wakaba 1.1 return {type => 'character', data => $value};
1685     } else {
1686 wakaba 1.3 !!!parse-error (type => 'bare ero');
1687 wakaba 1.1 ## NOTE: No characters are consumed in the spec.
1688     !!!back-token ({type => 'character', data => $value});
1689     return undef;
1690     }
1691     } else {
1692     ## no characters are consumed
1693 wakaba 1.3 !!!parse-error (type => 'bare ero');
1694 wakaba 1.1 return undef;
1695     }
1696     } # _tokenize_attempt_to_consume_an_entity
1697    
## Prepares the document for the tree construction stage.
##
## NOTE: $self->{document} MUST be specified before this method is called.
##
## Strict error checking is switched off on the document and the
## document is flagged as an HTML document.  The value returned is
## whatever the final |manakai_is_html| call returns.
sub _initialize_tree_constructor ($) {
  my ($self) = @_;
  my $document = $self->{document};

  $document->strict_error_checking (0);
  ## TODO: Turn mutation events off # MUST
  ## TODO: Turn loose Document option (manakai extension) on
  return $document->manakai_is_html (1); # MUST
} # _initialize_tree_constructor
1706    
## Finishes the tree construction stage by switching the document's
## strict error checking back on.  The value returned is whatever the
## |strict_error_checking| call returns.
sub _terminate_tree_constructor ($) {
  my ($self) = @_;
  ## TODO: Turn mutation events on
  return $self->{document}->strict_error_checking (1);
} # _terminate_tree_constructor
1712    
1713     ## ISSUE: Should append_child (for example) in script executed in tree construction stage fire mutation events?
1714    
1715 wakaba 1.3 { # tree construction stage
1716     my $token;
1717    
## Entry point of the tree construction stage.
##
## Fetches the first token, resets the per-parse tree construction
## state kept in $self, and then runs the initial, root element and
## main phases in that order.  The value returned is whatever
## |_tree_construction_main| returns.
sub _construct_tree ($) {
  my $self = shift;

  ## It is not defined when an interactive UA makes $self->{document}
  ## available to the user, nor when it begins accepting user input.

  ## Appending a character means: collect it together with all
  ## consecutive characters that follow and insert a single Text node
  ## whose data is the concatenation of those characters. # MUST

  !!!next-token;

  $self->{insertion_mode} = 'before head';
  $self->{open_elements} = [];
  $self->{$_} = undef for qw(form_element head_element inner_html_node);

  $self->_tree_construction_initial; # MUST
  $self->_tree_construction_root_element;
  return $self->_tree_construction_main;
} # _construct_tree
1741    
## Tree construction: the "initial" phase.
##
## Processes the current token ($token, shared by the subs of the
## enclosing tree construction scope) and any following tokens until
## the DOCTYPE has been handled or a token that implies its absence is
## seen:
##
##   - DOCTYPE: a DocumentType node is appended to the document and
##     the document's compatibility mode is chosen from the DOCTYPE
##     name, public identifier and system identifier; the next token
##     is then fetched.
##   - start tag / end tag / end-of-file, or a character token with
##     non-whitespace data: a 'no DOCTYPE' parse error is reported, the
##     document is put in quirks mode and the token is left to be
##     reprocessed by the next phase.
##   - leading whitespace characters are dropped; comments are appended
##     to the document; in both cases the phase continues.
##
## Returns nothing.  Dies on a token of unknown type.
sub _tree_construction_initial ($) {
  my $self = shift;
  INITIAL: {
    if ($token->{type} eq 'DOCTYPE') {
      ## NOTE: Conformance checkers MAY, instead of reporting "not HTML5"
      ## error, switch to a conformance checking mode for another
      ## language.
      my $doctype_name = $token->{name};
      $doctype_name = '' unless defined $doctype_name;
      $doctype_name =~ tr/a-z/A-Z/;
      if (not defined $token->{name} or # <!DOCTYPE>
          defined $token->{public_identifier} or
          defined $token->{system_identifier}) {
        !!!parse-error (type => 'not HTML5');
      } elsif ($doctype_name ne 'HTML') {
        ## ISSUE: ASCII case-insensitive? (in fact it does not matter)
        !!!parse-error (type => 'not HTML5');
      }

      my $doctype = $self->{document}->create_document_type_definition
          ($token->{name}); ## ISSUE: If name is missing (e.g. <!DOCTYPE>)?
      $doctype->public_id ($token->{public_identifier})
          if defined $token->{public_identifier};
      $doctype->system_id ($token->{system_identifier})
          if defined $token->{system_identifier};
      ## NOTE: Other DocumentType attributes are null or empty lists.
      ## ISSUE: internalSubset = null??
      $self->{document}->append_child ($doctype);

      if (not $token->{correct} or $doctype_name ne 'HTML') {
        $self->{document}->manakai_compat_mode ('quirks');
      } elsif (defined $token->{public_identifier}) {
        ## The public identifier is compared case-insensitively: it is
        ## upper-cased here, so every literal it is compared with below
        ## has to be written in upper case as well.
        my $pubid = $token->{public_identifier};
        $pubid =~ tr/a-z/A-Z/;
        if ({
          "+//SILMARIL//DTD HTML PRO V0R11 19970101//EN" => 1,
          "-//ADVASOFT LTD//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//AS//DTD HTML 3.0 ASWEDIT + EXTENSIONS//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML 2.0 STRICT//EN" => 1,
          "-//IETF//DTD HTML 2.0//EN" => 1,
          "-//IETF//DTD HTML 2.1E//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN" => 1,
          "-//IETF//DTD HTML 3.0//EN//" => 1,
          "-//IETF//DTD HTML 3.2 FINAL//EN" => 1,
          "-//IETF//DTD HTML 3.2//EN" => 1,
          "-//IETF//DTD HTML 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 0//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 1//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 2//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN" => 1,
          "-//IETF//DTD HTML STRICT LEVEL 3//EN//3.0" => 1,
          "-//IETF//DTD HTML STRICT//EN" => 1,
          "-//IETF//DTD HTML STRICT//EN//2.0" => 1,
          "-//IETF//DTD HTML STRICT//EN//3.0" => 1,
          "-//IETF//DTD HTML//EN" => 1,
          "-//IETF//DTD HTML//EN//2.0" => 1,
          "-//IETF//DTD HTML//EN//3.0" => 1,
          "-//METRIUS//DTD METRIUS PRESENTATIONAL//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 2.0 TABLES//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML STRICT//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 HTML//EN" => 1,
          "-//MICROSOFT//DTD INTERNET EXPLORER 3.0 TABLES//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD HTML//EN" => 1,
          "-//NETSCAPE COMM. CORP.//DTD STRICT HTML//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML 2.0//EN" => 1,
          "-//O'REILLY AND ASSOCIATES//DTD HTML EXTENDED 1.0//EN" => 1,
          "-//SPYGLASS//DTD HTML 2.0 EXTENDED//EN" => 1,
          "-//SQ//DTD HTML 2.0 HOTMETAL + EXTENSIONS//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA HTML//EN" => 1,
          "-//SUN MICROSYSTEMS CORP.//DTD HOTJAVA STRICT HTML//EN" => 1,
          "-//W3C//DTD HTML 3 1995-03-24//EN" => 1,
          "-//W3C//DTD HTML 3.2 DRAFT//EN" => 1,
          "-//W3C//DTD HTML 3.2 FINAL//EN" => 1,
          "-//W3C//DTD HTML 3.2//EN" => 1,
          "-//W3C//DTD HTML 3.2S DRAFT//EN" => 1,
          "-//W3C//DTD HTML 4.0 FRAMESET//EN" => 1,
          "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN" => 1,
          "-//W3C//DTD HTML EXPERIMETNAL 19960712//EN" => 1,
          "-//W3C//DTD HTML EXPERIMENTAL 970421//EN" => 1,
          "-//W3C//DTD W3 HTML//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN" => 1,
          "-//W3O//DTD W3 HTML 3.0//EN//" => 1,
          "-//W3O//DTD W3 HTML STRICT 3.0//EN//" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML 2.0//EN" => 1,
          "-//WEBTECHS//DTD MOZILLA HTML//EN" => 1,
          "-/W3C/DTD HTML 4.0 TRANSITIONAL/EN" => 1,
          "HTML" => 1,
        }->{$pubid}) {
          $self->{document}->manakai_compat_mode ('quirks');
        } elsif ($pubid eq "-//W3C//DTD HTML 4.01 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD HTML 4.01 TRANSITIONAL//EN") {
          ## For these two identifiers the mode depends on the system
          ## identifier: a DOCTYPE without one selects quirks mode, a
          ## DOCTYPE with one selects limited quirks mode.
          if (defined $token->{system_identifier}) {
            $self->{document}->manakai_compat_mode ('limited quirks');
          } else {
            $self->{document}->manakai_compat_mode ('quirks');
          }
        } elsif ($pubid eq "-//W3C//DTD XHTML 1.0 FRAMESET//EN" or
                 $pubid eq "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN") {
          $self->{document}->manakai_compat_mode ('limited quirks');
        }
      }
      if (defined $token->{system_identifier}) {
        ## The system identifier is compared case-insensitively, in
        ## lower case.  A match overrides any mode chosen above.
        my $sysid = $token->{system_identifier};
        $sysid =~ tr/A-Z/a-z/;
        if ($sysid eq "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
          $self->{document}->manakai_compat_mode ('quirks');
        }
      }

      ## Go to the root element phase.
      !!!next-token;
      return;
    } elsif ({
              'start tag' => 1,
              'end tag' => 1,
              'end-of-file' => 1,
             }->{$token->{type}}) {
      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D
        ## Ignore the token
        unless (length $token->{data}) {
          ## Stay in the phase
          !!!next-token;
          redo INITIAL;
        }
      }

      ## Non-whitespace data remains: there is no DOCTYPE.
      !!!parse-error (type => 'no DOCTYPE');
      $self->{document}->manakai_compat_mode ('quirks');
      ## Go to the root element phase
      ## reprocess
      return;
    } elsif ($token->{type} eq 'comment') {
      my $comment = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment);

      ## Stay in the phase.
      !!!next-token;
      redo INITIAL;
    } else {
      die "$0: $token->{type}: Unknown token";
    }
  } # INITIAL
} # _tree_construction_initial
1908    
## Tree construction: the "root element" phase.
##
## Skips DOCTYPE tokens (reporting a parse error), appends comments and
## leading whitespace to the document, and, on the first other token,
## creates the |html| root element, appends it to the document and
## pushes it onto the stack of open elements.  That token is left as
## the current token ($token of the enclosing scope) so that the next
## phase reprocesses it.
##
## Returns nothing.  Dies on a token of unknown type.
sub _tree_construction_root_element ($) {
  my ($self) = @_;

  TOKEN: while (1) {
    my $type = $token->{type};

    if ($type eq 'DOCTYPE') {
      !!!parse-error (type => 'in html:#DOCTYPE');
      ## The token is ignored and the phase does not change.
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'comment') {
      my $comment_node = $self->{document}->create_comment ($token->{data});
      $self->{document}->append_child ($comment_node);
      ## The phase does not change.
      !!!next-token;
      next TOKEN;
    }

    if ($type eq 'character') {
      if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
        $self->{document}->manakai_append_text ($1);
        ## ISSUE: DOM3 Core does not allow Document > Text
        unless (length $token->{data}) {
          ## Only whitespace was present: the phase does not change.
          !!!next-token;
          next TOKEN;
        }
      }
      last TOKEN;
    }

    ## ISSUE: There is an issue in the spec
    last TOKEN if $type eq 'start tag'
               or $type eq 'end tag'
               or $type eq 'end-of-file';

    die "$0: $token->{type}: Unknown token";
  } # TOKEN

  my $root_element;
  !!!create-element ($root_element, 'html');
  $self->{document}->append_child ($root_element);
  push @{$self->{open_elements}}, [$root_element, 'html'];

  ## The current token is reprocessed by the caller's next phase.
  return;
} # _tree_construction_root_element
1955    
## Resets $self->{insertion_mode} according to the stack of open
## elements, $self->{open_elements}, whose entries are
## [node, local name] pairs.
##
## The stack is walked from the current (last) entry towards the first
## one.  The first entry whose name has a dedicated insertion mode
## decides the result.  An |html| entry selects 'before head' or
## 'after head' depending on whether $self->{head_element} is defined.
## If the first entry is reached without a decision, the mode becomes
## 'in body'.
##
## When the first entry of the stack is reached and
## $self->{inner_html_node} is defined (innerHTML case), the context
## element is examined in place of that entry, unless the context
## element is a |td| or |th|.  This substitution is made only for the
## first entry of the stack, so that elements actually open above it
## (e.g. a |td| inside the fragment) still determine the mode.
##
## Returns nothing.
sub _reset_insertion_mode ($) {
  my $self = shift;

  ## Step 1
  my $last;

  ## Step 2
  my $i = -1;
  my $node = $self->{open_elements}->[$i];

  ## Step 3
  S3: {
    if ($self->{open_elements}->[0]->[0] eq $node->[0]) {
      $last = 1;
      if (defined $self->{inner_html_node} and
          $self->{inner_html_node}->[1] ne 'td' and
          $self->{inner_html_node}->[1] ne 'th') {
        $node = $self->{inner_html_node};
      }
    }

    ## Step 4..13
    my $new_mode = {
      select => 'in select',
      td => 'in cell',
      th => 'in cell',
      tr => 'in row',
      tbody => 'in table body',
      thead => 'in table head',
      tfoot => 'in table foot',
      caption => 'in caption',
      colgroup => 'in column group',
      table => 'in table',
      head => 'in body', # not in head!
      body => 'in body',
      frameset => 'in frameset',
    }->{$node->[1]};
    if (defined $new_mode) {
      $self->{insertion_mode} = $new_mode;
      return;
    }

    ## Step 14
    if ($node->[1] eq 'html') {
      $self->{insertion_mode}
          = defined $self->{head_element} ? 'after head' : 'before head';
      return;
    }

    ## Step 15
    if ($last) {
      $self->{insertion_mode} = 'in body';
      return;
    }

    ## Step 16
    $i--;
    $node = $self->{open_elements}->[$i];

    ## Step 17
    redo S3;
  } # S3
} # _reset_insertion_mode
2017    
2018     sub _tree_construction_main ($) {
2019     my $self = shift;
2020    
2021     my $phase = 'main';
2022 wakaba 1.1
2023     my $active_formatting_elements = [];
2024    
2025     my $reconstruct_active_formatting_elements = sub { # MUST
2026     my $insert = shift;
2027    
2028     ## Step 1
2029     return unless @$active_formatting_elements;
2030    
2031     ## Step 3
2032     my $i = -1;
2033     my $entry = $active_formatting_elements->[$i];
2034    
2035     ## Step 2
2036     return if $entry->[0] eq '#marker';
2037 wakaba 1.3 for (@{$self->{open_elements}}) {
2038 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2039     return;
2040     }
2041     }
2042    
2043     S4: {
2044     ## Step 4
2045     last S4 if $active_formatting_elements->[0]->[0] eq $entry->[0];
2046    
2047     ## Step 5
2048     $i--;
2049     $entry = $active_formatting_elements->[$i];
2050    
2051     ## Step 6
2052     if ($entry->[0] eq '#marker') {
2053     #
2054     } else {
2055     my $in_open_elements;
2056 wakaba 1.3 OE: for (@{$self->{open_elements}}) {
2057 wakaba 1.1 if ($entry->[0] eq $_->[0]) {
2058     $in_open_elements = 1;
2059     last OE;
2060     }
2061     }
2062     if ($in_open_elements) {
2063     #
2064     } else {
2065     redo S4;
2066     }
2067     }
2068    
2069     ## Step 7
2070     $i++;
2071     $entry = $active_formatting_elements->[$i];
2072     } # S4
2073    
2074     S7: {
2075     ## Step 8
2076     my $clone = [$entry->[0]->clone_node (0), $entry->[1]];
2077    
2078     ## Step 9
2079     $insert->($clone->[0]);
2080 wakaba 1.3 push @{$self->{open_elements}}, $clone;
2081 wakaba 1.1
2082     ## Step 10
2083 wakaba 1.3 $active_formatting_elements->[$i] = $self->{open_elements}->[-1];
2084 wakaba 1.1
2085     ## Step 11
2086     unless ($clone->[0] eq $active_formatting_elements->[-1]->[0]) {
2087     ## Step 7'
2088     $i++;
2089     $entry = $active_formatting_elements->[$i];
2090    
2091     redo S7;
2092     }
2093     } # S7
2094     }; # $reconstruct_active_formatting_elements
2095    
2096     my $clear_up_to_marker = sub {
2097     for (reverse 0..$#$active_formatting_elements) {
2098     if ($active_formatting_elements->[$_]->[0] eq '#marker') {
2099     splice @$active_formatting_elements, $_;
2100     return;
2101     }
2102     }
2103     }; # $clear_up_to_marker
2104    
2105     my $style_start_tag = sub {
2106 wakaba 1.6 my $style_el; !!!create-element ($style_el, 'style', $token->{attributes});
2107 wakaba 1.3 ## $self->{insertion_mode} eq 'in head' and ... (always true)
2108     (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
2109     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2110 wakaba 1.1 ->append_child ($style_el);
2111     $self->{content_model_flag} = 'CDATA';
2112 wakaba 1.13 delete $self->{escape}; # MUST
2113 wakaba 1.1
2114     my $text = '';
2115     !!!next-token;
2116     while ($token->{type} eq 'character') {
2117     $text .= $token->{data};
2118     !!!next-token;
2119     } # stop if non-character token or tokenizer stops tokenising
2120     if (length $text) {
2121     $style_el->manakai_append_text ($text);
2122     }
2123    
2124     $self->{content_model_flag} = 'PCDATA';
2125    
2126     if ($token->{type} eq 'end tag' and $token->{tag_name} eq 'style') {
2127     ## Ignore the token
2128     } else {
2129 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2130 wakaba 1.1 ## ISSUE: And ignore?
2131     }
2132     !!!next-token;
2133     }; # $style_start_tag
2134    
2135     my $script_start_tag = sub {
2136     my $script_el;
2137     !!!create-element ($script_el, 'script', $token->{attributes});
2138     ## TODO: mark as "parser-inserted"
2139    
2140     $self->{content_model_flag} = 'CDATA';
2141 wakaba 1.13 delete $self->{escape}; # MUST
2142 wakaba 1.1
2143     my $text = '';
2144     !!!next-token;
2145     while ($token->{type} eq 'character') {
2146     $text .= $token->{data};
2147     !!!next-token;
2148     } # stop if non-character token or tokenizer stops tokenising
2149     if (length $text) {
2150     $script_el->manakai_append_text ($text);
2151     }
2152    
2153     $self->{content_model_flag} = 'PCDATA';
2154    
2155     if ($token->{type} eq 'end tag' and
2156     $token->{tag_name} eq 'script') {
2157     ## Ignore the token
2158     } else {
2159 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2160 wakaba 1.1 ## ISSUE: And ignore?
2161     ## TODO: mark as "already executed"
2162     }
2163    
2164 wakaba 1.3 if (defined $self->{inner_html_node}) {
2165     ## TODO: mark as "already executed"
2166     } else {
2167 wakaba 1.1 ## TODO: $old_insertion_point = current insertion point
2168     ## TODO: insertion point = just before the next input character
2169    
2170 wakaba 1.3 (($self->{insertion_mode} eq 'in head' and defined $self->{head_element})
2171     ? $self->{head_element} : $self->{open_elements}->[-1]->[0])->append_child ($script_el);
2172 wakaba 1.1
2173     ## TODO: insertion point = $old_insertion_point (might be "undefined")
2174    
2175     ## TODO: if there is a script that will execute as soon as the parser resume, then...
2176     }
2177    
2178     !!!next-token;
2179     }; # $script_start_tag
2180    
2181     my $formatting_end_tag = sub {
2182     my $tag_name = shift;
2183    
2184     FET: {
2185     ## Step 1
2186     my $formatting_element;
2187     my $formatting_element_i_in_active;
2188     AFE: for (reverse 0..$#$active_formatting_elements) {
2189     if ($active_formatting_elements->[$_]->[1] eq $tag_name) {
2190     $formatting_element = $active_formatting_elements->[$_];
2191     $formatting_element_i_in_active = $_;
2192     last AFE;
2193     } elsif ($active_formatting_elements->[$_]->[0] eq '#marker') {
2194     last AFE;
2195     }
2196     } # AFE
2197     unless (defined $formatting_element) {
2198 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$tag_name);
2199 wakaba 1.1 ## Ignore the token
2200     !!!next-token;
2201     return;
2202     }
2203     ## has an element in scope
2204     my $in_scope = 1;
2205     my $formatting_element_i_in_open;
2206 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2207     my $node = $self->{open_elements}->[$_];
2208 wakaba 1.1 if ($node->[0] eq $formatting_element->[0]) {
2209     if ($in_scope) {
2210     $formatting_element_i_in_open = $_;
2211     last INSCOPE;
2212     } else { # in open elements but not in scope
2213 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2214 wakaba 1.1 ## Ignore the token
2215     !!!next-token;
2216     return;
2217     }
2218     } elsif ({
2219     table => 1, caption => 1, td => 1, th => 1,
2220     button => 1, marquee => 1, object => 1, html => 1,
2221     }->{$node->[1]}) {
2222     $in_scope = 0;
2223     }
2224     } # INSCOPE
2225     unless (defined $formatting_element_i_in_open) {
2226 wakaba 1.4 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
2227 wakaba 1.1 pop @$active_formatting_elements; # $formatting_element
2228     !!!next-token; ## TODO: ok?
2229     return;
2230     }
2231 wakaba 1.3 if (not $self->{open_elements}->[-1]->[0] eq $formatting_element->[0]) {
2232 wakaba 1.4 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
2233 wakaba 1.1 }
2234    
2235     ## Step 2
2236     my $furthest_block;
2237     my $furthest_block_i_in_open;
2238 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2239     my $node = $self->{open_elements}->[$_];
2240 wakaba 1.1 if (not $formatting_category->{$node->[1]} and
2241     #not $phrasing_category->{$node->[1]} and
2242     ($special_category->{$node->[1]} or
2243     $scoping_category->{$node->[1]})) {
2244     $furthest_block = $node;
2245     $furthest_block_i_in_open = $_;
2246     } elsif ($node->[0] eq $formatting_element->[0]) {
2247     last OE;
2248     }
2249     } # OE
2250    
2251     ## Step 3
2252     unless (defined $furthest_block) { # MUST
2253 wakaba 1.3 splice @{$self->{open_elements}}, $formatting_element_i_in_open;
2254 wakaba 1.1 splice @$active_formatting_elements, $formatting_element_i_in_active, 1;
2255     !!!next-token;
2256     return;
2257     }
2258    
2259     ## Step 4
2260 wakaba 1.3 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1];
2261 wakaba 1.1
2262     ## Step 5
2263     my $furthest_block_parent = $furthest_block->[0]->parent_node;
2264     if (defined $furthest_block_parent) {
2265     $furthest_block_parent->remove_child ($furthest_block->[0]);
2266     }
2267    
2268     ## Step 6
2269     my $bookmark_prev_el
2270     = $active_formatting_elements->[$formatting_element_i_in_active - 1]
2271     ->[0];
2272    
2273     ## Step 7
2274     my $node = $furthest_block;
2275     my $node_i_in_open = $furthest_block_i_in_open;
2276     my $last_node = $furthest_block;
2277     S7: {
2278     ## Step 1
2279     $node_i_in_open--;
2280 wakaba 1.3 $node = $self->{open_elements}->[$node_i_in_open];
2281 wakaba 1.1
2282     ## Step 2
2283     my $node_i_in_active;
2284     S7S2: {
2285     for (reverse 0..$#$active_formatting_elements) {
2286     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2287     $node_i_in_active = $_;
2288     last S7S2;
2289     }
2290     }
2291 wakaba 1.3 splice @{$self->{open_elements}}, $node_i_in_open, 1;
2292 wakaba 1.1 redo S7;
2293     } # S7S2
2294    
2295     ## Step 3
2296     last S7 if $node->[0] eq $formatting_element->[0];
2297    
2298     ## Step 4
2299     if ($last_node->[0] eq $furthest_block->[0]) {
2300     $bookmark_prev_el = $node->[0];
2301     }
2302    
2303     ## Step 5
2304     if ($node->[0]->has_child_nodes ()) {
2305     my $clone = [$node->[0]->clone_node (0), $node->[1]];
2306     $active_formatting_elements->[$node_i_in_active] = $clone;
2307 wakaba 1.3 $self->{open_elements}->[$node_i_in_open] = $clone;
2308 wakaba 1.1 $node = $clone;
2309     }
2310    
2311     ## Step 6
2312     $node->[0]->append_child ($last_node->[0]);
2313    
2314     ## Step 7
2315     $last_node = $node;
2316    
2317     ## Step 8
2318     redo S7;
2319     } # S7
2320    
2321     ## Step 8
2322     $common_ancestor_node->[0]->append_child ($last_node->[0]);
2323    
2324     ## Step 9
2325     my $clone = [$formatting_element->[0]->clone_node (0),
2326     $formatting_element->[1]];
2327    
2328     ## Step 10
2329     my @cn = @{$furthest_block->[0]->child_nodes};
2330     $clone->[0]->append_child ($_) for @cn;
2331    
2332     ## Step 11
2333     $furthest_block->[0]->append_child ($clone->[0]);
2334    
2335     ## Step 12
2336     my $i;
2337     AFE: for (reverse 0..$#$active_formatting_elements) {
2338     if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) {
2339     splice @$active_formatting_elements, $_, 1;
2340     $i-- and last AFE if defined $i;
2341     } elsif ($active_formatting_elements->[$_]->[0] eq $bookmark_prev_el) {
2342     $i = $_;
2343     }
2344     } # AFE
2345     splice @$active_formatting_elements, $i + 1, 0, $clone;
2346    
2347     ## Step 13
2348     undef $i;
2349 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2350     if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) {
2351     splice @{$self->{open_elements}}, $_, 1;
2352 wakaba 1.1 $i-- and last OE if defined $i;
2353 wakaba 1.3 } elsif ($self->{open_elements}->[$_]->[0] eq $furthest_block->[0]) {
2354 wakaba 1.1 $i = $_;
2355     }
2356     } # OE
2357 wakaba 1.3 splice @{$self->{open_elements}}, $i + 1, 1, $clone;
2358 wakaba 1.1
2359     ## Step 14
2360     redo FET;
2361     } # FET
2362     }; # $formatting_end_tag
2363    
2364     my $insert_to_current = sub {
2365 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child (shift);
2366 wakaba 1.1 }; # $insert_to_current
2367    
2368     my $insert_to_foster = sub {
2369     my $child = shift;
2370     if ({
2371     table => 1, tbody => 1, tfoot => 1,
2372     thead => 1, tr => 1,
2373 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
2374 wakaba 1.1 # MUST
2375     my $foster_parent_element;
2376     my $next_sibling;
2377 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2378     if ($self->{open_elements}->[$_]->[1] eq 'table') {
2379     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
2380 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
2381     $foster_parent_element = $parent;
2382 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
2383 wakaba 1.1 } else {
2384     $foster_parent_element
2385 wakaba 1.3 = $self->{open_elements}->[$_ - 1]->[0];
2386 wakaba 1.1 }
2387     last OE;
2388     }
2389     } # OE
2390 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0]
2391 wakaba 1.1 unless defined $foster_parent_element;
2392     $foster_parent_element->insert_before
2393     ($child, $next_sibling);
2394     } else {
2395 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($child);
2396 wakaba 1.1 }
2397     }; # $insert_to_foster
2398    
2399     my $in_body = sub {
2400     my $insert = shift;
2401     if ($token->{type} eq 'start tag') {
2402     if ($token->{tag_name} eq 'script') {
2403     $script_start_tag->();
2404     return;
2405     } elsif ($token->{tag_name} eq 'style') {
2406     $style_start_tag->();
2407     return;
2408     } elsif ({
2409     base => 1, link => 1, meta => 1,
2410     }->{$token->{tag_name}}) {
2411 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2412 wakaba 1.1 ## NOTE: This is an "as if in head" code clone
2413     my $el;
2414     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2415 wakaba 1.3 if (defined $self->{head_element}) {
2416     $self->{head_element}->append_child ($el);
2417 wakaba 1.1 } else {
2418     $insert->($el);
2419     }
2420    
2421     !!!next-token;
2422     return;
2423     } elsif ($token->{tag_name} eq 'title') {
2424 wakaba 1.3 !!!parse-error (type => 'in body:title');
2425 wakaba 1.1 ## NOTE: There is an "as if in head" code clone
2426     my $title_el;
2427     !!!create-element ($title_el, 'title', $token->{attributes});
2428 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
2429 wakaba 1.1 ->append_child ($title_el);
2430     $self->{content_model_flag} = 'RCDATA';
2431 wakaba 1.13 delete $self->{escape}; # MUST
2432 wakaba 1.1
2433     my $text = '';
2434     !!!next-token;
2435     while ($token->{type} eq 'character') {
2436     $text .= $token->{data};
2437     !!!next-token;
2438     }
2439     if (length $text) {
2440     $title_el->manakai_append_text ($text);
2441     }
2442    
2443     $self->{content_model_flag} = 'PCDATA';
2444    
2445     if ($token->{type} eq 'end tag' and
2446     $token->{tag_name} eq 'title') {
2447     ## Ignore the token
2448     } else {
2449 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2450 wakaba 1.1 ## ISSUE: And ignore?
2451     }
2452     !!!next-token;
2453     return;
2454     } elsif ($token->{tag_name} eq 'body') {
2455 wakaba 1.3 !!!parse-error (type => 'in body:body');
2456 wakaba 1.1
2457 wakaba 1.3 if (@{$self->{open_elements}} == 1 or
2458     $self->{open_elements}->[1]->[1] ne 'body') {
2459 wakaba 1.1 ## Ignore the token
2460     } else {
2461 wakaba 1.3 my $body_el = $self->{open_elements}->[1]->[0];
2462 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
2463     unless ($body_el->has_attribute_ns (undef, $attr_name)) {
2464     $body_el->set_attribute_ns
2465     (undef, [undef, $attr_name],
2466     $token->{attributes}->{$attr_name}->{value});
2467     }
2468     }
2469     }
2470     !!!next-token;
2471     return;
2472     } elsif ({
2473     address => 1, blockquote => 1, center => 1, dir => 1,
2474     div => 1, dl => 1, fieldset => 1, listing => 1,
2475     menu => 1, ol => 1, p => 1, ul => 1,
2476     pre => 1,
2477     }->{$token->{tag_name}}) {
2478     ## has a p element in scope
2479 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2480 wakaba 1.1 if ($_->[1] eq 'p') {
2481     !!!back-token;
2482     $token = {type => 'end tag', tag_name => 'p'};
2483     return;
2484     } elsif ({
2485     table => 1, caption => 1, td => 1, th => 1,
2486     button => 1, marquee => 1, object => 1, html => 1,
2487     }->{$_->[1]}) {
2488     last INSCOPE;
2489     }
2490     } # INSCOPE
2491    
2492     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2493     if ($token->{tag_name} eq 'pre') {
2494     !!!next-token;
2495     if ($token->{type} eq 'character') {
2496     $token->{data} =~ s/^\x0A//;
2497     unless (length $token->{data}) {
2498     !!!next-token;
2499     }
2500     }
2501     } else {
2502     !!!next-token;
2503     }
2504     return;
2505     } elsif ($token->{tag_name} eq 'form') {
2506 wakaba 1.3 if (defined $self->{form_element}) {
2507     !!!parse-error (type => 'in form:form');
2508 wakaba 1.1 ## Ignore the token
2509 wakaba 1.7 !!!next-token;
2510     return;
2511 wakaba 1.1 } else {
2512     ## has a p element in scope
2513 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2514 wakaba 1.1 if ($_->[1] eq 'p') {
2515     !!!back-token;
2516     $token = {type => 'end tag', tag_name => 'p'};
2517     return;
2518     } elsif ({
2519     table => 1, caption => 1, td => 1, th => 1,
2520     button => 1, marquee => 1, object => 1, html => 1,
2521     }->{$_->[1]}) {
2522     last INSCOPE;
2523     }
2524     } # INSCOPE
2525    
2526     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2527 wakaba 1.3 $self->{form_element} = $self->{open_elements}->[-1]->[0];
2528 wakaba 1.1 !!!next-token;
2529     return;
2530     }
2531     } elsif ($token->{tag_name} eq 'li') {
2532     ## has a p element in scope
2533 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2534 wakaba 1.1 if ($_->[1] eq 'p') {
2535     !!!back-token;
2536     $token = {type => 'end tag', tag_name => 'p'};
2537     return;
2538     } elsif ({
2539     table => 1, caption => 1, td => 1, th => 1,
2540     button => 1, marquee => 1, object => 1, html => 1,
2541     }->{$_->[1]}) {
2542     last INSCOPE;
2543     }
2544     } # INSCOPE
2545    
2546     ## Step 1
2547     my $i = -1;
2548 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2549 wakaba 1.1 LI: {
2550     ## Step 2
2551     if ($node->[1] eq 'li') {
2552 wakaba 1.8 if ($i != -1) {
2553     !!!parse-error (type => 'end tag missing:'.
2554     $self->{open_elements}->[-1]->[1]);
2555     ## TODO: test
2556     }
2557 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2558 wakaba 1.1 last LI;
2559     }
2560    
2561     ## Step 3
2562     if (not $formatting_category->{$node->[1]} and
2563     #not $phrasing_category->{$node->[1]} and
2564     ($special_category->{$node->[1]} or
2565     $scoping_category->{$node->[1]}) and
2566     $node->[1] ne 'address' and $node->[1] ne 'div') {
2567     last LI;
2568     }
2569    
2570     ## Step 4
2571     $i--;
2572 wakaba 1.3 $node = $self->{open_elements}->[$i];
2573 wakaba 1.1 redo LI;
2574     } # LI
2575    
2576     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2577     !!!next-token;
2578     return;
2579     } elsif ($token->{tag_name} eq 'dd' or $token->{tag_name} eq 'dt') {
2580     ## has a p element in scope
2581 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2582 wakaba 1.1 if ($_->[1] eq 'p') {
2583     !!!back-token;
2584     $token = {type => 'end tag', tag_name => 'p'};
2585     return;
2586     } elsif ({
2587     table => 1, caption => 1, td => 1, th => 1,
2588     button => 1, marquee => 1, object => 1, html => 1,
2589     }->{$_->[1]}) {
2590     last INSCOPE;
2591     }
2592     } # INSCOPE
2593    
2594     ## Step 1
2595     my $i = -1;
2596 wakaba 1.3 my $node = $self->{open_elements}->[$i];
2597 wakaba 1.1 LI: {
2598     ## Step 2
2599     if ($node->[1] eq 'dt' or $node->[1] eq 'dd') {
2600 wakaba 1.8 if ($i != -1) {
2601     !!!parse-error (type => 'end tag missing:'.
2602     $self->{open_elements}->[-1]->[1]);
2603     ## TODO: test
2604     }
2605 wakaba 1.3 splice @{$self->{open_elements}}, $i;
2606 wakaba 1.1 last LI;
2607     }
2608    
2609     ## Step 3
2610     if (not $formatting_category->{$node->[1]} and
2611     #not $phrasing_category->{$node->[1]} and
2612     ($special_category->{$node->[1]} or
2613     $scoping_category->{$node->[1]}) and
2614     $node->[1] ne 'address' and $node->[1] ne 'div') {
2615     last LI;
2616     }
2617    
2618     ## Step 4
2619     $i--;
2620 wakaba 1.3 $node = $self->{open_elements}->[$i];
2621 wakaba 1.1 redo LI;
2622     } # LI
2623    
2624     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2625     !!!next-token;
2626     return;
2627     } elsif ($token->{tag_name} eq 'plaintext') {
2628     ## has a p element in scope
2629 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2630 wakaba 1.1 if ($_->[1] eq 'p') {
2631     !!!back-token;
2632     $token = {type => 'end tag', tag_name => 'p'};
2633     return;
2634     } elsif ({
2635     table => 1, caption => 1, td => 1, th => 1,
2636     button => 1, marquee => 1, object => 1, html => 1,
2637     }->{$_->[1]}) {
2638     last INSCOPE;
2639     }
2640     } # INSCOPE
2641    
2642     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2643    
2644     $self->{content_model_flag} = 'PLAINTEXT';
2645    
2646     !!!next-token;
2647     return;
2648     } elsif ({
2649     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2650     }->{$token->{tag_name}}) {
2651     ## has a p element in scope
2652 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2653     my $node = $self->{open_elements}->[$_];
2654 wakaba 1.1 if ($node->[1] eq 'p') {
2655     !!!back-token;
2656     $token = {type => 'end tag', tag_name => 'p'};
2657     return;
2658     } elsif ({
2659     table => 1, caption => 1, td => 1, th => 1,
2660     button => 1, marquee => 1, object => 1, html => 1,
2661     }->{$node->[1]}) {
2662     last INSCOPE;
2663     }
2664     } # INSCOPE
2665    
2666     ## has an element in scope
2667     my $i;
2668 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2669     my $node = $self->{open_elements}->[$_];
2670 wakaba 1.1 if ({
2671     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
2672     }->{$node->[1]}) {
2673     $i = $_;
2674     last INSCOPE;
2675     } elsif ({
2676     table => 1, caption => 1, td => 1, th => 1,
2677     button => 1, marquee => 1, object => 1, html => 1,
2678     }->{$node->[1]}) {
2679     last INSCOPE;
2680     }
2681     } # INSCOPE
2682    
2683     if (defined $i) {
2684 wakaba 1.3 !!!parse-error (type => 'in hn:hn');
2685     splice @{$self->{open_elements}}, $i;
2686 wakaba 1.1 }
2687    
2688     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2689    
2690     !!!next-token;
2691     return;
2692     } elsif ($token->{tag_name} eq 'a') {
2693     AFE: for my $i (reverse 0..$#$active_formatting_elements) {
2694     my $node = $active_formatting_elements->[$i];
2695     if ($node->[1] eq 'a') {
2696 wakaba 1.3 !!!parse-error (type => 'in a:a');
2697 wakaba 1.1
2698     !!!back-token;
2699     $token = {type => 'end tag', tag_name => 'a'};
2700     $formatting_end_tag->($token->{tag_name});
2701    
2702     AFE2: for (reverse 0..$#$active_formatting_elements) {
2703     if ($active_formatting_elements->[$_]->[0] eq $node->[0]) {
2704     splice @$active_formatting_elements, $_, 1;
2705     last AFE2;
2706     }
2707     } # AFE2
2708 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
2709     if ($self->{open_elements}->[$_]->[0] eq $node->[0]) {
2710     splice @{$self->{open_elements}}, $_, 1;
2711 wakaba 1.1 last OE;
2712     }
2713     } # OE
2714     last AFE;
2715     } elsif ($node->[0] eq '#marker') {
2716     last AFE;
2717     }
2718     } # AFE
2719    
2720     $reconstruct_active_formatting_elements->($insert_to_current);
2721    
2722     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2723 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2724 wakaba 1.1
2725     !!!next-token;
2726     return;
2727     } elsif ({
2728     b => 1, big => 1, em => 1, font => 1, i => 1,
2729 wakaba 1.19 s => 1, small => 1, strike => 1,
2730 wakaba 1.1 strong => 1, tt => 1, u => 1,
2731     }->{$token->{tag_name}}) { ## formatting elements; must match the end tag list in this handler
2732     $reconstruct_active_formatting_elements->($insert_to_current);
2733    
2734     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2735 wakaba 1.3 push @$active_formatting_elements, $self->{open_elements}->[-1];
2736 wakaba 1.1
2737     !!!next-token;
2738     return;
2739 wakaba 1.19 } elsif ($token->{tag_name} eq 'nobr') {
2740     $reconstruct_active_formatting_elements->($insert_to_current);
2741    
2742     ## has a |nobr| element in scope
2743     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2744     my $node = $self->{open_elements}->[$_];
2745     if ($node->[1] eq 'nobr') {
2746     !!!back-token;
2747     $token = {type => 'end tag', tag_name => 'nobr'};
2748     return;
2749     } elsif ({
2750     table => 1, caption => 1, td => 1, th => 1,
2751     button => 1, marquee => 1, object => 1, html => 1,
2752     }->{$node->[1]}) {
2753     last INSCOPE;
2754     }
2755     } # INSCOPE
2756    
2757     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2758     push @$active_formatting_elements, $self->{open_elements}->[-1];
2759    
2760     !!!next-token;
2761     return;
2762 wakaba 1.1 } elsif ($token->{tag_name} eq 'button') {
2763     ## has a button element in scope
2764 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
2765     my $node = $self->{open_elements}->[$_];
2766 wakaba 1.1 if ($node->[1] eq 'button') {
2767 wakaba 1.3 !!!parse-error (type => 'in button:button');
2768 wakaba 1.1 !!!back-token;
2769     $token = {type => 'end tag', tag_name => 'button'};
2770     return;
2771     } elsif ({
2772     table => 1, caption => 1, td => 1, th => 1,
2773     button => 1, marquee => 1, object => 1, html => 1,
2774     }->{$node->[1]}) {
2775     last INSCOPE;
2776     }
2777     } # INSCOPE
2778    
2779     $reconstruct_active_formatting_elements->($insert_to_current);
2780    
2781     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2782     push @$active_formatting_elements, ['#marker', ''];
2783    
2784     !!!next-token;
2785     return;
2786     } elsif ($token->{tag_name} eq 'marquee' or
2787     $token->{tag_name} eq 'object') {
2788     $reconstruct_active_formatting_elements->($insert_to_current);
2789    
2790     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2791     push @$active_formatting_elements, ['#marker', ''];
2792    
2793     !!!next-token;
2794     return;
2795     } elsif ($token->{tag_name} eq 'xmp') {
2796     $reconstruct_active_formatting_elements->($insert_to_current);
2797    
2798     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2799    
2800     $self->{content_model_flag} = 'CDATA';
2801 wakaba 1.13 delete $self->{escape}; # MUST
2802 wakaba 1.1
2803     !!!next-token;
2804     return;
2805     } elsif ($token->{tag_name} eq 'table') {
2806     ## has a p element in scope
2807 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2808 wakaba 1.1 if ($_->[1] eq 'p') {
2809     !!!back-token;
2810     $token = {type => 'end tag', tag_name => 'p'};
2811     return;
2812     } elsif ({
2813     table => 1, caption => 1, td => 1, th => 1,
2814     button => 1, marquee => 1, object => 1, html => 1,
2815     }->{$_->[1]}) {
2816     last INSCOPE;
2817     }
2818     } # INSCOPE
2819    
2820     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2821    
2822 wakaba 1.3 $self->{insertion_mode} = 'in table';
2823 wakaba 1.1
2824     !!!next-token;
2825     return;
2826     } elsif ({
2827     area => 1, basefont => 1, bgsound => 1, br => 1,
2828     embed => 1, img => 1, param => 1, spacer => 1, wbr => 1,
2829     image => 1,
2830     }->{$token->{tag_name}}) {
2831     if ($token->{tag_name} eq 'image') {
2832 wakaba 1.3 !!!parse-error (type => 'image');
2833 wakaba 1.1 $token->{tag_name} = 'img';
2834     }
2835    
2836     $reconstruct_active_formatting_elements->($insert_to_current);
2837    
2838     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2839 wakaba 1.3 pop @{$self->{open_elements}};
2840 wakaba 1.1
2841     !!!next-token;
2842     return;
2843     } elsif ($token->{tag_name} eq 'hr') {
2844     ## has a p element in scope
2845 wakaba 1.3 INSCOPE: for (reverse @{$self->{open_elements}}) {
2846 wakaba 1.1 if ($_->[1] eq 'p') {
2847     !!!back-token;
2848     $token = {type => 'end tag', tag_name => 'p'};
2849     return;
2850     } elsif ({
2851     table => 1, caption => 1, td => 1, th => 1,
2852     button => 1, marquee => 1, object => 1, html => 1,
2853     }->{$_->[1]}) {
2854     last INSCOPE;
2855     }
2856     } # INSCOPE
2857    
2858     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2859 wakaba 1.3 pop @{$self->{open_elements}};
2860 wakaba 1.1
2861     !!!next-token;
2862     return;
2863     } elsif ($token->{tag_name} eq 'input') {
2864     $reconstruct_active_formatting_elements->($insert_to_current);
2865    
2866     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2867 wakaba 1.3 ## TODO: associate with $self->{form_element} if defined
2868     pop @{$self->{open_elements}};
2869 wakaba 1.1
2870     !!!next-token;
2871     return;
2872     } elsif ($token->{tag_name} eq 'isindex') {
2873 wakaba 1.3 !!!parse-error (type => 'isindex');
2874 wakaba 1.1
2875 wakaba 1.3 if (defined $self->{form_element}) {
2876 wakaba 1.1 ## Ignore the token
2877     !!!next-token;
2878     return;
2879     } else {
2880     my $at = $token->{attributes};
2881     $at->{name} = {name => 'name', value => 'isindex'};
2882     my @tokens = (
2883     {type => 'start tag', tag_name => 'form'},
2884     {type => 'start tag', tag_name => 'hr'},
2885     {type => 'start tag', tag_name => 'p'},
2886     {type => 'start tag', tag_name => 'label'},
2887     {type => 'character',
2888     data => 'This is a searchable index. Insert your search keywords here: '}, # SHOULD
2889     ## TODO: make this configurable
2890     {type => 'start tag', tag_name => 'input', attributes => $at},
2891     #{type => 'character', data => ''}, # SHOULD
2892     {type => 'end tag', tag_name => 'label'},
2893     {type => 'end tag', tag_name => 'p'},
2894     {type => 'start tag', tag_name => 'hr'},
2895     {type => 'end tag', tag_name => 'form'},
2896     );
2897     $token = shift @tokens;
2898     !!!back-token (@tokens);
2899     return;
2900     }
2901     } elsif ({
2902     textarea => 1,
2903 wakaba 1.5 iframe => 1,
2904 wakaba 1.1 noembed => 1,
2905     noframes => 1,
2906     noscript => 0, ## TODO: 1 if scripting is enabled
2907     }->{$token->{tag_name}}) {
2908     my $tag_name = $token->{tag_name};
2909     my $el;
2910     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
2911    
2912     if ($token->{tag_name} eq 'textarea') {
2913 wakaba 1.3 ## TODO: $self->{form_element} if defined
2914 wakaba 1.1 $self->{content_model_flag} = 'RCDATA';
2915     } else {
2916     $self->{content_model_flag} = 'CDATA';
2917     }
2918 wakaba 1.13 delete $self->{escape}; # MUST
2919 wakaba 1.1
2920     $insert->($el);
2921    
2922     my $text = '';
2923 wakaba 1.9 if ($token->{tag_name} eq 'textarea') {
2924     !!!next-token;
2925     if ($token->{type} eq 'character') {
2926     $token->{data} =~ s/^\x0A//;
2927     unless (length $token->{data}) {
2928     !!!next-token;
2929     }
2930     }
2931     } else {
2932     !!!next-token;
2933     }
2934 wakaba 1.1 while ($token->{type} eq 'character') {
2935     $text .= $token->{data};
2936     !!!next-token;
2937     }
2938     if (length $text) {
2939     $el->manakai_append_text ($text);
2940     }
2941    
2942     $self->{content_model_flag} = 'PCDATA';
2943    
2944     if ($token->{type} eq 'end tag' and
2945     $token->{tag_name} eq $tag_name) {
2946     ## Ignore the token
2947     } else {
2948 wakaba 1.10 if ($tag_name eq 'textarea') { ## $token is now the token after the text, so test the saved start tag name
2949     !!!parse-error (type => 'in RCDATA:#'.$token->{type});
2950     } else {
2951 wakaba 1.3 !!!parse-error (type => 'in CDATA:#'.$token->{type});
2952     }
2953 wakaba 1.1 ## ISSUE: And ignore?
2954     }
2955     !!!next-token;
2956     return;
2957     } elsif ($token->{tag_name} eq 'select') {
2958     $reconstruct_active_formatting_elements->($insert_to_current);
2959    
2960     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2961    
2962 wakaba 1.3 $self->{insertion_mode} = 'in select';
2963 wakaba 1.1 !!!next-token;
2964     return;
2965     } elsif ({
2966     caption => 1, col => 1, colgroup => 1, frame => 1,
2967     frameset => 1, head => 1, option => 1, optgroup => 1,
2968     tbody => 1, td => 1, tfoot => 1, th => 1,
2969     thead => 1, tr => 1,
2970     }->{$token->{tag_name}}) {
2971 wakaba 1.3 !!!parse-error (type => 'in body:'.$token->{tag_name});
2972 wakaba 1.1 ## Ignore the token
2973     !!!next-token;
2974     return;
2975    
2976     ## ISSUE: An issue on HTML5 new elements in the spec.
2977     } else {
2978     $reconstruct_active_formatting_elements->($insert_to_current);
2979    
2980     !!!insert-element-t ($token->{tag_name}, $token->{attributes});
2981    
2982     !!!next-token;
2983     return;
2984     }
2985     } elsif ($token->{type} eq 'end tag') {
2986     if ($token->{tag_name} eq 'body') {
2987 wakaba 1.20 if (@{$self->{open_elements}} > 1 and
2988     $self->{open_elements}->[1]->[1] eq 'body') {
2989     for (@{$self->{open_elements}}) {
2990     unless ({
2991     dd => 1, dt => 1, li => 1, p => 1, td => 1,
2992     th => 1, tr => 1, body => 1, html => 1,
2993     }->{$_->[1]}) {
2994     !!!parse-error (type => 'not closed:'.$_->[1]);
2995     }
2996 wakaba 1.1 }
2997 wakaba 1.20
2998 wakaba 1.3 $self->{insertion_mode} = 'after body';
2999 wakaba 1.1 !!!next-token;
3000     return;
3001     } else {
3002 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3003 wakaba 1.1 ## Ignore the token
3004     !!!next-token;
3005     return;
3006     }
3007     } elsif ($token->{tag_name} eq 'html') {
3008 wakaba 1.3 if (@{$self->{open_elements}} > 1 and $self->{open_elements}->[1]->[1] eq 'body') {
3009 wakaba 1.1 ## ISSUE: There is an issue in the spec.
3010 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'body') {
3011     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[1]->[1]);
3012 wakaba 1.1 }
3013 wakaba 1.3 $self->{insertion_mode} = 'after body';
3014 wakaba 1.1 ## reprocess
3015     return;
3016     } else {
3017 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3018 wakaba 1.1 ## Ignore the token
3019     !!!next-token;
3020     return;
3021     }
3022     } elsif ({
3023     address => 1, blockquote => 1, center => 1, dir => 1,
3024     div => 1, dl => 1, fieldset => 1, listing => 1,
3025     menu => 1, ol => 1, pre => 1, ul => 1,
3026     p => 1,
3027     dd => 1, dt => 1, li => 1,
3028     button => 1, marquee => 1, object => 1,
3029     }->{$token->{tag_name}}) {
3030     ## has an element in scope
3031     my $i;
3032 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3033     my $node = $self->{open_elements}->[$_];
3034 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3035     ## generate implied end tags
3036     if ({
3037     dd => ($token->{tag_name} ne 'dd'),
3038     dt => ($token->{tag_name} ne 'dt'),
3039     li => ($token->{tag_name} ne 'li'),
3040     p => ($token->{tag_name} ne 'p'),
3041     td => 1, th => 1, tr => 1,
3042 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3043 wakaba 1.1 !!!back-token;
3044     $token = {type => 'end tag',
3045 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3046 wakaba 1.1 return;
3047     }
3048     $i = $_;
3049     last INSCOPE unless $token->{tag_name} eq 'p';
3050     } elsif ({
3051     table => 1, caption => 1, td => 1, th => 1,
3052     button => 1, marquee => 1, object => 1, html => 1,
3053     }->{$node->[1]}) {
3054     last INSCOPE;
3055     }
3056     } # INSCOPE
3057    
3058 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3059     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3060 wakaba 1.1 }
3061    
3062 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
3063 wakaba 1.1 $clear_up_to_marker->()
3064     if {
3065     button => 1, marquee => 1, object => 1,
3066     }->{$token->{tag_name}};
3067     !!!next-token;
3068     return;
3069 wakaba 1.12 } elsif ($token->{tag_name} eq 'form') {
3070     ## has an element in scope
3071     INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3072     my $node = $self->{open_elements}->[$_];
3073     if ($node->[1] eq $token->{tag_name}) {
3074     ## generate implied end tags
3075     if ({
3076     dd => 1, dt => 1, li => 1, p => 1,
3077     td => 1, th => 1, tr => 1,
3078     }->{$self->{open_elements}->[-1]->[1]}) {
3079     !!!back-token;
3080     $token = {type => 'end tag',
3081     tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3082     return;
3083     }
3084     last INSCOPE;
3085     } elsif ({
3086     table => 1, caption => 1, td => 1, th => 1,
3087     button => 1, marquee => 1, object => 1, html => 1,
3088     }->{$node->[1]}) {
3089     last INSCOPE;
3090     }
3091     } # INSCOPE
3092    
3093     if ($self->{open_elements}->[-1]->[1] eq $token->{tag_name}) {
3094     pop @{$self->{open_elements}};
3095     } else {
3096     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3097     }
3098    
3099     undef $self->{form_element};
3100     !!!next-token;
3101     return;
3102 wakaba 1.1 } elsif ({
3103     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3104     }->{$token->{tag_name}}) {
3105     ## has an element in scope
3106     my $i;
3107 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3108     my $node = $self->{open_elements}->[$_];
3109 wakaba 1.1 if ({
3110     h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
3111     }->{$node->[1]}) {
3112     ## generate implied end tags
3113     if ({
3114     dd => 1, dt => 1, li => 1, p => 1,
3115     td => 1, th => 1, tr => 1,
3116 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3117 wakaba 1.1 !!!back-token;
3118     $token = {type => 'end tag',
3119 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3120 wakaba 1.1 return;
3121     }
3122     $i = $_;
3123     last INSCOPE;
3124     } elsif ({
3125     table => 1, caption => 1, td => 1, th => 1,
3126     button => 1, marquee => 1, object => 1, html => 1,
3127     }->{$node->[1]}) {
3128     last INSCOPE;
3129     }
3130     } # INSCOPE
3131    
3132 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
3133     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3134 wakaba 1.1 }
3135    
3136 wakaba 1.3 splice @{$self->{open_elements}}, $i if defined $i;
3137 wakaba 1.1 !!!next-token;
3138     return;
3139     } elsif ({
3140     a => 1,
3141     b => 1, big => 1, em => 1, font => 1, i => 1,
3142     nobr => 1, s => 1, small => 1, strike => 1,
3143     strong => 1, tt => 1, u => 1,
3144     }->{$token->{tag_name}}) { ## formatting elements; must match the start tag branches (a, b..u, nobr)
3145     $formatting_end_tag->($token->{tag_name});
3146 wakaba 1.8 ## TODO: <http://html5.org/tools/web-apps-tracker?from=883&to=884>
3147 wakaba 1.1 return;
3148     } elsif ({
3149     caption => 1, col => 1, colgroup => 1, frame => 1,
3150     frameset => 1, head => 1, option => 1, optgroup => 1,
3151     tbody => 1, td => 1, tfoot => 1, th => 1,
3152     thead => 1, tr => 1,
3153     area => 1, basefont => 1, bgsound => 1, br => 1,
3154     embed => 1, hr => 1, iframe => 1, image => 1,
3155 wakaba 1.5 img => 1, input => 1, isindex => 1, noembed => 1,
3156 wakaba 1.1 noframes => 1, param => 1, select => 1, spacer => 1,
3157     table => 1, textarea => 1, wbr => 1,
3158     noscript => 0, ## TODO: if scripting is enabled
3159     }->{$token->{tag_name}}) {
3160 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3161 wakaba 1.1 ## Ignore the token
3162     !!!next-token;
3163     return;
3164    
3165     ## ISSUE: Issue on HTML5 new elements in spec
3166    
3167     } else {
3168     ## Step 1
3169     my $node_i = -1;
3170 wakaba 1.3 my $node = $self->{open_elements}->[$node_i];
3171 wakaba 1.1
3172     ## Step 2
3173     S2: {
3174     if ($node->[1] eq $token->{tag_name}) {
3175     ## Step 1
3176     ## generate implied end tags
3177     if ({
3178     dd => 1, dt => 1, li => 1, p => 1,
3179     td => 1, th => 1, tr => 1,
3180 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3181 wakaba 1.1 !!!back-token;
3182     $token = {type => 'end tag',
3183 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3184 wakaba 1.1 return;
3185     }
3186    
3187     ## Step 2
3188 wakaba 1.3 if ($token->{tag_name} ne $self->{open_elements}->[-1]->[1]) {
3189     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3190 wakaba 1.1 }
3191    
3192     ## Step 3
3193 wakaba 1.3 splice @{$self->{open_elements}}, $node_i;
3194    
3195     !!!next-token;
3196 wakaba 1.1 last S2;
3197     } else {
3198     ## Step 3
3199     if (not $formatting_category->{$node->[1]} and
3200     #not $phrasing_category->{$node->[1]} and
3201     ($special_category->{$node->[1]} or
3202     $scoping_category->{$node->[1]})) {
3203 wakaba 1.3 !!!parse-error (type => 'not closed:'.$node->[1]);
3204 wakaba 1.1 ## Ignore the token
3205     !!!next-token;
3206     last S2;
3207     }
3208     }
3209    
3210     ## Step 4
3211     $node_i--;
3212 wakaba 1.3 $node = $self->{open_elements}->[$node_i];
3213 wakaba 1.1
3214     ## Step 5;
3215     redo S2;
3216     } # S2
3217 wakaba 1.3 return;
3218 wakaba 1.1 }
3219     }
3220     }; # $in_body
3221    
3222     B: {
3223 wakaba 1.3 if ($phase eq 'main') {
3224 wakaba 1.1 if ($token->{type} eq 'DOCTYPE') {
3225 wakaba 1.3 !!!parse-error (type => 'in html:#DOCTYPE');
3226 wakaba 1.1 ## Ignore the token
3227     ## Stay in the phase
3228     !!!next-token;
3229     redo B;
3230     } elsif ($token->{type} eq 'start tag' and
3231     $token->{tag_name} eq 'html') {
3232     ## TODO: unless it is the first start tag token, parse-error
3233 wakaba 1.3 my $top_el = $self->{open_elements}->[0]->[0];
3234 wakaba 1.1 for my $attr_name (keys %{$token->{attributes}}) {
3235     unless ($top_el->has_attribute_ns (undef, $attr_name)) {
3236     $top_el->set_attribute_ns
3237     (undef, [undef, $attr_name],
3238     $token->{attributes}->{$attr_name}->{value});
3239     }
3240     }
3241     !!!next-token;
3242     redo B;
3243     } elsif ($token->{type} eq 'end-of-file') {
3244     ## Generate implied end tags
3245     if ({
3246     dd => 1, dt => 1, li => 1, p => 1, td => 1, th => 1, tr => 1,
3247 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3248 wakaba 1.1 !!!back-token;
3249 wakaba 1.3 $token = {type => 'end tag', tag_name => $self->{open_elements}->[-1]->[1]};
3250 wakaba 1.1 redo B;
3251     }
3252    
3253 wakaba 1.3 if (@{$self->{open_elements}} > 2 or
3254     (@{$self->{open_elements}} == 2 and $self->{open_elements}->[1]->[1] ne 'body')) {
3255     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3256     } elsif (defined $self->{inner_html_node} and
3257     @{$self->{open_elements}} > 1 and
3258     $self->{open_elements}->[1]->[1] ne 'body') {
3259     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3260 wakaba 1.1 }
3261    
3262     ## Stop parsing
3263     last B;
3264    
3265     ## ISSUE: There is an issue in the spec.
3266     } else {
3267 wakaba 1.3 if ($self->{insertion_mode} eq 'before head') {
3268 wakaba 1.1 if ($token->{type} eq 'character') {
3269     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3270 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3271 wakaba 1.1 unless (length $token->{data}) {
3272     !!!next-token;
3273     redo B;
3274     }
3275     }
3276     ## As if <head>
3277 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3278     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3279     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3280     $self->{insertion_mode} = 'in head';
3281 wakaba 1.1 ## reprocess
3282     redo B;
3283     } elsif ($token->{type} eq 'comment') {
3284     my $comment = $self->{document}->create_comment ($token->{data});
3285 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3286 wakaba 1.1 !!!next-token;
3287     redo B;
3288     } elsif ($token->{type} eq 'start tag') {
3289     my $attr = $token->{tag_name} eq 'head' ? $token->{attributes} : {};
3290 wakaba 1.3 !!!create-element ($self->{head_element}, 'head', $attr);
3291     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3292     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3293     $self->{insertion_mode} = 'in head';
3294 wakaba 1.1 if ($token->{tag_name} eq 'head') {
3295     !!!next-token;
3296     #} elsif ({
3297     # base => 1, link => 1, meta => 1,
3298     # script => 1, style => 1, title => 1,
3299     # }->{$token->{tag_name}}) {
3300     # ## reprocess
3301     } else {
3302     ## reprocess
3303     }
3304     redo B;
3305     } elsif ($token->{type} eq 'end tag') {
3306 wakaba 1.21 if ({head => 1, body => 1, html => 1}->{$token->{tag_name}}) {
3307 wakaba 1.1 ## As if <head>
3308 wakaba 1.3 !!!create-element ($self->{head_element}, 'head');
3309     $self->{open_elements}->[-1]->[0]->append_child ($self->{head_element});
3310     push @{$self->{open_elements}}, [$self->{head_element}, 'head'];
3311     $self->{insertion_mode} = 'in head';
3312 wakaba 1.1 ## reprocess
3313     redo B;
3314     } else {
3315 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3316 wakaba 1.21 ## Ignore the token ## ISSUE: An issue in the spec.
3317 wakaba 1.1 !!!next-token;
3318     redo B;
3319     }
3320     } else {
3321     die "$0: $token->{type}: Unknown type";
3322     }
3323 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in head') {
3324 wakaba 1.1 if ($token->{type} eq 'character') {
3325     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3326 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3327 wakaba 1.1 unless (length $token->{data}) {
3328     !!!next-token;
3329     redo B;
3330     }
3331     }
3332    
3333     #
3334     } elsif ($token->{type} eq 'comment') {
3335     my $comment = $self->{document}->create_comment ($token->{data});
3336 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3337 wakaba 1.1 !!!next-token;
3338     redo B;
3339     } elsif ($token->{type} eq 'start tag') {
3340     if ($token->{tag_name} eq 'title') {
3341     ## NOTE: There is an "as if in head" code clone
3342     my $title_el;
3343     !!!create-element ($title_el, 'title', $token->{attributes});
3344 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3345 wakaba 1.1 ->append_child ($title_el);
3346     $self->{content_model_flag} = 'RCDATA';
3347 wakaba 1.13 delete $self->{escape}; # MUST
3348 wakaba 1.1
3349     my $text = '';
3350     !!!next-token;
3351     while ($token->{type} eq 'character') {
3352     $text .= $token->{data};
3353     !!!next-token;
3354     }
3355     if (length $text) {
3356     $title_el->manakai_append_text ($text);
3357     }
3358    
3359     $self->{content_model_flag} = 'PCDATA';
3360    
3361     if ($token->{type} eq 'end tag' and
3362     $token->{tag_name} eq 'title') {
3363     ## Ignore the token
3364     } else {
3365 wakaba 1.3 !!!parse-error (type => 'in RCDATA:#'.$token->{type});
3366 wakaba 1.1 ## ISSUE: And ignore?
3367     }
3368     !!!next-token;
3369     redo B;
3370     } elsif ($token->{tag_name} eq 'style') {
3371     $style_start_tag->();
3372     redo B;
3373     } elsif ($token->{tag_name} eq 'script') {
3374     $script_start_tag->();
3375     redo B;
3376     } elsif ({base => 1, link => 1, meta => 1}->{$token->{tag_name}}) {
3377     ## NOTE: There are "as if in head" code clones
3378     my $el;
3379     !!!create-element ($el, $token->{tag_name}, $token->{attributes});
3380 wakaba 1.3 (defined $self->{head_element} ? $self->{head_element} : $self->{open_elements}->[-1]->[0])
3381 wakaba 1.1 ->append_child ($el);
3382    
3383     !!!next-token;
3384     redo B;
3385     } elsif ($token->{tag_name} eq 'head') {
3386 wakaba 1.3 !!!parse-error (type => 'in head:head');
3387 wakaba 1.1 ## Ignore the token
3388     !!!next-token;
3389     redo B;
3390     } else {
3391     #
3392     }
3393     } elsif ($token->{type} eq 'end tag') {
3394     if ($token->{tag_name} eq 'head') {
3395 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3396     pop @{$self->{open_elements}};
3397 wakaba 1.1 } else {
3398 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:head');
3399 wakaba 1.1 }
3400 wakaba 1.3 $self->{insertion_mode} = 'after head';
3401 wakaba 1.1 !!!next-token;
3402     redo B;
3403 wakaba 1.21 } elsif ($token->{tag_name} eq 'body' or
3404     $token->{tag_name} eq 'html') {
3405 wakaba 1.1 #
3406     } else {
3407 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3408 wakaba 1.1 ## Ignore the token
3409     !!!next-token;
3410     redo B;
3411     }
3412     } else {
3413     #
3414     }
3415    
3416 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'head') {
3417 wakaba 1.1 ## As if </head>
3418 wakaba 1.3 pop @{$self->{open_elements}};
3419 wakaba 1.1 }
3420 wakaba 1.3 $self->{insertion_mode} = 'after head';
3421 wakaba 1.1 ## reprocess
3422     redo B;
3423    
3424     ## ISSUE: An issue in the spec.
3425 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after head') {
3426 wakaba 1.1 if ($token->{type} eq 'character') {
3427     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3428 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3429 wakaba 1.1 unless (length $token->{data}) {
3430     !!!next-token;
3431     redo B;
3432     }
3433     }
3434    
3435     #
3436     } elsif ($token->{type} eq 'comment') {
3437     my $comment = $self->{document}->create_comment ($token->{data});
3438 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3439 wakaba 1.1 !!!next-token;
3440     redo B;
3441     } elsif ($token->{type} eq 'start tag') {
3442     if ($token->{tag_name} eq 'body') {
3443     !!!insert-element ('body', $token->{attributes});
3444 wakaba 1.3 $self->{insertion_mode} = 'in body';
3445 wakaba 1.1 !!!next-token;
3446     redo B;
3447     } elsif ($token->{tag_name} eq 'frameset') {
3448     !!!insert-element ('frameset', $token->{attributes});
3449 wakaba 1.3 $self->{insertion_mode} = 'in frameset';
3450 wakaba 1.1 !!!next-token;
3451     redo B;
3452     } elsif ({
3453     base => 1, link => 1, meta => 1,
3454 wakaba 1.3 script => 1, style => 1, title => 1,
3455 wakaba 1.1 }->{$token->{tag_name}}) {
3456 wakaba 1.3 !!!parse-error (type => 'after head:'.$token->{tag_name});
3457     $self->{insertion_mode} = 'in head';
3458 wakaba 1.1 ## reprocess
3459     redo B;
3460     } else {
3461     #
3462     }
3463     } else {
3464     #
3465     }
3466    
3467     ## As if <body>
3468     !!!insert-element ('body');
3469 wakaba 1.3 $self->{insertion_mode} = 'in body';
3470 wakaba 1.1 ## reprocess
3471     redo B;
3472 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in body') {
3473 wakaba 1.1 if ($token->{type} eq 'character') {
3474     ## NOTE: There is a code clone of "character in body".
3475     $reconstruct_active_formatting_elements->($insert_to_current);
3476    
3477 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3478 wakaba 1.1
3479     !!!next-token;
3480     redo B;
3481     } elsif ($token->{type} eq 'comment') {
3482     ## NOTE: There is a code clone of "comment in body".
3483     my $comment = $self->{document}->create_comment ($token->{data});
3484 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3485 wakaba 1.1 !!!next-token;
3486     redo B;
3487     } else {
3488     $in_body->($insert_to_current);
3489     redo B;
3490     }
3491 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table') {
3492 wakaba 1.1 if ($token->{type} eq 'character') {
3493     ## NOTE: There are "character in table" code clones.
3494     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3495 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3496 wakaba 1.1
3497     unless (length $token->{data}) {
3498     !!!next-token;
3499     redo B;
3500     }
3501     }
3502    
3503 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3504    
3505 wakaba 1.1 ## As if in body, but insert into foster parent element
3506     ## ISSUE: Spec says that "whenever a node would be inserted
3507     ## into the current node" while characters might not be
3508     ## result in a new Text node.
3509     $reconstruct_active_formatting_elements->($insert_to_foster);
3510    
3511     if ({
3512     table => 1, tbody => 1, tfoot => 1,
3513     thead => 1, tr => 1,
3514 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3515 wakaba 1.1 # MUST
3516     my $foster_parent_element;
3517     my $next_sibling;
3518     my $prev_sibling;
3519 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3520     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3521     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3522 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3523     $foster_parent_element = $parent;
3524 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3525 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3526     } else {
3527 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3528 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3529     }
3530     last OE;
3531     }
3532     } # OE
3533 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3534 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3535     unless defined $foster_parent_element;
3536     if (defined $prev_sibling and
3537     $prev_sibling->node_type == 3) {
3538     $prev_sibling->manakai_append_text ($token->{data});
3539     } else {
3540     $foster_parent_element->insert_before
3541     ($self->{document}->create_text_node ($token->{data}),
3542     $next_sibling);
3543     }
3544     } else {
3545 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3546 wakaba 1.1 }
3547    
3548     !!!next-token;
3549     redo B;
3550     } elsif ($token->{type} eq 'comment') {
3551     my $comment = $self->{document}->create_comment ($token->{data});
3552 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3553 wakaba 1.1 !!!next-token;
3554     redo B;
3555     } elsif ($token->{type} eq 'start tag') {
3556     if ({
3557     caption => 1,
3558     colgroup => 1,
3559     tbody => 1, tfoot => 1, thead => 1,
3560     }->{$token->{tag_name}}) {
3561     ## Clear back to table context
3562 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3563     $self->{open_elements}->[-1]->[1] ne 'html') {
3564     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3565     pop @{$self->{open_elements}};
3566 wakaba 1.1 }
3567    
3568     push @$active_formatting_elements, ['#marker', '']
3569     if $token->{tag_name} eq 'caption';
3570    
3571     !!!insert-element ($token->{tag_name}, $token->{attributes});
3572 wakaba 1.3 $self->{insertion_mode} = {
3573 wakaba 1.1 caption => 'in caption',
3574     colgroup => 'in column group',
3575     tbody => 'in table body',
3576     tfoot => 'in table body',
3577     thead => 'in table body',
3578     }->{$token->{tag_name}};
3579     !!!next-token;
3580     redo B;
3581     } elsif ({
3582     col => 1,
3583     td => 1, th => 1, tr => 1,
3584     }->{$token->{tag_name}}) {
3585     ## Clear back to table context
3586 wakaba 1.3 while ($self->{open_elements}->[-1]->[1] ne 'table' and
3587     $self->{open_elements}->[-1]->[1] ne 'html') {
3588     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3589     pop @{$self->{open_elements}};
3590 wakaba 1.1 }
3591    
3592     !!!insert-element ($token->{tag_name} eq 'col' ? 'colgroup' : 'tbody');
3593 wakaba 1.3 $self->{insertion_mode} = $token->{tag_name} eq 'col'
3594 wakaba 1.1 ? 'in column group' : 'in table body';
3595     ## reprocess
3596     redo B;
3597     } elsif ($token->{tag_name} eq 'table') {
3598     ## NOTE: There are code clones for this "table in table"
3599 wakaba 1.3 !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3600 wakaba 1.1
3601     ## As if </table>
3602     ## have a table element in table scope
3603     my $i;
3604 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3605     my $node = $self->{open_elements}->[$_];
3606 wakaba 1.1 if ($node->[1] eq 'table') {
3607     $i = $_;
3608     last INSCOPE;
3609     } elsif ({
3610     table => 1, html => 1,
3611     }->{$node->[1]}) {
3612     last INSCOPE;
3613     }
3614     } # INSCOPE
3615     unless (defined $i) {
3616 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
3617 wakaba 1.1 ## Ignore tokens </table><table>
3618     !!!next-token;
3619     redo B;
3620     }
3621    
3622     ## generate implied end tags
3623     if ({
3624     dd => 1, dt => 1, li => 1, p => 1,
3625     td => 1, th => 1, tr => 1,
3626 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3627 wakaba 1.1 !!!back-token; # <table>
3628     $token = {type => 'end tag', tag_name => 'table'};
3629     !!!back-token;
3630     $token = {type => 'end tag',
3631 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3632 wakaba 1.1 redo B;
3633     }
3634    
3635 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3636     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3637 wakaba 1.1 }
3638    
3639 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3640 wakaba 1.1
3641 wakaba 1.3 $self->_reset_insertion_mode;
3642 wakaba 1.1
3643     ## reprocess
3644     redo B;
3645     } else {
3646     #
3647     }
3648     } elsif ($token->{type} eq 'end tag') {
3649     if ($token->{tag_name} eq 'table') {
3650     ## have a table element in table scope
3651     my $i;
3652 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3653     my $node = $self->{open_elements}->[$_];
3654 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3655     $i = $_;
3656     last INSCOPE;
3657     } elsif ({
3658     table => 1, html => 1,
3659     }->{$node->[1]}) {
3660     last INSCOPE;
3661     }
3662     } # INSCOPE
3663     unless (defined $i) {
3664 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3665 wakaba 1.1 ## Ignore the token
3666     !!!next-token;
3667     redo B;
3668     }
3669    
3670     ## generate implied end tags
3671     if ({
3672     dd => 1, dt => 1, li => 1, p => 1,
3673     td => 1, th => 1, tr => 1,
3674 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3675 wakaba 1.1 !!!back-token;
3676     $token = {type => 'end tag',
3677 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3678 wakaba 1.1 redo B;
3679     }
3680    
3681 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
3682     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3683 wakaba 1.1 }
3684    
3685 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3686 wakaba 1.1
3687 wakaba 1.3 $self->_reset_insertion_mode;
3688 wakaba 1.1
3689     !!!next-token;
3690     redo B;
3691     } elsif ({
3692     body => 1, caption => 1, col => 1, colgroup => 1,
3693     html => 1, tbody => 1, td => 1, tfoot => 1, th => 1,
3694     thead => 1, tr => 1,
3695     }->{$token->{tag_name}}) {
3696 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3697 wakaba 1.1 ## Ignore the token
3698     !!!next-token;
3699     redo B;
3700     } else {
3701     #
3702     }
3703     } else {
3704     #
3705     }
3706    
3707 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
3708 wakaba 1.1 $in_body->($insert_to_foster);
3709     redo B;
3710 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in caption') {
3711 wakaba 1.1 if ($token->{type} eq 'character') {
3712     ## NOTE: This is a code clone of "character in body".
3713     $reconstruct_active_formatting_elements->($insert_to_current);
3714    
3715 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
3716 wakaba 1.1
3717     !!!next-token;
3718     redo B;
3719     } elsif ($token->{type} eq 'comment') {
3720     ## NOTE: This is a code clone of "comment in body".
3721     my $comment = $self->{document}->create_comment ($token->{data});
3722 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3723 wakaba 1.1 !!!next-token;
3724     redo B;
3725     } elsif ($token->{type} eq 'start tag') {
3726     if ({
3727     caption => 1, col => 1, colgroup => 1, tbody => 1,
3728     td => 1, tfoot => 1, th => 1, thead => 1, tr => 1,
3729     }->{$token->{tag_name}}) {
3730 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3731 wakaba 1.1
3732     ## As if </caption>
3733     ## have a table element in table scope
3734     my $i;
3735 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3736     my $node = $self->{open_elements}->[$_];
3737 wakaba 1.1 if ($node->[1] eq 'caption') {
3738     $i = $_;
3739     last INSCOPE;
3740     } elsif ({
3741     table => 1, html => 1,
3742     }->{$node->[1]}) {
3743     last INSCOPE;
3744     }
3745     } # INSCOPE
3746     unless (defined $i) {
3747 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3748 wakaba 1.1 ## Ignore the token
3749     !!!next-token;
3750     redo B;
3751     }
3752    
3753     ## generate implied end tags
3754     if ({
3755     dd => 1, dt => 1, li => 1, p => 1,
3756     td => 1, th => 1, tr => 1,
3757 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3758 wakaba 1.1 !!!back-token; # <?>
3759     $token = {type => 'end tag', tag_name => 'caption'};
3760     !!!back-token;
3761     $token = {type => 'end tag',
3762 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3763 wakaba 1.1 redo B;
3764     }
3765    
3766 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3767     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3768 wakaba 1.1 }
3769    
3770 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3771 wakaba 1.1
3772     $clear_up_to_marker->();
3773    
3774 wakaba 1.3 $self->{insertion_mode} = 'in table';
3775 wakaba 1.1
3776     ## reprocess
3777     redo B;
3778     } else {
3779     #
3780     }
3781     } elsif ($token->{type} eq 'end tag') {
3782     if ($token->{tag_name} eq 'caption') {
3783     ## have a table element in table scope
3784     my $i;
3785 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3786     my $node = $self->{open_elements}->[$_];
3787 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
3788     $i = $_;
3789     last INSCOPE;
3790     } elsif ({
3791     table => 1, html => 1,
3792     }->{$node->[1]}) {
3793     last INSCOPE;
3794     }
3795     } # INSCOPE
3796     unless (defined $i) {
3797 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3798 wakaba 1.1 ## Ignore the token
3799     !!!next-token;
3800     redo B;
3801     }
3802    
3803     ## generate implied end tags
3804     if ({
3805     dd => 1, dt => 1, li => 1, p => 1,
3806     td => 1, th => 1, tr => 1,
3807 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3808 wakaba 1.1 !!!back-token;
3809     $token = {type => 'end tag',
3810 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3811 wakaba 1.1 redo B;
3812     }
3813    
3814 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3815     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3816 wakaba 1.1 }
3817    
3818 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3819 wakaba 1.1
3820     $clear_up_to_marker->();
3821    
3822 wakaba 1.3 $self->{insertion_mode} = 'in table';
3823 wakaba 1.1
3824     !!!next-token;
3825     redo B;
3826     } elsif ($token->{tag_name} eq 'table') {
3827 wakaba 1.3 !!!parse-error (type => 'not closed:caption');
3828 wakaba 1.1
3829     ## As if </caption>
3830     ## have a table element in table scope
3831     my $i;
3832 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
3833     my $node = $self->{open_elements}->[$_];
3834 wakaba 1.1 if ($node->[1] eq 'caption') {
3835     $i = $_;
3836     last INSCOPE;
3837     } elsif ({
3838     table => 1, html => 1,
3839     }->{$node->[1]}) {
3840     last INSCOPE;
3841     }
3842     } # INSCOPE
3843     unless (defined $i) {
3844 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:caption');
3845 wakaba 1.1 ## Ignore the token
3846     !!!next-token;
3847     redo B;
3848     }
3849    
3850     ## generate implied end tags
3851     if ({
3852     dd => 1, dt => 1, li => 1, p => 1,
3853     td => 1, th => 1, tr => 1,
3854 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3855 wakaba 1.1 !!!back-token; # </table>
3856     $token = {type => 'end tag', tag_name => 'caption'};
3857     !!!back-token;
3858     $token = {type => 'end tag',
3859 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
3860 wakaba 1.1 redo B;
3861     }
3862    
3863 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'caption') {
3864     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
3865 wakaba 1.1 }
3866    
3867 wakaba 1.3 splice @{$self->{open_elements}}, $i;
3868 wakaba 1.1
3869     $clear_up_to_marker->();
3870    
3871 wakaba 1.3 $self->{insertion_mode} = 'in table';
3872 wakaba 1.1
3873     ## reprocess
3874     redo B;
3875     } elsif ({
3876     body => 1, col => 1, colgroup => 1,
3877     html => 1, tbody => 1, td => 1, tfoot => 1,
3878     th => 1, thead => 1, tr => 1,
3879     }->{$token->{tag_name}}) {
3880 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
3881 wakaba 1.1 ## Ignore the token: it must be consumed before |redo B|, otherwise the same end tag is reprocessed in the same insertion mode forever.
3882     !!!next-token; redo B;
3883     } else {
3884     #
3885     }
3886     } else {
3887     #
3888     }
3889    
3890     $in_body->($insert_to_current);
3891     redo B;
3892 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in column group') {
3893 wakaba 1.1 if ($token->{type} eq 'character') {
3894     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3895 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3896 wakaba 1.1 unless (length $token->{data}) {
3897     !!!next-token;
3898     redo B;
3899     }
3900     }
3901    
3902     #
3903     } elsif ($token->{type} eq 'comment') {
3904     my $comment = $self->{document}->create_comment ($token->{data});
3905 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
3906 wakaba 1.1 !!!next-token;
3907     redo B;
3908     } elsif ($token->{type} eq 'start tag') {
3909     if ($token->{tag_name} eq 'col') {
3910     !!!insert-element ($token->{tag_name}, $token->{attributes});
3911 wakaba 1.3 pop @{$self->{open_elements}};
3912 wakaba 1.1 !!!next-token;
3913     redo B;
3914     } else {
3915     #
3916     }
3917     } elsif ($token->{type} eq 'end tag') {
3918     if ($token->{tag_name} eq 'colgroup') {
3919 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3920     !!!parse-error (type => 'unmatched end tag:colgroup');
3921 wakaba 1.1 ## Ignore the token
3922     !!!next-token;
3923     redo B;
3924     } else {
3925 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3926     $self->{insertion_mode} = 'in table';
3927 wakaba 1.1 !!!next-token;
3928     redo B;
3929     }
3930     } elsif ($token->{tag_name} eq 'col') {
3931 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:col');
3932 wakaba 1.1 ## Ignore the token
3933     !!!next-token;
3934     redo B;
3935     } else {
3936     #
3937     }
3938     } else {
3939     #
3940     }
3941    
3942     ## As if </colgroup>
3943 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html') {
3944     !!!parse-error (type => 'unmatched end tag:colgroup');
3945 wakaba 1.1 ## Ignore the token
3946     !!!next-token;
3947     redo B;
3948     } else {
3949 wakaba 1.3 pop @{$self->{open_elements}}; # colgroup
3950     $self->{insertion_mode} = 'in table';
3951 wakaba 1.1 ## reprocess
3952     redo B;
3953     }
3954 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in table body') {
3955 wakaba 1.1 if ($token->{type} eq 'character') {
3956     ## NOTE: This is a "character in table" code clone.
3957     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
3958 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
3959 wakaba 1.1
3960     unless (length $token->{data}) {
3961     !!!next-token;
3962     redo B;
3963     }
3964     }
3965    
3966 wakaba 1.3 !!!parse-error (type => 'in table:#character');
3967    
3968 wakaba 1.1 ## As if in body, but insert into foster parent element
3969     ## ISSUE: Spec says that "whenever a node would be inserted
3970     ## into the current node" while characters might not be
3971     ## result in a new Text node.
3972     $reconstruct_active_formatting_elements->($insert_to_foster);
3973    
3974     if ({
3975     table => 1, tbody => 1, tfoot => 1,
3976     thead => 1, tr => 1,
3977 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
3978 wakaba 1.1 # MUST
3979     my $foster_parent_element;
3980     my $next_sibling;
3981     my $prev_sibling;
3982 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
3983     if ($self->{open_elements}->[$_]->[1] eq 'table') {
3984     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
3985 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
3986     $foster_parent_element = $parent;
3987 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
3988 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
3989     } else {
3990 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
3991 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
3992     }
3993     last OE;
3994     }
3995     } # OE
3996 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
3997 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
3998     unless defined $foster_parent_element;
3999     if (defined $prev_sibling and
4000     $prev_sibling->node_type == 3) {
4001     $prev_sibling->manakai_append_text ($token->{data});
4002     } else {
4003     $foster_parent_element->insert_before
4004     ($self->{document}->create_text_node ($token->{data}),
4005     $next_sibling);
4006     }
4007     } else {
4008 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4009 wakaba 1.1 }
4010    
4011     !!!next-token;
4012     redo B;
4013     } elsif ($token->{type} eq 'comment') {
4014     ## Copied from 'in table'
4015     my $comment = $self->{document}->create_comment ($token->{data});
4016 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4017 wakaba 1.1 !!!next-token;
4018     redo B;
4019     } elsif ($token->{type} eq 'start tag') {
4020     if ({
4021     tr => 1,
4022     th => 1, td => 1,
4023     }->{$token->{tag_name}}) {
4024 wakaba 1.3 unless ($token->{tag_name} eq 'tr') {
4025     !!!parse-error (type => 'missing start tag:tr');
4026     }
4027    
4028 wakaba 1.1 ## Clear back to table body context
4029     while (not {
4030     tbody => 1, tfoot => 1, thead => 1, html => 1,
4031 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4032     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4033     pop @{$self->{open_elements}};
4034 wakaba 1.1 }
4035    
4036 wakaba 1.3 $self->{insertion_mode} = 'in row';
4037 wakaba 1.1 if ($token->{tag_name} eq 'tr') {
4038     !!!insert-element ($token->{tag_name}, $token->{attributes});
4039     !!!next-token;
4040     } else {
4041     !!!insert-element ('tr');
4042     ## reprocess
4043     }
4044     redo B;
4045     } elsif ({
4046     caption => 1, col => 1, colgroup => 1,
4047     tbody => 1, tfoot => 1, thead => 1,
4048     }->{$token->{tag_name}}) {
4049     ## have an element in table scope
4050     my $i;
4051 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4052     my $node = $self->{open_elements}->[$_];
4053 wakaba 1.1 if ({
4054     tbody => 1, thead => 1, tfoot => 1,
4055     }->{$node->[1]}) {
4056     $i = $_;
4057     last INSCOPE;
4058     } elsif ({
4059     table => 1, html => 1,
4060     }->{$node->[1]}) {
4061     last INSCOPE;
4062     }
4063     } # INSCOPE
4064     unless (defined $i) {
4065 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4066 wakaba 1.1 ## Ignore the token
4067     !!!next-token;
4068     redo B;
4069     }
4070    
4071     ## Clear back to table body context
4072     while (not {
4073     tbody => 1, tfoot => 1, thead => 1, html => 1,
4074 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4075     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4076     pop @{$self->{open_elements}};
4077 wakaba 1.1 }
4078    
4079     ## As if <{current node}>
4080     ## have an element in table scope
4081     ## true by definition
4082    
4083     ## Clear back to table body context
4084     ## nop by definition
4085    
4086 wakaba 1.3 pop @{$self->{open_elements}};
4087     $self->{insertion_mode} = 'in table';
4088 wakaba 1.1 ## reprocess
4089     redo B;
4090     } elsif ($token->{tag_name} eq 'table') {
4091     ## NOTE: This is a code clone of "table in table"
4092 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4093 wakaba 1.1
4094     ## As if </table>
4095     ## have a table element in table scope
4096     my $i;
4097 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4098     my $node = $self->{open_elements}->[$_];
4099 wakaba 1.1 if ($node->[1] eq 'table') {
4100     $i = $_;
4101     last INSCOPE;
4102     } elsif ({
4103     table => 1, html => 1,
4104     }->{$node->[1]}) {
4105     last INSCOPE;
4106     }
4107     } # INSCOPE
4108     unless (defined $i) {
4109 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4110 wakaba 1.1 ## Ignore tokens </table><table>
4111     !!!next-token;
4112     redo B;
4113     }
4114    
4115     ## generate implied end tags
4116     if ({
4117     dd => 1, dt => 1, li => 1, p => 1,
4118     td => 1, th => 1, tr => 1,
4119 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4120 wakaba 1.1 !!!back-token; # <table>
4121     $token = {type => 'end tag', tag_name => 'table'};
4122     !!!back-token;
4123     $token = {type => 'end tag',
4124 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4125 wakaba 1.1 redo B;
4126     }
4127    
4128 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4129     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4130 wakaba 1.1 }
4131    
4132 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4133 wakaba 1.1
4134 wakaba 1.3 $self->_reset_insertion_mode;
4135 wakaba 1.1
4136     ## reprocess
4137     redo B;
4138     } else {
4139     #
4140     }
4141     } elsif ($token->{type} eq 'end tag') {
4142     if ({
4143     tbody => 1, tfoot => 1, thead => 1,
4144     }->{$token->{tag_name}}) {
4145     ## have an element in table scope
4146     my $i;
4147 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4148     my $node = $self->{open_elements}->[$_];
4149 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4150     $i = $_;
4151     last INSCOPE;
4152     } elsif ({
4153     table => 1, html => 1,
4154     }->{$node->[1]}) {
4155     last INSCOPE;
4156     }
4157     } # INSCOPE
4158     unless (defined $i) {
4159 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4160 wakaba 1.1 ## Ignore the token
4161     !!!next-token;
4162     redo B;
4163     }
4164    
4165     ## Clear back to table body context
4166     while (not {
4167     tbody => 1, tfoot => 1, thead => 1, html => 1,
4168 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4169     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4170     pop @{$self->{open_elements}};
4171 wakaba 1.1 }
4172    
4173 wakaba 1.3 pop @{$self->{open_elements}};
4174     $self->{insertion_mode} = 'in table';
4175 wakaba 1.1 !!!next-token;
4176     redo B;
4177     } elsif ($token->{tag_name} eq 'table') {
4178     ## have an element in table scope
4179     my $i;
4180 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4181     my $node = $self->{open_elements}->[$_];
4182 wakaba 1.1 if ({
4183     tbody => 1, thead => 1, tfoot => 1,
4184     }->{$node->[1]}) {
4185     $i = $_;
4186     last INSCOPE;
4187     } elsif ({
4188     table => 1, html => 1,
4189     }->{$node->[1]}) {
4190     last INSCOPE;
4191     }
4192     } # INSCOPE
4193     unless (defined $i) {
4194 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4195 wakaba 1.1 ## Ignore the token
4196     !!!next-token;
4197     redo B;
4198     }
4199    
4200     ## Clear back to table body context
4201     while (not {
4202     tbody => 1, tfoot => 1, thead => 1, html => 1,
4203 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4204     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4205     pop @{$self->{open_elements}};
4206 wakaba 1.1 }
4207    
4208     ## As if <{current node}>
4209     ## have an element in table scope
4210     ## true by definition
4211    
4212     ## Clear back to table body context
4213     ## nop by definition
4214    
4215 wakaba 1.3 pop @{$self->{open_elements}};
4216     $self->{insertion_mode} = 'in table';
4217 wakaba 1.1 ## reprocess
4218     redo B;
4219     } elsif ({
4220     body => 1, caption => 1, col => 1, colgroup => 1,
4221     html => 1, td => 1, th => 1, tr => 1,
4222     }->{$token->{tag_name}}) {
4223 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4224 wakaba 1.1 ## Ignore the token
4225     !!!next-token;
4226     redo B;
4227     } else {
4228     #
4229     }
4230     } else {
4231     #
4232     }
4233    
4234     ## As if in table
4235 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4236 wakaba 1.1 $in_body->($insert_to_foster);
4237     redo B;
4238 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in row') {
4239 wakaba 1.1 if ($token->{type} eq 'character') {
4240     ## NOTE: This is a "character in table" code clone.
4241     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4242 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
4243 wakaba 1.1
4244     unless (length $token->{data}) {
4245     !!!next-token;
4246     redo B;
4247     }
4248     }
4249    
4250 wakaba 1.3 !!!parse-error (type => 'in table:#character');
4251    
4252 wakaba 1.1 ## As if in body, but insert into foster parent element
4253     ## ISSUE: Spec says that "whenever a node would be inserted
4254     ## into the current node" while characters might not be
4255     ## result in a new Text node.
4256     $reconstruct_active_formatting_elements->($insert_to_foster);
4257    
4258     if ({
4259     table => 1, tbody => 1, tfoot => 1,
4260     thead => 1, tr => 1,
4261 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4262 wakaba 1.1 # MUST
4263     my $foster_parent_element;
4264     my $next_sibling;
4265     my $prev_sibling;
4266 wakaba 1.3 OE: for (reverse 0..$#{$self->{open_elements}}) {
4267     if ($self->{open_elements}->[$_]->[1] eq 'table') {
4268     my $parent = $self->{open_elements}->[$_]->[0]->parent_node;
4269 wakaba 1.1 if (defined $parent and $parent->node_type == 1) {
4270     $foster_parent_element = $parent;
4271 wakaba 1.3 $next_sibling = $self->{open_elements}->[$_]->[0];
4272 wakaba 1.1 $prev_sibling = $next_sibling->previous_sibling;
4273     } else {
4274 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0];
4275 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child;
4276     }
4277     last OE;
4278     }
4279     } # OE
4280 wakaba 1.3 $foster_parent_element = $self->{open_elements}->[0]->[0] and
4281 wakaba 1.1 $prev_sibling = $foster_parent_element->last_child
4282     unless defined $foster_parent_element;
4283     if (defined $prev_sibling and
4284     $prev_sibling->node_type == 3) {
4285     $prev_sibling->manakai_append_text ($token->{data});
4286     } else {
4287     $foster_parent_element->insert_before
4288     ($self->{document}->create_text_node ($token->{data}),
4289     $next_sibling);
4290     }
4291     } else {
4292 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4293 wakaba 1.1 }
4294    
4295     !!!next-token;
4296     redo B;
4297     } elsif ($token->{type} eq 'comment') {
4298     ## Copied from 'in table'
4299     my $comment = $self->{document}->create_comment ($token->{data});
4300 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4301 wakaba 1.1 !!!next-token;
4302     redo B;
4303     } elsif ($token->{type} eq 'start tag') {
4304     if ($token->{tag_name} eq 'th' or
4305     $token->{tag_name} eq 'td') {
4306     ## Clear back to table row context
4307     while (not {
4308     tr => 1, html => 1,
4309 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4310     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4311     pop @{$self->{open_elements}};
4312 wakaba 1.1 }
4313    
4314     !!!insert-element ($token->{tag_name}, $token->{attributes});
4315 wakaba 1.3 $self->{insertion_mode} = 'in cell';
4316 wakaba 1.1
4317     push @$active_formatting_elements, ['#marker', ''];
4318    
4319     !!!next-token;
4320     redo B;
4321     } elsif ({
4322     caption => 1, col => 1, colgroup => 1,
4323     tbody => 1, tfoot => 1, thead => 1, tr => 1,
4324     }->{$token->{tag_name}}) {
4325     ## As if </tr>
4326     ## have an element in table scope
4327     my $i;
4328 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4329     my $node = $self->{open_elements}->[$_];
4330 wakaba 1.1 if ($node->[1] eq 'tr') {
4331     $i = $_;
4332     last INSCOPE;
4333     } elsif ({
4334     table => 1, html => 1,
4335     }->{$node->[1]}) {
4336     last INSCOPE;
4337     }
4338     } # INSCOPE
4339     unless (defined $i) {
4340 wakaba 1.3 !!!parse-error (type => 'unmacthed end tag:'.$token->{tag_name});
4341 wakaba 1.1 ## Ignore the token
4342     !!!next-token;
4343     redo B;
4344     }
4345    
4346     ## Clear back to table row context
4347     while (not {
4348     tr => 1, html => 1,
4349 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4350     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4351     pop @{$self->{open_elements}};
4352 wakaba 1.1 }
4353    
4354 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4355     $self->{insertion_mode} = 'in table body';
4356 wakaba 1.1 ## reprocess
4357     redo B;
4358     } elsif ($token->{tag_name} eq 'table') {
4359     ## NOTE: This is a code clone of "table in table"
4360 wakaba 1.3 !!!parse-error (type => 'not closed:table');
4361 wakaba 1.1
4362     ## As if </table>
4363     ## have a table element in table scope
4364     my $i;
4365 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4366     my $node = $self->{open_elements}->[$_];
4367 wakaba 1.1 if ($node->[1] eq 'table') {
4368     $i = $_;
4369     last INSCOPE;
4370     } elsif ({
4371     table => 1, html => 1,
4372     }->{$node->[1]}) {
4373     last INSCOPE;
4374     }
4375     } # INSCOPE
4376     unless (defined $i) {
4377 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:table');
4378 wakaba 1.1 ## Ignore tokens </table><table>
4379     !!!next-token;
4380     redo B;
4381     }
4382    
4383     ## generate implied end tags
4384     if ({
4385     dd => 1, dt => 1, li => 1, p => 1,
4386     td => 1, th => 1, tr => 1,
4387 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4388 wakaba 1.1 !!!back-token; # <table>
4389     $token = {type => 'end tag', tag_name => 'table'};
4390     !!!back-token;
4391     $token = {type => 'end tag',
4392 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4393 wakaba 1.1 redo B;
4394     }
4395    
4396 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'table') {
4397     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4398 wakaba 1.1 }
4399    
4400 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4401 wakaba 1.1
4402 wakaba 1.3 $self->_reset_insertion_mode;
4403 wakaba 1.1
4404     ## reprocess
4405     redo B;
4406     } else {
4407     #
4408     }
4409     } elsif ($token->{type} eq 'end tag') {
4410     if ($token->{tag_name} eq 'tr') {
4411     ## have an element in table scope
4412     my $i;
4413 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4414     my $node = $self->{open_elements}->[$_];
4415 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4416     $i = $_;
4417     last INSCOPE;
4418     } elsif ({
4419     table => 1, html => 1,
4420     }->{$node->[1]}) {
4421     last INSCOPE;
4422     }
4423     } # INSCOPE
4424     unless (defined $i) {
4425 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4426 wakaba 1.1 ## Ignore the token
4427     !!!next-token;
4428     redo B;
4429     }
4430    
4431     ## Clear back to table row context
4432     while (not {
4433     tr => 1, html => 1,
4434 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4435     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4436     pop @{$self->{open_elements}};
4437 wakaba 1.1 }
4438    
4439 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4440     $self->{insertion_mode} = 'in table body';
4441 wakaba 1.1 !!!next-token;
4442     redo B;
4443     } elsif ($token->{tag_name} eq 'table') {
4444     ## As if </tr>
4445     ## have an element in table scope
4446     my $i;
4447 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4448     my $node = $self->{open_elements}->[$_];
4449 wakaba 1.1 if ($node->[1] eq 'tr') {
4450     $i = $_;
4451     last INSCOPE;
4452     } elsif ({
4453     table => 1, html => 1,
4454     }->{$node->[1]}) {
4455     last INSCOPE;
4456     }
4457     } # INSCOPE
4458     unless (defined $i) {
4459 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{type});
4460 wakaba 1.1 ## Ignore the token
4461     !!!next-token;
4462     redo B;
4463     }
4464    
4465     ## Clear back to table row context
4466     while (not {
4467     tr => 1, html => 1,
4468 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4469     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4470     pop @{$self->{open_elements}};
4471 wakaba 1.1 }
4472    
4473 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4474     $self->{insertion_mode} = 'in table body';
4475 wakaba 1.1 ## reprocess
4476     redo B;
4477     } elsif ({
4478     tbody => 1, tfoot => 1, thead => 1,
4479     }->{$token->{tag_name}}) {
4480     ## have an element in table scope
4481     my $i;
4482 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4483     my $node = $self->{open_elements}->[$_];
4484 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4485     $i = $_;
4486     last INSCOPE;
4487     } elsif ({
4488     table => 1, html => 1,
4489     }->{$node->[1]}) {
4490     last INSCOPE;
4491     }
4492     } # INSCOPE
4493     unless (defined $i) {
4494 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4495 wakaba 1.1 ## Ignore the token
4496     !!!next-token;
4497     redo B;
4498     }
4499    
4500     ## As if </tr>
4501     ## have an element in table scope
4502     my $i;
4503 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4504     my $node = $self->{open_elements}->[$_];
4505 wakaba 1.1 if ($node->[1] eq 'tr') {
4506     $i = $_;
4507     last INSCOPE;
4508     } elsif ({
4509     table => 1, html => 1,
4510     }->{$node->[1]}) {
4511     last INSCOPE;
4512     }
4513     } # INSCOPE
4514     unless (defined $i) {
4515 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:tr');
4516 wakaba 1.1 ## Ignore the token
4517     !!!next-token;
4518     redo B;
4519     }
4520    
4521     ## Clear back to table row context
4522     while (not {
4523     tr => 1, html => 1,
4524 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4525     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4526     pop @{$self->{open_elements}};
4527 wakaba 1.1 }
4528    
4529 wakaba 1.3 pop @{$self->{open_elements}}; # tr
4530     $self->{insertion_mode} = 'in table body';
4531 wakaba 1.1 ## reprocess
4532     redo B;
4533     } elsif ({
4534     body => 1, caption => 1, col => 1,
4535     colgroup => 1, html => 1, td => 1, th => 1,
4536     }->{$token->{tag_name}}) {
4537 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4538 wakaba 1.1 ## Ignore the token
4539     !!!next-token;
4540     redo B;
4541     } else {
4542     #
4543     }
4544     } else {
4545     #
4546     }
4547    
4548     ## As if in table
4549 wakaba 1.3 !!!parse-error (type => 'in table:'.$token->{tag_name});
4550 wakaba 1.1 $in_body->($insert_to_foster);
4551     redo B;
4552 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in cell') {
4553 wakaba 1.1 if ($token->{type} eq 'character') {
4554     ## NOTE: This is a code clone of "character in body".
4555     $reconstruct_active_formatting_elements->($insert_to_current);
4556    
4557 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4558 wakaba 1.1
4559     !!!next-token;
4560     redo B;
4561     } elsif ($token->{type} eq 'comment') {
4562     ## NOTE: This is a code clone of "comment in body".
4563     my $comment = $self->{document}->create_comment ($token->{data});
4564 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4565 wakaba 1.1 !!!next-token;
4566     redo B;
4567     } elsif ($token->{type} eq 'start tag') {
4568     if ({
4569     caption => 1, col => 1, colgroup => 1,
4570     tbody => 1, td => 1, tfoot => 1, th => 1,
4571     thead => 1, tr => 1,
4572     }->{$token->{tag_name}}) {
4573     ## have an element in table scope
4574     my $tn;
4575 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4576     my $node = $self->{open_elements}->[$_];
4577 wakaba 1.1 if ($node->[1] eq 'td' or $node->[1] eq 'th') {
4578     $tn = $node->[1];
4579     last INSCOPE;
4580     } elsif ({
4581     table => 1, html => 1,
4582     }->{$node->[1]}) {
4583     last INSCOPE;
4584     }
4585     } # INSCOPE
4586     unless (defined $tn) {
4587 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4588 wakaba 1.1 ## Ignore the token
4589     !!!next-token;
4590     redo B;
4591     }
4592    
4593     ## Close the cell
4594     !!!back-token; # <?>
4595     $token = {type => 'end tag', tag_name => $tn};
4596     redo B;
4597     } else {
4598     #
4599     }
4600     } elsif ($token->{type} eq 'end tag') {
4601     if ($token->{tag_name} eq 'td' or $token->{tag_name} eq 'th') {
4602     ## have an element in table scope
4603     my $i;
4604 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4605     my $node = $self->{open_elements}->[$_];
4606 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4607     $i = $_;
4608     last INSCOPE;
4609     } elsif ({
4610     table => 1, html => 1,
4611     }->{$node->[1]}) {
4612     last INSCOPE;
4613     }
4614     } # INSCOPE
4615     unless (defined $i) {
4616 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4617 wakaba 1.1 ## Ignore the token
4618     !!!next-token;
4619     redo B;
4620     }
4621    
4622     ## generate implied end tags
4623     if ({
4624     dd => 1, dt => 1, li => 1, p => 1,
4625     td => ($token->{tag_name} eq 'th'),
4626     th => ($token->{tag_name} eq 'td'),
4627     tr => 1,
4628 wakaba 1.3 }->{$self->{open_elements}->[-1]->[1]}) {
4629 wakaba 1.1 !!!back-token;
4630     $token = {type => 'end tag',
4631 wakaba 1.3 tag_name => $self->{open_elements}->[-1]->[1]}; # MUST
4632 wakaba 1.1 redo B;
4633     }
4634    
4635 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne $token->{tag_name}) {
4636     !!!parse-error (type => 'not closed:'.$self->{open_elements}->[-1]->[1]);
4637 wakaba 1.1 }
4638    
4639 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4640 wakaba 1.1
4641     $clear_up_to_marker->();
4642    
4643 wakaba 1.3 $self->{insertion_mode} = 'in row';
4644 wakaba 1.1
4645     !!!next-token;
4646     redo B;
4647     } elsif ({
4648     body => 1, caption => 1, col => 1,
4649     colgroup => 1, html => 1,
4650     }->{$token->{tag_name}}) {
4651 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4652 wakaba 1.1 ## Ignore the token
4653     !!!next-token;
4654     redo B;
4655     } elsif ({
4656     table => 1, tbody => 1, tfoot => 1,
4657     thead => 1, tr => 1,
4658     }->{$token->{tag_name}}) {
4659     ## have an element in table scope
4660     my $i;
4661     my $tn;
4662 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4663     my $node = $self->{open_elements}->[$_];
4664 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4665     $i = $_;
4666     last INSCOPE;
4667     } elsif ($node->[1] eq 'td' or $node->[1] eq 'th') {
4668     $tn = $node->[1];
4669     ## NOTE: There is exactly one |td| or |th| element
4670     ## in scope in the stack of open elements by definition.
4671     } elsif ({
4672     table => 1, html => 1,
4673     }->{$node->[1]}) {
4674     last INSCOPE;
4675     }
4676     } # INSCOPE
4677     unless (defined $i) {
4678 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4679 wakaba 1.1 ## Ignore the token
4680     !!!next-token;
4681     redo B;
4682     }
4683    
4684     ## Close the cell
4685     !!!back-token; # </?>
4686     $token = {type => 'end tag', tag_name => $tn};
4687     redo B;
4688     } else {
4689     #
4690     }
4691     } else {
4692     #
4693     }
4694    
4695     $in_body->($insert_to_current);
4696     redo B;
4697 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in select') {
4698 wakaba 1.1 if ($token->{type} eq 'character') {
4699 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4700 wakaba 1.1 !!!next-token;
4701     redo B;
4702     } elsif ($token->{type} eq 'comment') {
4703     my $comment = $self->{document}->create_comment ($token->{data});
4704 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4705 wakaba 1.1 !!!next-token;
4706     redo B;
4707     } elsif ($token->{type} eq 'start tag') {
4708     if ($token->{tag_name} eq 'option') {
4709 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4710 wakaba 1.1 ## As if </option>
4711 wakaba 1.3 pop @{$self->{open_elements}};
4712 wakaba 1.1 }
4713    
4714     !!!insert-element ($token->{tag_name}, $token->{attributes});
4715     !!!next-token;
4716     redo B;
4717     } elsif ($token->{tag_name} eq 'optgroup') {
4718 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4719 wakaba 1.1 ## As if </option>
4720 wakaba 1.3 pop @{$self->{open_elements}};
4721 wakaba 1.1 }
4722    
4723 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4724 wakaba 1.1 ## As if </optgroup>
4725 wakaba 1.3 pop @{$self->{open_elements}};
4726 wakaba 1.1 }
4727    
4728     !!!insert-element ($token->{tag_name}, $token->{attributes});
4729     !!!next-token;
4730     redo B;
4731     } elsif ($token->{tag_name} eq 'select') {
4732 wakaba 1.3 !!!parse-error (type => 'not closed:select');
4733 wakaba 1.1 ## As if </select> instead
4734     ## have an element in table scope
4735     my $i;
4736 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4737     my $node = $self->{open_elements}->[$_];
4738 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4739     $i = $_;
4740     last INSCOPE;
4741     } elsif ({
4742     table => 1, html => 1,
4743     }->{$node->[1]}) {
4744     last INSCOPE;
4745     }
4746     } # INSCOPE
4747     unless (defined $i) {
4748 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4749 wakaba 1.1 ## Ignore the token
4750     !!!next-token;
4751     redo B;
4752     }
4753    
4754 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4755 wakaba 1.1
4756 wakaba 1.3 $self->_reset_insertion_mode;
4757 wakaba 1.1
4758     !!!next-token;
4759     redo B;
4760     } else {
4761     #
4762     }
4763     } elsif ($token->{type} eq 'end tag') {
4764     if ($token->{tag_name} eq 'optgroup') {
4765 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option' and
4766     $self->{open_elements}->[-2]->[1] eq 'optgroup') {
4767 wakaba 1.1 ## As if </option>
4768 wakaba 1.3 splice @{$self->{open_elements}}, -2;
4769     } elsif ($self->{open_elements}->[-1]->[1] eq 'optgroup') {
4770     pop @{$self->{open_elements}};
4771 wakaba 1.1 } else {
4772 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4773 wakaba 1.1 ## Ignore the token
4774     }
4775     !!!next-token;
4776     redo B;
4777     } elsif ($token->{tag_name} eq 'option') {
4778 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'option') {
4779     pop @{$self->{open_elements}};
4780 wakaba 1.1 } else {
4781 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4782 wakaba 1.1 ## Ignore the token
4783     }
4784     !!!next-token;
4785     redo B;
4786     } elsif ($token->{tag_name} eq 'select') {
4787     ## have an element in table scope
4788     my $i;
4789 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4790     my $node = $self->{open_elements}->[$_];
4791 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4792     $i = $_;
4793     last INSCOPE;
4794     } elsif ({
4795     table => 1, html => 1,
4796     }->{$node->[1]}) {
4797     last INSCOPE;
4798     }
4799     } # INSCOPE
4800     unless (defined $i) {
4801 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4802 wakaba 1.1 ## Ignore the token
4803     !!!next-token;
4804     redo B;
4805     }
4806    
4807 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4808 wakaba 1.1
4809 wakaba 1.3 $self->_reset_insertion_mode;
4810 wakaba 1.1
4811     !!!next-token;
4812     redo B;
4813     } elsif ({
4814     caption => 1, table => 1, tbody => 1,
4815     tfoot => 1, thead => 1, tr => 1, td => 1, th => 1,
4816     }->{$token->{tag_name}}) {
4817 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4818 wakaba 1.1
4819     ## have an element in table scope
4820     my $i;
4821 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4822     my $node = $self->{open_elements}->[$_];
4823 wakaba 1.1 if ($node->[1] eq $token->{tag_name}) {
4824     $i = $_;
4825     last INSCOPE;
4826     } elsif ({
4827     table => 1, html => 1,
4828     }->{$node->[1]}) {
4829     last INSCOPE;
4830     }
4831     } # INSCOPE
4832     unless (defined $i) {
4833     ## Ignore the token
4834     !!!next-token;
4835     redo B;
4836     }
4837    
4838     ## As if </select>
4839     ## have an element in table scope
4840     undef $i;
4841 wakaba 1.3 INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
4842     my $node = $self->{open_elements}->[$_];
4843 wakaba 1.1 if ($node->[1] eq 'select') {
4844     $i = $_;
4845     last INSCOPE;
4846     } elsif ({
4847     table => 1, html => 1,
4848     }->{$node->[1]}) {
4849     last INSCOPE;
4850     }
4851     } # INSCOPE
4852     unless (defined $i) {
4853 wakaba 1.3 !!!parse-error (type => 'unmatched end tag:select');
4854 wakaba 1.1 ## Ignore the </select> token
4855     !!!next-token; ## TODO: ok?
4856     redo B;
4857     }
4858    
4859 wakaba 1.3 splice @{$self->{open_elements}}, $i;
4860 wakaba 1.1
4861 wakaba 1.3 $self->_reset_insertion_mode;
4862 wakaba 1.1
4863     ## reprocess
4864     redo B;
4865     } else {
4866     #
4867     }
4868     } else {
4869     #
4870     }
4871    
4872 wakaba 1.3 !!!parse-error (type => 'in select:'.$token->{tag_name});
4873 wakaba 1.1 ## Ignore the token
4874     !!!next-token;
4875     redo B;
4876 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after body') {
4877 wakaba 1.1 if ($token->{type} eq 'character') {
4878     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4879     ## As if in body
4880     $reconstruct_active_formatting_elements->($insert_to_current);
4881    
4882 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4883 wakaba 1.1
4884     unless (length $token->{data}) {
4885     !!!next-token;
4886     redo B;
4887     }
4888     }
4889    
4890     #
4891 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4892 wakaba 1.1 } elsif ($token->{type} eq 'comment') {
4893     my $comment = $self->{document}->create_comment ($token->{data});
4894 wakaba 1.3 $self->{open_elements}->[0]->[0]->append_child ($comment);
4895 wakaba 1.1 !!!next-token;
4896     redo B;
4897 wakaba 1.3 } elsif ($token->{type} eq 'start tag') {
4898     !!!parse-error (type => 'after body:'.$token->{tag_name});
4899     #
4900 wakaba 1.1 } elsif ($token->{type} eq 'end tag') {
4901     if ($token->{tag_name} eq 'html') {
4902 wakaba 1.3 if (defined $self->{inner_html_node}) {
4903     !!!parse-error (type => 'unmatched end tag:html');
4904     ## Ignore the token
4905     !!!next-token;
4906     redo B;
4907     } else {
4908     $phase = 'trailing end';
4909     !!!next-token;
4910     redo B;
4911     }
4912 wakaba 1.1 } else {
4913 wakaba 1.3 !!!parse-error (type => 'after body:/'.$token->{tag_name});
4914 wakaba 1.1 }
4915     } else {
4916 wakaba 1.3 !!!parse-error (type => 'after body:#'.$token->{type});
4917 wakaba 1.1 }
4918    
4919 wakaba 1.3 $self->{insertion_mode} = 'in body';
4920 wakaba 1.1 ## reprocess
4921     redo B;
4922 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'in frameset') {
4923 wakaba 1.1 if ($token->{type} eq 'character') {
4924     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4925 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4926 wakaba 1.1
4927     unless (length $token->{data}) {
4928     !!!next-token;
4929     redo B;
4930     }
4931     }
4932    
4933     #
4934     } elsif ($token->{type} eq 'comment') {
4935     my $comment = $self->{document}->create_comment ($token->{data});
4936 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
4937 wakaba 1.1 !!!next-token;
4938     redo B;
4939     } elsif ($token->{type} eq 'start tag') {
4940     if ($token->{tag_name} eq 'frameset') {
4941     !!!insert-element ($token->{tag_name}, $token->{attributes});
4942     !!!next-token;
4943     redo B;
4944     } elsif ($token->{tag_name} eq 'frame') {
4945     !!!insert-element ($token->{tag_name}, $token->{attributes});
4946 wakaba 1.3 pop @{$self->{open_elements}};
4947 wakaba 1.1 !!!next-token;
4948     redo B;
4949     } elsif ($token->{tag_name} eq 'noframes') {
4950     $in_body->($insert_to_current);
4951     redo B;
4952     } else {
4953     #
4954     }
4955     } elsif ($token->{type} eq 'end tag') {
4956     if ($token->{tag_name} eq 'frameset') {
4957 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] eq 'html' and
4958     @{$self->{open_elements}} == 1) {
4959     !!!parse-error (type => 'unmatched end tag:'.$token->{tag_name});
4960 wakaba 1.1 ## Ignore the token
4961     !!!next-token;
4962     } else {
4963 wakaba 1.3 pop @{$self->{open_elements}};
4964 wakaba 1.1 !!!next-token;
4965     }
4966    
4967     ## if not inner_html and
4968 wakaba 1.3 if ($self->{open_elements}->[-1]->[1] ne 'frameset') {
4969     $self->{insertion_mode} = 'after frameset';
4970 wakaba 1.1 }
4971     redo B;
4972     } else {
4973     #
4974     }
4975     } else {
4976     #
4977     }
4978    
4979 wakaba 1.3 if (defined $token->{tag_name}) {
4980     !!!parse-error (type => 'in frameset:'.$token->{tag_name});
4981     } else {
4982     !!!parse-error (type => 'in frameset:#'.$token->{type});
4983     }
4984 wakaba 1.1 ## Ignore the token
4985     !!!next-token;
4986     redo B;
4987 wakaba 1.3 } elsif ($self->{insertion_mode} eq 'after frameset') {
4988 wakaba 1.1 if ($token->{type} eq 'character') {
4989     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
4990 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data});
4991 wakaba 1.1
4992     unless (length $token->{data}) {
4993     !!!next-token;
4994     redo B;
4995     }
4996     }
4997    
4998     #
4999     } elsif ($token->{type} eq 'comment') {
5000     my $comment = $self->{document}->create_comment ($token->{data});
5001 wakaba 1.3 $self->{open_elements}->[-1]->[0]->append_child ($comment);
5002 wakaba 1.1 !!!next-token;
5003     redo B;
5004     } elsif ($token->{type} eq 'start tag') {
5005     if ($token->{tag_name} eq 'noframes') {
5006     $in_body->($insert_to_current);
5007     redo B;
5008     } else {
5009     #
5010     }
5011     } elsif ($token->{type} eq 'end tag') {
5012     if ($token->{tag_name} eq 'html') {
5013     $phase = 'trailing end';
5014     !!!next-token;
5015     redo B;
5016     } else {
5017     #
5018     }
5019     } else {
5020     #
5021     }
5022    
5023 wakaba 1.3 if (defined $token->{tag_name}) {
5024     !!!parse-error (type => 'after frameset:'.$token->{tag_name});
5025     } else {
5026     !!!parse-error (type => 'after frameset:#'.$token->{type});
5027     }
5028 wakaba 1.1 ## Ignore the token
5029     !!!next-token;
5030     redo B;
5031    
5032     ## ISSUE: An issue in spec there
5033     } else {
5034 wakaba 1.3 die "$0: $self->{insertion_mode}: Unknown insertion mode";
5035 wakaba 1.1 }
5036     }
5037     } elsif ($phase eq 'trailing end') {
5038     ## states in the main stage is preserved yet # MUST
5039    
5040     if ($token->{type} eq 'DOCTYPE') {
5041 wakaba 1.3 !!!parse-error (type => 'after html:#DOCTYPE');
5042 wakaba 1.1 ## Ignore the token
5043     !!!next-token;
5044     redo B;
5045     } elsif ($token->{type} eq 'comment') {
5046     my $comment = $self->{document}->create_comment ($token->{data});
5047     $self->{document}->append_child ($comment);
5048     !!!next-token;
5049     redo B;
5050     } elsif ($token->{type} eq 'character') {
5051     if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {
5052     my $data = $1;
5053     ## As if in the main phase.
5054     ## NOTE: The insertion mode in the main phase
5055     ## just before the phase has been changed to the trailing
5056     ## end phase is either "after body" or "after frameset".
5057     $reconstruct_active_formatting_elements->($insert_to_current)
5058     if $phase eq 'main';
5059    
5060 wakaba 1.3 $self->{open_elements}->[-1]->[0]->manakai_append_text ($data);
5061 wakaba 1.1
5062     unless (length $token->{data}) {
5063     !!!next-token;
5064     redo B;
5065     }
5066     }
5067    
5068 wakaba 1.3 !!!parse-error (type => 'after html:#character');
5069 wakaba 1.1 $phase = 'main';
5070     ## reprocess
5071     redo B;
5072     } elsif ($token->{type} eq 'start tag' or
5073     $token->{type} eq 'end tag') {
5074 wakaba 1.3 !!!parse-error (type => 'after html:'.$token->{tag_name});
5075 wakaba 1.1 $phase = 'main';
5076     ## reprocess
5077     redo B;
5078     } elsif ($token->{type} eq 'end-of-file') {
5079     ## Stop parsing
5080     last B;
5081     } else {
5082     die "$0: $token->{type}: Unknown token";
5083     }
5084     }
5085     } # B
5086    
5087     ## Stop parsing # MUST
5088    
5089     ## TODO: script stuffs
5090 wakaba 1.3 } # _tree_construct_main
5091    
5092     sub set_inner_html ($$$) {
         ## Replaces the children of |$node| with the result of parsing
         ## an HTML string (the |innerHTML| setter).
         ##
         ## Written to be invoked as a class method, since it calls
         ## |$class->parse_string| and |$class->new|:
         ##
         ##   $class->set_inner_html ($node, $string, $onerror);
         ##
         ##   $node    - The node whose content is replaced.  Only node
         ##              types 9 (DOCUMENT_NODE in the DOM) and 1
         ##              (ELEMENT_NODE) are handled; anything else dies.
         ##   $string  - The markup to parse.  It is accessed through a
         ##              reference to the caller's argument, not copied.
         ##   $onerror - Optional code reference.  In the element case it
         ##              receives the parse error's key/value pairs
         ##              followed by |line| and |column|.  If false, a
         ##              default handler that |warn|s is used.  In the
         ##              document case it is passed to |parse_string|
         ##              unchanged.
         ##
         ## NOTE: The |($$$)| prototype is not consulted by Perl for
         ## method calls, so it does not validate the arguments.
         ##
         ## NOTE(review): |!!!parse-error| and |!!!next-token| are not
         ## Perl syntax; presumably they are expanded when this |.src|
         ## file is preprocessed, and they appear to expect a variable
         ## named |$self| in scope -- confirm against the macro
         ## definitions.
5093     my $class = shift;
5094     my $node = shift;
         ## After the two |shift|s, |$_[0]| is the string and |$_[1]| is
         ## the error handler.
5095     my $s = \$_[0];
5096     my $onerror = $_[1];
5097    
5098     my $nt = $node->node_type;
5099     if ($nt == 9) {
         ## Document case: empty the document, then parse into it.
5100     # MUST
5101    
5102     ## Step 1 # MUST
5103     ## TODO: If the document has an active parser, ...
5104     ## ISSUE: There is an issue in the spec.
5105    
5106     ## Step 2 # MUST
         ## The child list is copied into |@cn| before any child is
         ## removed (presumably because |child_nodes| is a live list --
         ## confirm).
5107     my @cn = @{$node->child_nodes};
5108     for (@cn) {
5109     $node->remove_child ($_);
5110     }
5111    
5112     ## Step 3, 4, 5 # MUST
5113     $class->parse_string ($$s => $node, $onerror);
5114     } elsif ($nt == 1) {
         ## Element case: parse the string in a scratch document with
         ## |$node| as the context element, then move the resulting
         ## nodes under |$node|.
5115     ## TODO: If non-html element
5116    
5117     ## NOTE: Most of this code is copied from |parse_string|
5118    
5119     ## Step 1 # MUST
         ## |$this_doc| is remembered so that the parsed nodes can be
         ## adopted back into it in step 12.
5120 wakaba 1.14 my $this_doc = $node->owner_document;
5121     my $doc = $this_doc->implementation->create_document;
5122 wakaba 1.18 $doc->manakai_is_html (1);
5123 wakaba 1.3 my $p = $class->new;
5124     $p->{document} = $doc;
5125    
5126     ## Step 9 # MUST
         ## |$i| is the read offset into the string.  |$line| and
         ## |$column| track the position that is reported with parse
         ## errors.  All three are shared with the closures below.
5127     my $i = 0;
5128     my $line = 1;
5129     my $column = 0;
         ## Input callback: stores the next character's code point in
         ## |next_input_character|, or -1 once the string is exhausted.
5130     $p->{set_next_input_character} = sub {
5131     my $self = shift;
5132 wakaba 1.14
         ## Drop the last entry of |prev_input_character| and put the
         ## current character at the front.
5133     pop @{$self->{prev_input_character}};
5134     unshift @{$self->{prev_input_character}}, $self->{next_input_character};
5135    
         ## The assigned value -1 is true, so |and return| always runs
         ## when the end of the string has been reached.
5136 wakaba 1.3 $self->{next_input_character} = -1 and return if $i >= length $$s;
5137     $self->{next_input_character} = ord substr $$s, $i++, 1;
5138     $column++;
5139 wakaba 1.4
5140     if ($self->{next_input_character} == 0x000A) { # LF
5141     $line++;
5142     $column = 0;
5143     } elsif ($self->{next_input_character} == 0x000D) { # CR
         ## A CR, or a CR LF pair, is reported as a single LF; the LF
         ## of a pair is skipped here.
5144 wakaba 1.15 $i++ if substr ($$s, $i, 1) eq "\x0A";
5145 wakaba 1.3 $self->{next_input_character} = 0x000A; # LF # MUST
5146     $line++;
5147 wakaba 1.4 $column = 0;
5148 wakaba 1.3 } elsif ($self->{next_input_character} > 0x10FFFF) {
5149     $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5150     } elsif ($self->{next_input_character} == 0x0000) { # NULL
5151 wakaba 1.14 !!!parse-error (type => 'NULL');
5152 wakaba 1.3 $self->{next_input_character} = 0xFFFD; # REPLACEMENT CHARACTER # MUST
5153     }
5154     };
         ## No characters have been read yet, so every slot holds -1.
5155 wakaba 1.14 $p->{prev_input_character} = [-1, -1, -1];
5156     $p->{next_input_character} = -1;
5157 wakaba 1.3
         ## Use the caller's handler if one was given; otherwise report
         ## parse errors with |warn|.
5158     my $ponerror = $onerror || sub {
5159     my (%opt) = @_;
5160     warn "Parse error ($opt{type}) at line $opt{line} column $opt{column}\n";
5161     };
         ## The parser's error hook appends the current line and column
         ## to the arguments before calling the handler.
5162     $p->{parse_error} = sub {
5163     $ponerror->(@_, line => $line, column => $column);
5164     };
5165    
5166     $p->_initialize_tokenizer;
5167     $p->_initialize_tree_constructor;
5168    
5169     ## Step 2
         ## The initial content model flag is chosen by the context
         ## element's local name; names not listed get 'PCDATA'.
5170     my $node_ln = $node->local_name;
5171     $p->{content_model_flag} = {
5172     title => 'RCDATA',
5173     textarea => 'RCDATA',
5174     style => 'CDATA',
5175     script => 'CDATA',
5176     xmp => 'CDATA',
5177     iframe => 'CDATA',
5178     noembed => 'CDATA',
5179     noframes => 'CDATA',
5180     noscript => 'CDATA',
5181     plaintext => 'PLAINTEXT',
5182     }->{$node_ln} || 'PCDATA';
5183     ## ISSUE: What is "the name of the element"? local name?
5184    
         ## Record the context element as a [node, local name] pair.
5185     $p->{inner_html_node} = [$node, $node_ln];
5186    
5187     ## Step 4
5188     my $root = $doc->create_element_ns
5189     ('http://www.w3.org/1999/xhtml', [undef, 'html']);
5190    
5191     ## Step 5 # MUST
5192     $doc->append_child ($root);
5193    
5194     ## Step 6 # MUST
5195     push @{$p->{open_elements}}, [$root, 'html'];
5196    
5197     undef $p->{head_element};
5198    
5199     ## Step 7 # MUST
5200     $p->_reset_insertion_mode;
5201    
5202     ## Step 8 # MUST
         ## Walk from |$node| up through its ancestors; the first
         ## element in the XHTML namespace whose local name is |form|
         ## becomes the parser's |form_element|.
5203     my $anode = $node;
5204     AN: while (defined $anode) {
5205     if ($anode->node_type == 1) {
5206     my $nsuri = $anode->namespace_uri;
5207     if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') {
5208     if ($anode->local_name eq 'form') { ## TODO: case?
5209     $p->{form_element} = $anode;
5210     last AN;
5211     }
5212     }
5213     }
5214     $anode = $anode->parent_node;
5215     } # AN
5216    
5217     ## Step 3 # MUST
5218     ## Step 10 # MUST
         ## The bare block gives the macro a lexical |$self| that
         ## refers to the new parser |$p|.
5219     {
5220     my $self = $p;
5221     !!!next-token;
5222     }
         ## NOTE(review): The closing comment of the preceding sub reads
         ## |_tree_construct_main|, while the method called here is
         ## |_tree_construction_main| -- confirm that the names agree.
5223     $p->_tree_construction_main;
5224    
5225     ## Step 11 # MUST
         ## Remove the existing children of |$node|, iterating over a
         ## copy of the child list.
5226     my @cn = @{$node->child_nodes};
5227     for (@cn) {
5228     $node->remove_child ($_);
5229     }
5230     ## ISSUE: mutation events? read-only?
5231    
5232     ## Step 12 # MUST
         ## Move each child of the scratch root element into |$node|,
         ## adopting it into |$node|'s owner document first.
5233     @cn = @{$root->child_nodes};
5234     for (@cn) {
5235 wakaba 1.14 $this_doc->adopt_node ($_);
5236 wakaba 1.3 $node->append_child ($_);
5237     }
5238 wakaba 1.14 ## ISSUE: mutation events?
5239 wakaba 1.3
5240     $p->_terminate_tree_constructor;
5241     } else {
         ## Any node type other than 9 or 1 is a fatal error.
5242     die "$0: |set_inner_html| is not defined for node of type $nt";
5243     }
5244     } # set_inner_html
5245    
5246     } # tree construction stage
5247 wakaba 1.1
## Serializes the children of |$node| into an HTML fragment string
## (the |innerHTML| getter).
##
## Invoked as a method; the invocant is ignored.  (The prototype is kept
## for interface compatibility; Perl does not apply it to method calls.)
##
##   $node     - Node whose child nodes are serialized.  The code calls
##               |child_nodes|, |node_type|, |namespace_uri|, |local_name|
##               and |parent_node| on it.
##   $on_error - Optional code reference.  It is called with the offending
##               child node when a node type not handled below is found.
##
## Returns a reference to the serialized string.
sub get_inner_html ($$$) {
  my (undef, $node, $on_error) = @_;

  ## Elements whose text descendants are written out without escaping.
  my $cdata_element = {
    style => 1, script => 1, xmp => 1, iframe => 1,
    noembed => 1, noframes => 1, noscript => 1,
  };

  ## Elements serialized as a start tag only (no children, no end tag).
  my $no_end_tag_element = {
    area => 1, base => 1, basefont => 1, bgsound => 1,
    br => 1, col => 1, embed => 1, frame => 1, hr => 1,
    img => 1, input => 1, link => 1, meta => 1, param => 1,
    spacer => 1, wbr => 1,
  };

  ## Escapes a copy of the given string; |&| has to be replaced first so
  ## that the other replacements are not escaped twice.
  my $escape = sub {
    my $value = shift;
    $value =~ s/&/&amp;/g;
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
  };

  ## Step 1
  my $s = '';

  ## Start in "literal text" mode if |$node| itself or any of its
  ## ancestors is one of the CDATA-like HTML elements.
  my $in_cdata;
  my $parent = $node;
  while (defined $parent) {
    if ($parent->node_type == 1) {
      ## |namespace_uri| can be undefined for null-namespace elements.
      my $nsuri = $parent->namespace_uri;
      if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml' and
          $cdata_element->{$parent->local_name}) { ## TODO: case thingy
        $in_cdata = 1;
      }
    }
    $parent = $parent->parent_node;
  }

  ## Step 2
  ## |@node| is a worklist processed from the front.  It contains node
  ## objects and plain strings: a string is either an end tag to append
  ## or the marker 'cdata-out', which leaves the literal text mode.
  my @node = @{$node->child_nodes};
  C: while (@node) {
    my $child = shift @node;
    unless (ref $child) {
      if ($child eq 'cdata-out') {
        $in_cdata = 0;
      } else {
        $s .= $child; # end tag
      }
      next C;
    }

    my $nt = $child->node_type;
    if ($nt == 1) { # Element
      my $tag_name = lc $child->tag_name; ## ISSUE: Definition of "lowercase"
      $s .= '<' . $tag_name;

      ## ISSUE: Non-html elements

      my @attrs = @{$child->attributes}; # sort order MUST be stable
      for my $attr (@attrs) { # order is implementation dependent
        my $attr_name = lc $attr->name; ## ISSUE: Definition of "lowercase"
        $s .= ' ' . $attr_name . '="' . $escape->($attr->value) . '"';
      }
      $s .= '>';

      next C if $no_end_tag_element->{$tag_name};

      ## The marker is queued first so that it ends up *after* the
      ## children and the end tag unshifted below.  Nested CDATA-like
      ## elements do not queue a second marker.
      if (not $in_cdata and $cdata_element->{$tag_name}) {
        unshift @node, 'cdata-out';
        $in_cdata = 1;
      }

      unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
    } elsif ($nt == 3 or $nt == 4) { # Text or CDATASection
      $s .= $in_cdata ? $child->data : $escape->($child->data);
    } elsif ($nt == 8) { # Comment
      $s .= '<!--' . $child->data . '-->';
    } elsif ($nt == 10) { # DocumentType
      $s .= '<!DOCTYPE ' . $child->name . '>';
    } elsif ($nt == 5) { # entrefs
      ## The replacement content has to be serialized in place, so it is
      ## put at the front of the worklist.  Appending it to the end
      ## would emit it after all following siblings and after the end
      ## tags of the enclosing elements.
      unshift @node, @{$child->child_nodes};
    } else {
      $on_error->($child) if defined $on_error;
    }
    ## ISSUE: This code does not support PIs.
  } # C

  ## Step 3
  return \$s;
} # get_inner_html
5344    
5345     1;
5346 wakaba 1.21 # $Date: 2007/06/23 14:25:05 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24